Commit 9c75fa20 authored by Fabien FORESTIER
Browse files

Merge branch 'development' into 'master'

Development

See merge request !5
parents 47c291b3 915f377e
Pipeline #6746 passed with stages
in 49 seconds
......@@ -126,3 +126,5 @@ dmypy.json
# Pyre type checker
.pyre/
# vim
*.sw[op]
......@@ -40,6 +40,16 @@ La liste complète des arguments est visible en executant la commande suivante:
python tools/alias_copier.py --help
```
## Tests
Install pytest:
```
pip install pytest
```
Run the tests:
```
python -m pytest
```
# TODO
* producing indexation reports out of log messages (cf. the branches `Denis_clean_full_datalogger_31Oct` and `Denis_full_datalogs_Stack_October_31`)
......
......@@ -33,8 +33,8 @@ TYPE_PRIORITY_ORDER = (
)
VALID_IP_ADDRESS_REGEX = '(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])'
VALID_HOSTNAME_ADDRESS = '(([a-zA-Z0-9]|[a-zA-Z0-9][a-zA-Z0-9\-]*[a-zA-Z0-9])\.)*([A-Za-z0-9]|[A-Za-z0-9][A-Za-z0-9\-]*[A-Za-z0-9])'
VALID_IP_ADDRESS_REGEX = r'(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])'
VALID_HOSTNAME_ADDRESS = r'(([a-zA-Z0-9]|[a-zA-Z0-9][a-zA-Z0-9\-]*[a-zA-Z0-9])\.)*([A-Za-z0-9]|[A-Za-z0-9][A-Za-z0-9\-]*[A-Za-z0-9])'
class Remote(object):
......@@ -142,10 +142,10 @@ class Remote(object):
for elt in value:
return evaluate(elt)
if isinstance(value, str):
if re.match("^\d+?\.\d+?$", value):
if re.match(r"^\d+?\.\d+?$", value):
# TODO float/double
return 'float'
elif re.match("^-?(?!0)\d+$", value):
elif re.match(r"^-?(?!0)\d+$", value):
# TODO short/integer/long
return 'int'
# TODO: date... ip... binary... object... boolean...
......
......@@ -12,10 +12,10 @@ def detect_type_new(value):
for elt in value:
return evaluate(elt)
if isinstance(value, str):
if re.match("^\d+?\.\d+?$", value):
if re.match(r"^\d+?\.\d+?$", value):
# TODO float/double
return 'float'
elif re.match("^-?(?!0)\d+$", value):
elif re.match(r"^-?(?!0)\d+$", value):
# TODO short/integer/long
return 'int'
# TODO: date... ip... binary... object... boolean...
......
This diff is collapsed.
from tools import field_type_detector
import json
import pytest
@pytest.fixture(scope="module")
def catalog():
    """Load the field catalog used by the tests of this module.

    The fixture is module-scoped, so the file is read once per test module.

    Returns:
        The object decoded from ``tests/data/field_catalog_by_field.json``.
        The path is relative and therefore resolved against the current
        working directory of the pytest process.
    """
    # JSON is UTF-8 by specification; pin the encoding so decoding does not
    # depend on the platform's default locale.
    with open("tests/data/field_catalog_by_field.json", 'r', encoding="utf-8") as fp:
        return json.load(fp)
@pytest.mark.parametrize(
    'field,expected_type',
    [
        # Fields whose catalog entry mixes several types. Each comment lists
        # the set of types reported for the field, then any remark on the
        # expected winner.
        # acces: {None, 'str'} => str wins
        ('acces', 'str'),
        # altitude: {'int', 'float'} => float wins
        ('altitude', 'float'),
        # date: {'date', 'str', 'datetime'} => str wins
        ('date', 'str'),
        # date_fin: {None, 'date', 'datetime'} => datetime expected (not str)
        ('date_fin', 'datetime'),
        # date_debut: {'int', 'date', 'datetime'} => str wins
        ('date_debut', 'str'),
        # date_ouverture: {'date', 'str'} => str wins
        ('date_ouverture', 'str'),
        # datecreation: {'date', 'datetime'} => datetime expected (not str)
        ('datecreation', 'datetime'),
        # identifiant: {None, 'int', 'str'} => str wins
        ('identifiant', 'str'),
        # identifiantads: {None, 'int'} => int expected (not str)
        ('identifiantads', 'int'),
        # facea: {'bool', None} => bool expected (not str)
        ('facea', 'bool'),
        # last_update: {None, 'datetime'} => datetime expected (not str)
        ('last_update', 'datetime'),
        # numero: {None, 'float', 'int', 'str'} => str wins
        ('numero', 'str'),
        # numerodansvoie: {'int', 'str'} => str wins
        ('numerodansvoie', 'str'),
        # longueur: {'int', 'float', 'str'} => str wins
        # NOTE(review): this case used to repeat ('numero', 'str'), so
        # 'longueur' was never exercised; confirm the field is present in
        # the catalog data file.
        ('longueur', 'str'),
        # sens: {'int', 'str'} => str wins
        ('sens', 'str'),
        # Fields without any election (no conflicting types):
        ('ascenseur', 'bool'),
        ('datechangement', 'date'),
        ('creationtime', 'datetime'),
        ('a_tpn', 'float'),
        ('available_bike_stands', 'int'),
        ('acceptedpaymentmethod', None),
        ('acceptedpaymentmethod.0', 'str'),
    ])
def test_elect_field_type(field, expected_type, catalog):
    """Check the type elected for one field of the catalog.

    A catalogue holding only ``field`` is built from the ``catalog`` fixture
    and passed to ``field_type_detector.elect_field_type``; the result must
    be exactly ``{field: expected_type}``.
    """
    # Restrict the catalogue to the field under test so that each
    # parametrized case is checked in isolation.
    field_catalogue = {
        "fields": {
            field: catalog['fields'][field]
        }
    }

    result = field_type_detector.elect_field_type(field_catalogue)

    assert result == {field: expected_type}
def test_types_present(catalog):
    """Check which type names occur in the catalog fixture.

    Every key found under a ``'types'`` mapping, across all fields and all
    of their columns, is gathered into one set and compared with the
    expected set of type names.
    """
    seen_types = {
        type_name
        for columns in catalog['fields'].values()
        for type_info in columns.values()
        for type_name in type_info['types']
    }

    assert seen_types == {'int', 'date', 'str', 'NoneType', 'bool', 'datetime', 'float'}
......@@ -110,22 +110,20 @@ def elect_field_type( data ):
#if not all(x==found_types[0] for x in found_types): # NOT SAME TYPE: WHICH ONE TO CHOOSE?
if len( found_types ) > 1:
logging.warn('Conflicting datatypes for field "%s" within the table %s.' % (k, db_schema_table))
logging.warning('Conflicting datatypes for field "%s" within the table %s.' % (k, db_schema_table))
#print('WARNING - MIXED TYPES', parsed_types)
#logging.warn('WARNING - MIXED TYPES %s' % found_types)
#logging.warning('WARNING - MIXED TYPES %s' % found_types)
if 'str' in found_types:
logging.warn('Found %s => str wins the election!' % found_types)
logging.warning('Found %s => str wins the election!' % found_types)
types[k].append('str')
#continue
elif found_types == set(['int', 'float']):
logging.warn('Found %s => float wins the election!' % found_types)
logging.warning('Found %s => float wins the election!' % found_types)
types[k].append('float')
#continue
else:
logging.warn('Found %s => str wins the election!' % found_types)
logging.warning('Found %s => str wins the election!' % found_types)
types[k].append('str')
# elif parsed_types == []:
......@@ -140,14 +138,27 @@ def elect_field_type( data ):
# inter-dataset election
for k, v in types.items():
if len( set(v) ) > 1:# and 'null' not in set(v):
#logging.warn('Type conflict: %s, %s' % (k, set(v)))
v = set(filter(None, v))
# TODO: how to resolve conflicts?
if set(v) == set(['int','float']):
logging.warn( 'Type conflict: %s, %s => float wins!' % (k, set(v)) )
if len(v) == 1:
types[k] = [v.pop()]
continue
# how to resolve conflicts?
if 'str' in v:
logging.warning( 'Type conflict: %s, %s => str wins!' % (k, v) )
types[k] = ['str']
elif v == {'date', 'datetime'}:
logging.warning( 'Type conflict: %s, %s => datetime wins!' % (k, v) )
types[k] = ['datetime']
elif v == set(['int','float']):
logging.warning( 'Type conflict: %s, %s => float wins!' % (k, v) )
types[k] = ['float']
else:
logging.warn( 'Type conflict: %s, %s => str wins!' % (k, set(v)) )
logging.warning( 'UNKNOWN Type conflict: %s, %s => str wins!' % (k, v) )
types[k] = ['str']
# for element in itertools.product(*[fields,fields]):
......@@ -156,7 +167,7 @@ def elect_field_type( data ):
# myre = re.compile(r'%s\.[^0-9]+' % element[0])
#
# if myre.findall(element[1]) != []:
# logging.warn("%s, %s" % (element[0], element[1]))
# logging.warning("%s, %s" % (element[0], element[1]))
# how to handle unknown values for dates ???
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment