......@@ -6,7 +6,9 @@ The most "tedious" part of the workflow regards the heuristic detection of data
Some "editorial metadata" are added to raw (meta)data before actually inserting documents into Elasticsearch (cf. the "doc-indexer" module).
A simplified overview of the entire workflow is provided by the attached diagram.
Here is a simplified overview of the entire workflow.
![Indexer workflow diagram](./doc/data-grandlyon-com-indexer-workflow-drawio.png)
......@@ -100,6 +100,9 @@ def elect_field_type( data ):
found_types = set(analysis['types'].keys())
if found_types == set(['NoneType']):
# if no type has been found for a particular field,
# we are still adding the field to the catalog but with a None type
if 'NoneType' in found_types:
......@@ -64,8 +64,14 @@ def fix_field_types( in_docs, out_types ):
out_flattened_properties[prop] = convert_to_str(in_flattened_properties[prop])
elif out_types[lookup_key] == 'bool':
out_flattened_properties[prop] = convert_to_boolean(in_flattened_properties[prop])
elif not out_types[lookup_key]:
# Reaching this branch means that a value has been found for this particular field,
# so the type recorded for that field should not be null. To fix that, we trigger
# the recreation of the catalog.
logging.debug('type %s found, recreating fields catalog', out_types[lookup_key])
raise FieldTypeNotFound(lookup_key)
logging.critical('type %s not supported', out_types[prop])
logging.critical('type %s not supported', out_types[lookup_key])
# pprint
