Skip to content
Snippets Groups Projects
Commit 1990e343 authored by Alessandro Cerioni
Browse files

Making the code even more robust w/ respect to missing geometry. Improved...

Making the code even more robust with respect to missing geometry. Improved logging for datatype elections.
parent 52feb53e
Branches
Tags
No related merge requests found
......@@ -91,6 +91,8 @@ def old_generate_field_catalog(cfg, mongo_collection):
def elect_field_type( data ):
logging.info("Electing field types...")
fields = data['fields'].keys()
......@@ -100,12 +102,12 @@ def elect_field_type( data ):
types[k] = []
#logging.debug(k)
for db_schema_table, dataset in data['fields'][k].items():
# intra-dataset election
for db_schema_table, analysis in data['fields'][k].items():
#logging.info('\nAnalyzing table: %s' % (db_schema_table))
found_types = set(dataset['types'].keys())
found_types = set(analysis['types'].keys())
if found_types == set(['NoneType']):
continue
......@@ -115,22 +117,22 @@ def elect_field_type( data ):
#if not all(x==found_types[0] for x in found_types): # NOT SAME TYPE: WHICH ONE TO CHOOSE?
if len( found_types ) > 1:
logging.warn('WARNING - MIXED TYPES %s %s' % (k, db_schema_table))
logging.warn('Conflicting datatypes for field "%s" within the table %s.' % (k, db_schema_table))
#print('WARNING - MIXED TYPES', parsed_types)
logging.warn('WARNING - MIXED TYPES %s' % found_types)
#logging.warn('WARNING - MIXED TYPES %s' % found_types)
if 'str' in found_types:
logging.warn('WARNING - MIXED TYPES: str wins!')
logging.warn('Found %s => str wins the election!' % found_types)
types[k].append('str')
#continue
elif found_types == set(['int', 'float']):
logging.warn('WARNING - MIXED TYPES: float wins!')
logging.warn('Found %s => float wins the election!' % found_types)
types[k].append('float')
#continue
else:
logging.warn('WARNING - MIXED TYPES: str wins!')
logging.warn('Found %s => str wins the election!' % found_types)
types[k].append('str')
# elif parsed_types == []:
......@@ -142,17 +144,17 @@ def elect_field_type( data ):
types[k].append(found_types.pop())
# inter-dataset election
for k, v in types.items():
if len( set(v) ) > 1:# and 'null' not in set(v):
logging.debug('CONFLICT: %s, %s' % (k, set(v)))
#logging.warn('Type conflict: %s, %s' % (k, set(v)))
# TODO: how to resolve conflicts?
if set(v) == set(['int','float']):
logging.debug('CONFLICT: float wins!')
logging.warn( 'Type conflict: %s, %s => float wins!' % (k, set(v)) )
types[k] = ['float']
else:
logging.debug('CONFLICT: str wins!')
logging.warn( 'Type conflict: %s, %s => str wins!' % (k, set(v)) )
types[k] = ['str']
# for element in itertools.product(*[fields,fields]):
......@@ -175,6 +177,7 @@ def elect_field_type( data ):
pass # means we got NULL VALUES
#print(k, v)
logging.info('Elections: done!')
return result
......@@ -208,8 +211,10 @@ def generate_field_catalog( cfg, pg, catalog=None ):
# continue
# else:
# found = True
count = pg.count_entries(table)
try:
count = pg.count_entries(table)
except:
count = 'unknown no. of'
#print(count)
db_schema_table = '%s.%s' % (pg.dbname, table)
......@@ -295,6 +300,7 @@ def main(cfg):
pg_connection = Remote(hostname=cfg['postgis']['host'], dbname=dbname, username=cfg['postgis']['username'], password=cfg['postgis']['password'])
logging.info('Done.')
field_catalog = generate_field_catalog( cfg, pg_connection, field_catalog )
print('here')
logging.info("Catalog: built. %i docs were analyzed. " % field_catalog['analyzed_docs'])
# writing results to disk
......@@ -336,10 +342,11 @@ if __name__ == '__main__':
try:
main(cfg)
logging.info('Done!')
except Exception as e:
logging.error(e)
logging.info('Done!')
#main(wait=False, rewrite=True)
# while True:
......
......@@ -88,8 +88,12 @@ class Remote(object):
selected = select(fields)
for entry in self.engine.execute(selected):
items = entry.items()
#print(items)
if geom is not None:
geometry = json.loads(items.pop()[1])
try:
geometry = json.loads(items.pop()[1])
except TypeError:
geom = None
properties = dict(items)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment