Skip to content
Snippets Groups Projects
Commit 0ed26f1d authored by Alessandro Cerioni's avatar Alessandro Cerioni
Browse files

Reducing verbosity. Making the code robust with respect to missing geometry.

parent 4fd86f7c
No related branches found
No related tags found
No related merge requests found
...@@ -114,7 +114,7 @@ def elect_field_type( data ): ...@@ -114,7 +114,7 @@ def elect_field_type( data ):
found_types.remove('NoneType') found_types.remove('NoneType')
if not all(x==found_types[0] for x in found_types): # NOT SAME TYPE: WHICH ONE TO CHOOSE? if not all(x==found_types[0] for x in found_types): # NOT SAME TYPE: WHICH ONE TO CHOOSE?
logging.warn('WARNING - MIXED TYPES %s %s' % (k, uuid)) logging.warn('WARNING - MIXED TYPES %s %s' % (k, db_schema_table))
#print('WARNING - MIXED TYPES', parsed_samples) #print('WARNING - MIXED TYPES', parsed_samples)
logging.warn('WARNING - MIXED TYPES %s' % found_types) logging.warn('WARNING - MIXED TYPES %s' % found_types)
...@@ -200,27 +200,14 @@ def generate_field_catalog( pg, catalog=None ): ...@@ -200,27 +200,14 @@ def generate_field_catalog( pg, catalog=None ):
for table in pg.get_tables(schema_name): for table in pg.get_tables(schema_name):
# if str(table) != selected_table: # if str(table) != selected_table:
# continue # continue
db_schema_table = '%s.%s' % (pg.dbname, table)
logging.info('Analyzing table %s. %i docs analyzed so far.' % (db_schema_table, output['analyzed_docs']))
# print(table) # print(table)
# print( pg.count_entries(table) )
for doc in pg.get_entries(table): for doc in pg.get_entries(table):
# print(doc)
properties = doc['properties'] properties = doc['properties']
db_schema_table = '%s.%s' % (pg.dbname, table)
#exit(0)
#dataset_uuid = doc['metadata-fr']['geonet:info']['uuid']
#dataset_title = doc['metadata-fr']['title']
flattened_properties = flatten_json(properties) flattened_properties = flatten_json(properties)
#print(flattened_properties)
#exit(0)
#logging.INFO("Analyzing dataset %s %s" % (dataset_uuid, dataset_title))
#logging.debug('there')
# --------------------------------------------------------------------------------------------- # ---------------------------------------------------------------------------------------------
for k, v in flattened_properties.items(): for k, v in flattened_properties.items():
...@@ -244,15 +231,10 @@ def generate_field_catalog( pg, catalog=None ): ...@@ -244,15 +231,10 @@ def generate_field_catalog( pg, catalog=None ):
output['analyzed_docs'] += 1 output['analyzed_docs'] += 1
#logging.info( "%s/%s" % (output['analyzed_docs'], total) )
logging.info( "%s documents analyzed so far..." % (output['analyzed_docs']) )
# useful for debugging: # useful for debugging:
if cfg['field_type_detector']['debug'] and output['analyzed_docs'] > 10: if cfg['field_type_detector']['debug'] and output['analyzed_docs'] > 10:
return output return output
#print(output)
return output return output
...@@ -270,19 +252,17 @@ def main(cfg): ...@@ -270,19 +252,17 @@ def main(cfg):
#print("(Some of the) output files are already present, and rewrite is disabled. Exiting!") #print("(Some of the) output files are already present, and rewrite is disabled. Exiting!")
raise Exception("(Some of the) output files are already present, and rewrite is disabled!") raise Exception("(Some of the) output files are already present, and rewrite is disabled!")
# TODO for every DB
# connection_string = ('postgresql+psycopg2://{username}:{password}@{hostname}/{dbname}').format(
# hostname=cfg['postgis']['host'], dbname=cfg['postgis']['databases'][0],
# username=cfg['postgis']['username'], password=cfg['postgis']['password'])
dbnames = cfg['postgis']['databases'] dbnames = cfg['postgis']['databases']
field_catalog = {} field_catalog = {}
logging.info('Building catalog...')
for dbname in dbnames: for dbname in dbnames:
logging.info('Analyzing database %s...' % dbname)
pg_connection = Remote(hostname=cfg['postgis']['host'], dbname=dbname, username=cfg['postgis']['username'], password=cfg['postgis']['password']) pg_connection = Remote(hostname=cfg['postgis']['host'], dbname=dbname, username=cfg['postgis']['username'], password=cfg['postgis']['password'])
field_catalog = generate_field_catalog( pg_connection, field_catalog ) field_catalog = generate_field_catalog( pg_connection, field_catalog )
logging.info("Catalog: built. %i docs were analyzed. " % field_catalog['analyzed_docs'])
# writing results to disk # writing results to disk
if not os.path.exists(working_directory): if not os.path.exists(working_directory):
os.mkdir(working_directory) os.mkdir(working_directory)
......
...@@ -74,26 +74,28 @@ class Remote(object): ...@@ -74,26 +74,28 @@ class Remote(object):
def get_entries(self, table):
    """Yield one GeoJSON-like Feature dict per row of *table*.

    Each yielded document has the shape::

        {'type': 'Feature', 'geometry': {...} | absent, 'properties': {...}}

    The 'geometry' key is only present when the table has a geometry
    column (``get_columns`` returned a non-None *geom*); tables without
    geometry yield plain attribute Features.

    :param table: SQLAlchemy Table (reflected) to read from.
    :returns: generator of Feature dicts.
    """
    columns, geom = self.get_columns(table)
    fields = [table.c[col.name] for col in columns]
    if geom is not None:
        # Reproject to WGS84 only when needed, then serialize to GeoJSON
        # in-database. (Original code carried a '# bug' note on these two
        # lines — presumably an SRID/transform issue; TODO confirm.)
        if not geom.type.srid == 4326:
            the_geom = table.c[geom.name].ST_Transform(4326).ST_AsGeoJSON()  # bug
        else:
            the_geom = table.c[geom.name].ST_AsGeoJSON()  # bug
        fields.append(the_geom)
    selected = select(fields)
    for entry in self.engine.execute(selected):
        items = entry.items()
        document = {'type': 'Feature'}
        if geom is not None:
            # Pop the geometry column off *items* BEFORE building the
            # properties dict: the geometry was appended last to *fields*,
            # and leaving it in *items* would leak the raw GeoJSON string
            # into 'properties'.
            document['geometry'] = json.loads(items.pop()[1])
        document['properties'] = dict(items)
        yield document
def get_columns(self, table): def get_columns(self, table):
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment