diff --git a/lib/elasticsearch_template.py b/lib/elasticsearch_template.py index ed4fa9fdf676e8094d49d9b829b8161c8f30a136..ecb67fbb57239b5fd5994bd1e9e08347d2203db9 100644 --- a/lib/elasticsearch_template.py +++ b/lib/elasticsearch_template.py @@ -53,7 +53,7 @@ template = { "analyzer": { "my_sort_analyzer": { "type": "custom", - "tokenizer": "standard", + "tokenizer": "keyword", "filter": [ "original_preserving_ascii_folding" ] diff --git a/lib/postgis_helper.py b/lib/postgis_helper.py index 85fd00838829bcdb11570129149ca34381c98088..0465027fe6eb798f227f50fab89ed6183d7fed85 100644 --- a/lib/postgis_helper.py +++ b/lib/postgis_helper.py @@ -203,6 +203,7 @@ def main(**kwargs): schema_names = conn.get_schema_names() for schema in schema_names: if schema_name and not schema_name == schema: + logging.debug(f"not {schema_name}, skipping {schema}") continue for table in conn.get_tables(schema=schema): if table_name and not table_name == table.name: diff --git a/tools/field_type_detector.py b/tools/field_type_detector.py index 78d92fe7560889ab97cafba7445b30b954b493f6..91a7863e32b6db77740c43aa14feb48be609ac83 100644 --- a/tools/field_type_detector.py +++ b/tools/field_type_detector.py @@ -151,12 +151,12 @@ def generate_field_catalog( cfg ): logging.info('Getting schemas...') schema_names = pg.get_schema_names() - logging.info('Done.') + logging.info('Done: %s', schema_names) for schema_name in schema_names: if schema_whitelist is not None: if schema_name not in schema_whitelist: - logging.debug('Skipping schema %s' % schema_name) + logging.debug('Skipping schema %s (not in whitelist %s)', schema_name, schema_whitelist) continue for table in pg.get_tables(schema_name): if schema_dot_table_whitelist is not None: @@ -245,7 +245,9 @@ def main(cfg): elected_field_types = elect_field_type( field_catalog_by_field ) with open(filename2, 'w') as fp: + logging.debug("writing %s", filename2) json.dump(elected_field_types, fp, sort_keys=True) + 
logging.debug(json.dumps(elected_field_types) ) final_field_catalog_by_dbschematable = field_catalog_by_dbschematable.copy() @@ -254,7 +256,10 @@ def main(cfg): final_field_catalog_by_dbschematable[db_schema_table]['types'][field] = elected_field_types[field] with open(filename3, 'w') as fp: + logging.debug("writing %s", filename3) json.dump(final_field_catalog_by_dbschematable, fp, sort_keys=True) + from pprint import pformat + logging.debug(pformat(json.dumps(final_field_catalog_by_dbschematable) )) return diff --git a/workers/sample_generator.py b/workers/sample_generator.py index d86d5f3dc64d57cdb6d9cfb64a55d81c826c0e76..82dd4e90691d64e5a761e3050f8e90bde084ff4c 100644 --- a/workers/sample_generator.py +++ b/workers/sample_generator.py @@ -30,7 +30,10 @@ def callback(channel, method, properties, body): #data = res.json() # get sample records from the ingest index - source_es = Elasticsearch([cfg['reindexer']['source_url']], timeout=60) + if 'source_url' in cfg['reindexer']: + source_es = Elasticsearch([cfg['reindexer']['source_url']], timeout=60) + else: + source_es = Elasticsearch([cfg['reindexer']['destination_url']], timeout=60) the_query = dict() the_query['size'] = sample_size