"""Interactive comparison of Elasticsearch phrase suggestions.

The script (re)creates three test indices, loads every JSON document found in
the ``data`` directory into ``test-suggester.3`` and additionally into either
``test-suggester.1`` (first half of the sorted file list) or
``test-suggester.2`` (the rest).  It then prompts for queries and prints the
suggester options returned when searching the single full index versus the
two half indices together.

Connection settings are read from the ``ES_ROOT_URL`` and ``ES_USE_SSL``
environment variables.
"""
import json
import os
import sys

from elasticsearch import Elasticsearch

from assets.elasticsearch_template import template

TEMPLATE_NAME = 'test-suggester'
FULL_INDEX = 'test-suggester.3'
HALF_INDICES = ('test-suggester.1', 'test-suggester.2')
# Same creation order as before: .1, .2, .3
ALL_INDICES = HALF_INDICES + (FULL_INDEX,)


def query(txt):
    """Build the search request body for *txt*.

    The body combines three parts, all driven by the same text:

    * a ``query_string`` query on ``data_and_metadata``,
    * a highlight section with its own ``highlight_query``,
    * a phrase suggester named ``suggestion`` on ``data_and_metadata.suggest``.

    Args:
        txt: The user's query; it is converted to ``str`` before use.

    Returns:
        A new ``dict`` suitable for the ``body`` argument of a search call.
    """
    text = str(txt)
    return {
        "query": {
            "query_string": {
                "query": text,
                "default_field": 'data_and_metadata',
                "analyzer": 'simple',
                "fuzziness": 'AUTO',
                "minimum_should_match": '90%',
            },
        },
        "highlight": {
            "fields": {
                "*": {
                    "number_of_fragments": 10,
                    "fragment_size": 50,
                    "fragmenter": "span",
                    "type": "unified",
                    "require_field_match": False,
                    "order": "score",
                },
            },
            "highlight_query": {
                "query_string": {
                    "query": text,
                    "default_field": 'data_and_metadata',
                    "analyzer": 'my_search_analyzer',
                    "fuzziness": 'AUTO',
                    "minimum_should_match": '90%',
                },
            },
        },
        "suggest": {
            "text": text,
            "suggestion": {
                "phrase": {
                    "field": "data_and_metadata.suggest",
                    "size": 10,
                    "analyzer": "my_search_analyzer",
                    "direct_generator": [
                        {
                            "field": "data_and_metadata.suggest",
                            "suggest_mode": "missing",
                            "prefix_length": 1,
                            "min_word_length": 1,
                        },
                    ],
                    # Optional tuning knobs, currently left at their defaults:
                    #   "real_word_error_likelihood": 0.50,
                    #   "shard_size": 1000,
                    "highlight": {
                        "pre_tag": "<b><i>",
                        "post_tag": "</i></b>",
                    },
                },
            },
        },
    }


def print_options(es_response):
    """Print the numbered suggester options contained in *es_response*.

    Numbering restarts at 1 for every entry of
    ``es_response['suggest']['suggestion']``.  When no entry carries any
    option (including the case of an empty entry list) a single
    ``No suggestions :-(`` line is printed instead, so a miss is always
    visible to the user.

    Args:
        es_response: Search response containing ``suggest.suggestion``.

    Raises:
        KeyError: If the response has no ``suggest``/``suggestion`` section.
    """
    suggestions = es_response['suggest']['suggestion']

    printed_any = False
    for suggestion in suggestions:
        for position, option in enumerate(suggestion['options'], start=1):
            print(position, option)
            printed_any = True

    if not printed_any:
        print('No suggestions :-(')


def _best_effort(action, *args):
    """Call ``action(*args)``; report any failure on stdout and carry on.

    Setup steps such as deleting a template or index that does not exist yet
    are expected to fail on a fresh cluster, so errors are deliberately
    printed rather than raised.
    """
    try:
        action(*args)
    except Exception as exc:  # deliberate: setup is best-effort
        print(exc)


def _reset_indices(es):
    """Re-install the index template and recreate all test indices."""
    template['index_patterns'] = ['test-suggester*']
    template['settings']['number_of_shards'] = 1
    template['settings']['number_of_replicas'] = 1

    _best_effort(es.indices.delete_template, TEMPLATE_NAME)
    _best_effort(es.indices.put_template, TEMPLATE_NAME, template)

    for index in ALL_INDICES:
        _best_effort(es.indices.delete, index)
        _best_effort(es.indices.create, index)


def _load_documents(es, data_dir='data'):
    """Index every JSON file of *data_dir* into the full and one half index."""
    filenames = sorted(os.listdir(data_dir))
    first_half, second_half = HALF_INDICES

    for idx, filename in enumerate(filenames):
        # JSON is UTF-8 by specification; do not rely on the platform default.
        with open(os.path.join(data_dir, filename), 'r', encoding='utf-8') as fp:
            doc = json.load(fp)

        half_index = first_half if idx < len(filenames) / 2 else second_half
        for target in (FULL_INDEX, half_index):
            es.index(index=target, body=doc, refresh='true', doc_type='_doc')


def _prompt_loop(es):
    """Ask for queries until EOF / Ctrl-C and print both result sets."""
    while True:
        try:
            txt = input("Please enter your query: ")
        except (EOFError, KeyboardInterrupt):
            # Leave quietly instead of dumping a traceback on Ctrl-D / Ctrl-C.
            print()
            return

        if not txt.strip():
            # Nothing to search for; ask again.
            continue

        print()
        print("Query #1: against one index")
        rep = es.search(index=FULL_INDEX, body=query(txt))
        print_options(rep)
        print()

        print("Query #2: against two indices")
        rep = es.search(index=",".join(HALF_INDICES), body=query(txt))
        print_options(rep)
        print()


def main():
    """Set up the test indices, load the data and start the prompt loop.

    Raises:
        KeyError: If ``ES_ROOT_URL`` or ``ES_USE_SSL`` is not set.
        ValueError: If ``ES_USE_SSL`` is not an integer literal.
    """
    es = Elasticsearch(
        [os.environ['ES_ROOT_URL']],
        timeout=60,
        use_ssl=bool(int(os.environ['ES_USE_SSL'])),
        # NOTE(security): certificate verification is disabled; acceptable
        # only for test clusters, never for production endpoints.
        verify_certs=False,
    )

    _reset_indices(es)
    _load_documents(es)
    _prompt_loop(es)


if __name__ == '__main__':
    main()