Skip to content
Snippets Groups Projects
test.py 3.58 KiB
Newer Older
  • Learn to ignore specific revisions
  • Alessandro Cerioni's avatar
    Alessandro Cerioni committed
    import json
    import sys, os
    from elasticsearch import Elasticsearch
    from assets.elasticsearch_template import template
    
    def query(txt):
      """Assemble the Elasticsearch search body for the text `txt`.

      The returned dict has three top-level sections:
        * "query": a fuzzy query_string search on 'data_and_metadata'
          analyzed with 'simple';
        * "highlight": unified highlighting over every field, driven by the
          same query_string clause but analyzed with 'my_search_analyzer';
        * "suggest": a phrase suggester on 'data_and_metadata.suggest'.

      `txt` is interpolated through an f-string, so a non-string value ends
      up in the body as its formatted text.
      """
      text = f"{txt}"
      suggest_field = "data_and_metadata.suggest"

      def _query_string(analyzer):
        # Built fresh on every call so the main query and the highlight
        # query never share a dict object.
        return {
          "query_string": {
            "query": text,
            "default_field": 'data_and_metadata',
            "analyzer": analyzer,
            "fuzziness": 'AUTO',
            "minimum_should_match": '90%',
          },
        }

      highlight_section = {
        "fields": {
          "*": {
            "number_of_fragments": 10,
            "fragment_size": 50,
            "fragmenter": "span",
            "type": "unified",
            "require_field_match": False,
            "order": "score",
          },
        },
        "highlight_query": _query_string('my_search_analyzer'),
      }

      phrase_suggester = {
        "field": suggest_field,
        "size": 10,
        "analyzer": "my_search_analyzer",
        "direct_generator": [
          {
            "field": suggest_field,
            "suggest_mode": "missing",
            "prefix_length": 1,
            "min_word_length": 1,
          },
        ],
        # Options that are currently switched off:
        #"real_word_error_likelihood": 0.50,
        #"shard_size": 1000,
        "highlight": {
          "pre_tag": "<b><i>",
          "post_tag": "</i></b>",
        },
      }

      return {
        "query": _query_string('simple'),
        "highlight": highlight_section,
        "suggest": {
          "text": text,
          "suggestion": {"phrase": phrase_suggester},
        },
      }
    
    
    def print_options( es_response ):
      """Print the suggester options contained in a search response.

      Reads es_response['suggest']['suggestion'], a list of entries that each
      carry an 'options' list, and prints every option preceded by its
      1-based position within its entry.

      'No suggestions :-(' is printed when there is nothing to show: either
      the entry list is empty or every entry has an empty 'options' list.

      Raises:
        KeyError: if 'suggest', 'suggestion' or an entry's 'options' key is
          missing from the response.
      """
      suggestions = es_response['suggest']['suggestion']

      # An entry can be present with an empty 'options' list, so test the
      # options themselves rather than the number of entries; otherwise such
      # a response would print nothing at all.
      if not any(entry['options'] for entry in suggestions):
        print('No suggestions :-(')
        return

      for entry in suggestions:
        # Numbering restarts at 1 for each entry.
        for position, option in enumerate(entry['options'], start=1):
          print(position, option)
    
    if __name__ == '__main__':

      # Script flow: (re)create three test indices from the JSON documents in
      # ./data, then interactively compare the suggestions returned by one
      # index holding every document with those returned by two indices that
      # split the same documents between them.

      # ES_ROOT_URL is the cluster URL and ES_USE_SSL is parsed as an integer
      # flag; a KeyError is raised here if either variable is unset.
      es = Elasticsearch([os.environ['ES_ROOT_URL']], timeout=60, use_ssl=bool(int(os.environ['ES_USE_SSL'])), verify_certs=False)

      # Adapt the imported template to the test indices.
      template['index_patterns'] = [ 'test-suggester*' ]
      template['settings']['number_of_shards'] = 1
      template['settings']['number_of_replicas'] = 1

      indices = ['test-suggester.1', 'test-suggester.2', 'test-suggester.3']

      # Template and index (re)creation is best-effort: any failure is
      # printed and the script carries on.
      try:
        es.indices.delete_template('test-suggester')
      except Exception as e:
        print(e)

      try:
        es.indices.put_template('test-suggester', template)
      except Exception as e:
        print(e)

      for index in indices:

        try:
          es.indices.delete(index)
        except Exception as e:
          print(e)

        try:
          es.indices.create(index)
        except Exception as e:
          print(e)

      # Sorted so that the split between the two partial indices is
      # reproducible from one run to the next.
      filenames = sorted(os.listdir('data'))

      for idx, filename in enumerate(filenames):

        with open(os.path.join('data', filename), 'r', encoding='utf-8') as fp:
          doc = json.load(fp)

        # Every document goes into test-suggester.3 ...
        es.index(index='test-suggester.3', body=doc, refresh='true', doc_type='_doc')

        # ... and into exactly one of the other two: the first half of the
        # file list (including the middle file when the count is odd) goes
        # to test-suggester.1, the remainder to test-suggester.2.
        if idx < len(filenames)/2:
          es.index(index='test-suggester.1', body=doc, refresh='true', doc_type='_doc')
        else:
          es.index(index='test-suggester.2', body=doc, refresh='true', doc_type='_doc')

      # (title, index expression) for the two searches run on each input.
      runs = (
        ("Query #1: against one index", "test-suggester.3"),
        ("Query #2: against two indices", "test-suggester.1,test-suggester.2"),
      )

      while True:
        try:
          txt = input("Please enter your query: ")
        except (EOFError, KeyboardInterrupt):
          # Ctrl-D / Ctrl-C at the prompt: finish the line and leave the
          # loop instead of ending with a traceback.
          print()
          break

        for title, target in runs:
          print()
          print(title)
          rep = es.search(index=target, body=query(txt))
          print_options(rep)

        print()