Skip to content
Snippets Groups Projects
es_template.py 11.7 KiB
Newer Older
  • Learn to ignore specific revisions
  • Alessandro Cerioni's avatar
    Alessandro Cerioni committed
    # Elasticsearch index template: index-level settings (a custom analysis chain
    # with French elision/stop-word handling and edge n-grams) plus the dynamic
    # mapping templates applied to the "_doc" mapping type.
    # NOTE(review): "index_patterns" is commented out below, so it is presumably
    # filled in by whatever code consumes this dict -- confirm against the caller.
    template = {
        #"index_patterns" : ["geodata.v1"],
        "order": 0,
        "settings": {
            "index.mapping.total_fields.limit": 10000,
            #"index.mapping.ignore_malformed": True,
            # "number_of_shards" : 48,
            # "number_of_replicas" : 0,
            # Must be >= (max_gram - min_gram) of the edge n-gram tokenizer below.
            "max_ngram_diff": 100,
            "analysis": {
                "filter": {
                    "my_ascii_folding": {
                        "type": "asciifolding",
                        "preserve_original": False
                    },
                    "my_original_preserving_ascii_folding": {
                        "type": "asciifolding",
                        "preserve_original": True
                    },
                    "french_elision": {
                        "type": "elision",
                        "articles_case": True,
                        "articles": [
                            "l", "m", "t", "qu", "n", "s",
                            "j", "d", "c", "jusqu", "quoiqu",
                            "lorsqu", "puisqu"
                        ]
                    },
                    "french_stop": {
                        "type": "stop",
                        "stopwords": "_french_"
                    },
                    "preserving_word_delimiter": {
                        "type": "word_delimiter",
                        # Real boolean, consistent with the asciifolding filters above.
                        "preserve_original": True
                    },
                    "keep_keywords": {
                        "type": "keep",
                        "keep_words": ["vélo'v", "Vélo'V", "Vélo'v", "metropole de lyon"]
                    },
                    # The two multiplexers are defined here but currently not
                    # referenced by any analyzer (see the commented-out entries
                    # in the analyzers' "filter" lists).
                    # Each "filters" entry is ONE comma-separated filter chain.
                    "my_multiplexer": {
                        "type": "multiplexer",
                        "filters": [
                            "lowercase, my_original_preserving_ascii_folding, french_elision, french_stop",
                            "keep_keywords"
                        ],
                        #"filters" : [ "keep_keywords" ]
                        "preserve_original": False
                    },
                    "my_other_multiplexer": {
                        "type": "multiplexer",
                        "filters": [
                            "lowercase, french_elision, french_stop",
                            "keep_keywords"
                        ],
                        #"filters" : [ "keep_keywords" ]
                        "preserve_original": False
                    }
                },
                "tokenizer": {
                    "my_edge_ngram_tokenizer": {
                        "type": "edge_ngram",
                        "min_gram": 2,
                        "max_gram": 30,
                        "token_chars": [
                            "letter",
                            "digit"
                        ]
                    }
                },
                "analyzer": {
                    # Index-time analyzer used by the text dynamic templates below.
                    "edge_ngram_analyzer_with_asciifolding": {
                        "type": "custom",
                        "tokenizer": "my_edge_ngram_tokenizer",
                        "filter": [
                            "lowercase",
                            "my_original_preserving_ascii_folding",
                            "french_elision",
                            "french_stop"
                            #"my_multiplexer"
                        ]
                    },
                    # Search-time analyzer used by the text dynamic templates below.
                    "my_search_analyzer": {
                        "type": "custom",
                        "tokenizer": "standard",
                        "filter": [
                            "lowercase",
                            "french_elision",
                            "french_stop"
                            #"my_other_multiplexer"
                        ]
                    },
                    # Analyzer of the "suggest" sub-field in "string-template".
                    "my_suggest_analyzer": {
                        "type": "custom",
                        "tokenizer": "standard",
                        "filter": [
                            "lowercase",
                            "french_elision",
                            "french_stop"
                            #"my_other_multiplexer"
                        ]
                    },
                }
            }
        },
        "mappings": {
            "_doc": {
                "dynamic_templates": [  # priority is given by order!
                    {
                        "geoshape-template": {
                            "match_pattern": "regex",
                            # Dots escaped so they match a literal "." only, like
                            # the other regex path patterns in this template.
                            "path_match": r"metadata-fr\.bbox|data-fr\.geometry",
                            "mapping": {
                                "type": "geo_shape",
                                "tree": "quadtree",
                                #"index": "false"
                                "ignore_malformed": True
                            }
                        }
                    },

                    # {
                    #     "link-template" : {
                    #         "path_match": "metadata-fr.link",
                    #         "mapping": {
                    #             #"type": "nested",
                    #             "index": "false"
                    #             #"ignore_malformed": True
                    #         }
                    #     }
                    # },

                    {
                        # Not indexed as text; only the ".keyword" sub-field is
                        # declared for these paths.
                        "keyword-template": {
                            "match_pattern": "regex",
                            # Raw string: "\." is a regex escape, not a Python one.
                            "path_match": r".*md5.*|metadata-fr\.link\.formats.*|metadata-fr\.link\.service.*|metadata-fr\.parentId.*",
                            "mapping": {
                                "type": "text",
                                "index": False,
                                "fields": {
                                    "keyword": {
                                        "type": "keyword"
                                    }
                                }
                                #"ignore_malformed": True
                            }
                        }
                    },
                    {
                        "date-template": {
                            "match_mapping_type": "date",
                            # "path_match": "data-fr\.properties\.date.+|data-fr\.properties\.last_update.*|metadata-fr\.creationDate",
                            "mapping": {
                                "type": "date",
                                "format": "strict_date_optional_time",  # ||epoch_millis||yyyy-MM-dd HH:mm:ss
                                "fields": {
                                    "sort": {
                                        "type": "date"
                                    }
                                }
                            }
                        }
                    },
                    {
                        # No "match_pattern" here, so this path is not evaluated
                        # as a regex.
                        "nested-template": {
                            "path_match": "metadata-fr.responsibleParty",
                            "mapping": {
                                "type": "nested"
                                # "fields": {
                                #     "sort":
                                #      {
                                #          "type": "boolean"
                                #      }
                                # }
                            }
                        }
                    },
                    {
                        # Paths mapped as text with "index": False, whatever
                        # their detected type.
                        "unindexed-path-template-1": {
                            "match_pattern": "regex",
                            "match_mapping_type": "*",
                            "path_match": r"metadata-fr\.href.*|metadata-fr\.idxMsg.*|data-fr\.geometry\..*|metadata-fr\.identifier.*|metadata-fr\.geonet\:info.*:geonet|metadata-fr\.responsibleParty\.logo|metadata-fr\.image\..*|.*url|metadata-fr\.link\.name",
                            # "match": "(metadata-fr\.image.*|data-fr\.geometry.*|metadata-fr\.href.*|metadata-fr\.idxMsg.*)",
                            "mapping": {
                                "type": "text",
                                #"ignore_malformed": True
                                "index": False
                            }
                        }
                    },
                    {
                        "unindexed-path-template-2": {
                            "match_pattern": "regex",
                            "match_mapping_type": "*",
                            "path_match": r"metadata-fr\.link\.bbox_by_projection|metadata-fr\.link\.projections|metadata-fr\.link\.content-type|metadata-fr\.link\.content-length",
                            # "match": "(metadata-fr\.image.*|data-fr\.geometry.*|metadata-fr\.href.*|metadata-fr\.idxMsg.*)",
                            "mapping": {
                                "type": "text",
                                #"ignore_malformed": True
                                "index": False
                            }
                        }
                    },
                    # The three templates below keep the detected type and add
                    # a "sort" sub-field of that same type.
                    {
                        "long-template": {
                            "match_mapping_type": "long",
                            "mapping": {
                                "type": "long",
                                "fields": {
                                    "sort": {
                                        "type": "long"
                                    }
                                }
                            }
                        }
                    },
                    {
                        "double-template": {
                            "match_mapping_type": "double",
                            "mapping": {
                                "type": "double",
                                "fields": {
                                    "sort": {
                                        "type": "double"
                                    }
                                }
                            }
                        }
                    },
                    {
                        "boolean-template": {
                            "match_mapping_type": "boolean",
                            "mapping": {
                                "type": "boolean",
                                "fields": {
                                    "sort": {
                                        "type": "boolean"
                                    }
                                }
                            }
                        }
                    },
                    {
                        # String values under data-fr.properties: copied to both
                        # "data" and "data_and_metadata"; "suggest" is a
                        # completion field here.
                        "data-template": {
                            "match_pattern": "regex",
                            "path_match": r"data-fr\.properties\..+",
                            "match_mapping_type": "string",
                            "mapping": {
                                "type": "text",
                                # "ignore_malformed": True,
                                "analyzer": "edge_ngram_analyzer_with_asciifolding",
                                # "search_analyzer": "standard",
                                "search_analyzer": "my_search_analyzer",
                                "term_vector": "with_positions_offsets",
                                "copy_to": ["data", "data_and_metadata"],
                                "index_options": "offsets",
                                "fields": {
                                    "keyword": {
                                        "type": "keyword",
                                        "ignore_above": 256,
                                    },
                                    "sort": {
                                        "type": "keyword"
                                    },
                                    "suggest": {
                                        "type": "completion",
                                        "analyzer": "simple"
                                        #"preserve_position_increments":
                                    }
                                }
                            }
                        }
                    },
                    {
                        # Catch-all for the remaining string values: copied to
                        # "data_and_metadata" only; "suggest" is a text field.
                        "string-template": {
                            "match_mapping_type": "string",
                            "mapping": {
                                "type": "text",
                                # "ignore_malformed": True,
                                "analyzer": "edge_ngram_analyzer_with_asciifolding",
                                # "search_analyzer": "standard",
                                "search_analyzer": "my_search_analyzer",
                                "term_vector": "with_positions_offsets",
                                "copy_to": "data_and_metadata",
                                "index_options": "offsets",
                                "fields": {
                                    "keyword": {
                                        "type": "keyword",
                                        "ignore_above": 256,
                                    },
                                    "sort": {
                                        "type": "keyword"
                                    },
                                    "suggest": {
                                        "type": "text",
                                        "analyzer": "my_suggest_analyzer"
                                    }
                                }
                            }
                        }
                    }
                ]
            }
        }
    }