Skip to content
Snippets Groups Projects
elasticsearch_template.py 10.4 KiB
Newer Older
  • Learn to ignore specific revisions
  • Alessandro Cerioni's avatar
    Alessandro Cerioni committed
    # Elasticsearch index template: index-level settings, a custom analysis
    # chain (ASCII folding, French elision/stop words, edge n-grams) and the
    # dynamic mapping templates applied to the "_doc" mapping type.
    #
    # Every regular expression below is written as a raw string: sequences such
    # as "\." and "\:" are not valid Python escapes, and in a normal string
    # literal they raise a warning at compile time (an error in future Python
    # versions). The resulting values are unchanged.
    template = {
        # Disabled option, kept for reference:
        # "index_patterns": ["geodata.v1"],
        "order": 0,
        "settings": {
            "index.mapping.total_fields.limit": 10000,
            # Disabled options, kept for reference:
            # "index.mapping.ignore_malformed": True,
            # "number_of_shards": 48,
            # "number_of_replicas": 0,
            "refresh_interval": "30s",
            "max_ngram_diff": 100,
            "analysis": {
                "filter": {
                    "original_preserving_ascii_folding": {
                        "type": "asciifolding",
                        "preserve_original": True,
                    },
                    "french_elision": {
                        "type": "elision",
                        "articles_case": True,
                        "articles": [
                            "l", "m", "t", "qu", "n", "s",
                            "j", "d", "c", "jusqu", "quoiqu",
                            "lorsqu", "puisqu",
                        ],
                    },
                    "french_stop": {
                        "type": "stop",
                        "stopwords": "_french_",
                    },
                    "synonym": {
                        "type": "synonym",
                        "synonyms": ["velo => velo'v"],
                    },
                },
                "tokenizer": {
                    "my_edge_ngram_tokenizer": {
                        "type": "edge_ngram",
                        "min_gram": 2,
                        "max_gram": 30,
                        "token_chars": [
                            "letter",
                            "digit",
                            "punctuation",
                            "symbol",
                        ],
                    },
                },
                "analyzer": {
                    # Index-time analyzer referenced by the text templates below.
                    "edge_ngram_analyzer_with_asciifolding": {
                        "type": "custom",
                        "tokenizer": "my_edge_ngram_tokenizer",
                        "filter": [
                            "lowercase",
                            "original_preserving_ascii_folding",
                            "french_elision",
                            "french_stop",
                            "synonym",
                        ],
                    },
                    # Search-time analyzer referenced by the text templates below.
                    "my_search_analyzer": {
                        "type": "custom",
                        "tokenizer": "standard",
                        "filter": [
                            "lowercase",
                            "french_elision",
                            "french_stop",
                        ],
                    },
                    # Analyzer used by the "suggest" sub-fields.
                    "suggest_analyzer": {
                        "type": "custom",
                        "tokenizer": "standard",
                        "filter": [
                            "lowercase",
                            "french_elision",
                            "french_stop",
                        ],
                    },
                },
            },
        },
        "mappings": {
            "_doc": {
                # Priority is given by order: keep the more specific templates
                # ahead of the generic per-type ones at the end of the list.
                "dynamic_templates": [
                    {
                        "geoshape-template": {
                            "match_pattern": "regex",
                            # NOTE(review): the dots in this pattern are not
                            # escaped, so under "regex" matching they accept any
                            # character -- confirm whether that is intended.
                            "path_match": "metadata-fr.bbox|data-fr.geometry",
                            "mapping": {
                                "type": "geo_shape",
                                "tree": "quadtree",
                                "ignore_malformed": True,
                            },
                        },
                    },
                    {
                        "keyword-template": {
                            "match_pattern": "regex",
                            "path_match": (
                                r".*md5.*"
                                r"|metadata-fr\.link\.formats.*"
                                r"|metadata-fr\.link\.service.*"
                                r"|metadata-fr\.parentId.*"
                                r"|metadata-fr\.geonet\:info\.uuid"
                                r"|slug"
                                r"|uuid"
                            ),
                            "mapping": {
                                # The text field itself is not indexed; only the
                                # "keyword" sub-field is declared for these paths.
                                "type": "text",
                                "index": False,
                                "fields": {
                                    "keyword": {
                                        "type": "keyword",
                                    },
                                },
                            },
                        },
                    },
                    {
                        "date-template": {
                            "match_mapping_type": "date",
                            "mapping": {
                                "type": "date",
                                "format": "strict_date_optional_time",
                                "fields": {
                                    "sort": {
                                        "type": "date",
                                    },
                                },
                            },
                        },
                    },
                    {
                        "nested-template": {
                            # No "match_pattern" here, so this is not a regex.
                            "path_match": "metadata-fr.responsibleParty",
                            "mapping": {
                                "type": "nested",
                            },
                        },
                    },
                    {
                        "unindexed-path-template-1": {
                            "match_pattern": "regex",
                            "match_mapping_type": "*",
                            "path_match": (
                                r"metadata-fr\.href.*"
                                r"|metadata-fr\.idxMsg.*"
                                r"|data-fr\.geometry\..*"
                                r"|metadata-fr\.identifier.*"
                                r"|metadata-fr\.geonet\:info\..+"
                                r"|metadata-fr\.responsibleParty\.logo"
                                r"|metadata-fr\.image\..*"
                                r"|.*url"
                                r"|metadata-fr\.link\.name"
                            ),
                            "mapping": {
                                "enabled": False,
                            },
                        },
                    },
                    {
                        "unindexed-path-template-2": {
                            "match_pattern": "regex",
                            "match_mapping_type": "*",
                            "path_match": (
                                r"metadata-fr\.link\.projections"
                                r"|metadata-fr\.link\.content-type"
                                r"|metadata-fr\.link\.content-length"
                                r"|metadata-fr\.link\.bbox_by_projection"
                                r"|fields"
                            ),
                            "mapping": {
                                "enabled": False,
                            },
                        },
                    },
                    {
                        "long-template": {
                            "match_mapping_type": "long",
                            "mapping": {
                                "type": "long",
                                "fields": {
                                    "sort": {
                                        "type": "long",
                                    },
                                },
                            },
                        },
                    },
                    {
                        "double-template": {
                            "match_mapping_type": "double",
                            "mapping": {
                                "type": "double",
                                "fields": {
                                    "sort": {
                                        "type": "double",
                                    },
                                },
                            },
                        },
                    },
                    {
                        "boolean-template": {
                            "match_mapping_type": "boolean",
                            "mapping": {
                                "type": "boolean",
                                "fields": {
                                    "sort": {
                                        "type": "boolean",
                                    },
                                },
                            },
                        },
                    },
                    {
                        # String values under data-fr.properties: same mapping as
                        # the generic string template below, except that they are
                        # also copied to the "data" field.
                        "data-template": {
                            "match_pattern": "regex",
                            "path_match": r"data-fr\.properties\..+",
                            "match_mapping_type": "string",
                            "mapping": {
                                "type": "text",
                                "analyzer": "edge_ngram_analyzer_with_asciifolding",
                                "search_analyzer": "my_search_analyzer",
                                "term_vector": "with_positions_offsets",
                                "copy_to": ["data", "data_and_metadata"],
                                "index_options": "offsets",
                                "fields": {
                                    "keyword": {
                                        "type": "keyword",
                                        "ignore_above": 256,
                                    },
                                    "sort": {
                                        "type": "keyword",
                                    },
                                    "suggest": {
                                        "type": "text",
                                        "analyzer": "suggest_analyzer",
                                    },
                                },
                            },
                        },
                    },
                    {
                        # Fallback for every other string value.
                        "string-template": {
                            "match_mapping_type": "string",
                            "mapping": {
                                "type": "text",
                                "analyzer": "edge_ngram_analyzer_with_asciifolding",
                                "search_analyzer": "my_search_analyzer",
                                "term_vector": "with_positions_offsets",
                                "copy_to": "data_and_metadata",
                                "index_options": "offsets",
                                "fields": {
                                    "keyword": {
                                        "type": "keyword",
                                        "ignore_above": 256,
                                    },
                                    "sort": {
                                        "type": "keyword",
                                    },
                                    "suggest": {
                                        "type": "text",
                                        "analyzer": "suggest_analyzer",
                                    },
                                },
                            },
                        },
                    },
                ],
            },
        },
    }