Skip to content
Snippets Groups Projects
elastic_mapping_template_pages.py 14 KiB
Newer Older
  • Learn to ignore specific revisions
  • Alessandro Cerioni's avatar
    Alessandro Cerioni committed
    # -*- coding: UTF-8 -*-
    
    template = {
        #"index_patterns" : ["posts.v1"],
        "order" : 0,
        "settings" : {
            "index.mapping.total_fields.limit": 10000,
            #"index.mapping.ignore_malformed": True,
            "number_of_shards" : 1,
            "number_of_replicas" : 0,
            "max_ngram_diff": 100,
            "analysis": {
                "filter": {
                    "my_ascii_folding" : {
                        "type" : "asciifolding",
                        "preserve_original" : False
                    },
                    "my_original_preserving_ascii_folding" : {
                        "type" : "asciifolding",
                        "preserve_original" : True
                    },
                    "french_elision": {
                        "type":         "elision",
                        "articles_case": True,
                        "articles": [
                            "l", "m", "t", "qu", "n", "s",
                            "j", "d", "c", "jusqu", "quoiqu",
                            "lorsqu", "puisqu"
                          ]
                    },
                    "french_stop": {
                      "type":       "stop",
                      "stopwords":  "_french_"
                    },
                    "preserving_word_delimiter": {
        				"type": "word_delimiter",
        				"preserve_original": "true"
    			    },
                    "protect_keywords": {
                      "type": "keyword_marker",
                      "keywords": ["vélo'v"]
                    }
                    # "shingle": {
                    #             "type": "shingle",
                    #             "min_shingle_size": 2,
                    #             "max_shingle_size": 3
                    # }
                    # "french_keywords": {
                    #   "type":       "keyword_marker",
                    #   "keywords":   ["Exemple"]
                    # },
                    # "french_stemmer": {
                    #   "type":       "stemmer",
                    #   "language":   "light_french"
                    # }
                },
                "tokenizer": {
                    "my_edge_ngram_tokenizer": {
                        "type": "edge_ngram",
                        "min_gram": 2,
                        "max_gram": 30,
                        "token_chars": [
        					"letter",
        					"digit"
        				]
                    }
                },
                "analyzer": {
                    "edge_ngram_analyzer_with_asciifolding": {
                        "type": "custom",
                        "tokenizer": "my_edge_ngram_tokenizer",
                        "filter": [
                            "lowercase",
                            "protect_keywords",
                            "my_original_preserving_ascii_folding",
                            "french_elision",
                            "french_stop"#,
                            #"preserving_word_delimiter"
                        ]
                    },
                    "my_search_analyzer": {
                        "type": "custom",
                        "tokenizer": "standard",
                        "filter": [
                            "lowercase",
                            "protect_keywords",
                            # "my_ascii_folding",
                            "french_elision",
                            "french_stop"#,
                            #"preserving_word_delimiter"
                        ]
                    },
                    "my_suggest_analyzer": {
                        "type": "custom",
                        "tokenizer": "standard",
                        "filter": [
                            "lowercase",
                            "protect_keywords",
                            # "my_ascii_folding",
                            "french_elision",
                            "french_stop"#,
                            #"preserving_word_delimiter"
                        ]
                    },
                    # "trigram": {
                    #             "type": "custom",
                    #             "tokenizer": "standard",
                    #             "filter": ["standard", "shingle"]
                    # },
                    # "reverse": {
                    #             "type": "custom",
                    #             "tokenizer": "standard",
                    #             "filter": ["standard", "reverse"]
                    # }
                }
            }
        },
        "mappings" : {
    
    DESPRES Damien's avatar
    DESPRES Damien committed
            "_doc" : {
    
    Alessandro Cerioni's avatar
    Alessandro Cerioni committed
                "dynamic_templates": [ # priority is given by order!
                    {
                        "geoshape-template" : {
                            "match_pattern": "regex",
                            "path_match": "metadata-fr.bbox|data-fr.geometry",
                            "mapping": {
                                "type": "geo_shape",
                                "tree": "quadtree",
                                #"index": "false"
                                "ignore_malformed": True
                            }
                        }
                    },
                    {
                        "link-template" : {
                            "path_match": "metadata-fr.link",
                            "mapping": {
                                #"type": "nested",
                                "index": "false"
                                #"ignore_malformed": True
                            }
                        }
                    },
                    # {
                    #     "exception1-template" : {
                    #         "path_match": "data-fr.properties.date_photo",
                    #         "mapping": {
                    #             "type": "text"
                    #             # "index": False,
                    #             #"ignore_malformed": True
                    #         }
                    #     }
                    # },
                    {
                        "date-template" : {
                            "match_mapping_type": "date",
                            # "path_match": "data-fr\.properties\.date.+|data-fr\.properties\.last_update.*|metadata-fr\.creationDate",
                            "mapping": {
                                "type": "date",
                                "format": "strict_date_optional_time",#||epoch_millis||yyyy-MM-dd HH:mm:ss",
                                "fields": {
                                    "sort": {
                                        "type": "date"
                                    }
                                }
                                # "index": False,
                                #"ignore_malformed": True
                            }
                        }
                    },
                    # {
                    #     "unindexed-field-template": {
                    #         "match_pattern": "regex",
                    #         "match": "url|href",
                    #         "mapping": {
                    #             # "type": "keyword",
                    #             "index": False,
                    #             "ignore_malformed": True
                    #         }
                    #     }
                    # },
                    {
                        "unindexed-path-template": {
                            "match_pattern": "regex",
                            "match_mapping_type": "*",
                            "path_match": "metadata-fr\.href.*|metadata-fr\.idxMsg.*|data-fr\.geometry\..*|metadata-fr\.identifier.*|metadata-fr\.geonet\:info\.@xmlns:geonet|metadata-fr\.responsibleParty\.logo|metadata-fr\.image\..*|.*url|metadata-fr\.link\.name",
                            # "match": "(metadata-fr\.image.*|data-fr\.geometry.*|metadata-fr\.href.*|metadata-fr\.idxMsg.*)",
                            "mapping": {
                                "type": "text",
                                #"ignore_malformed": True
                                "index": False
                            }
                        }
                    },
                    {
                        "long-template": {
                          "match_mapping_type": "long",
                          "mapping": {
                            "type":     "long",
                            "fields": {
                                "sort":
                                 {
                                     "type": "long"
                                 }
                            }
                          }
                        }
                    },
                    {
                        "double-template": {
                          "match_mapping_type": "double",
                          "mapping": {
                            "type":     "double",
                            "fields": {
                                "sort":
                                 {
                                     "type": "double"
                                 }
                            }
                          }
                        }
                    },
                    {
                        "boolean-template": {
                          "match_mapping_type": "boolean",
                          "mapping": {
                            "type":     "boolean",
                            "fields": {
                                "sort":
                                 {
                                     "type": "boolean"
                                 }
                            }
                          }
                        }
                    },
                    # {
                    #     "exception1-template": {
                    #         "match_pattern": "regex",
                    #         "match": "data-fr.properties.datemajgraph|data-fr.properties.date_creation", #test-q-plus-wfs.c1b069ca-181d-4265-9838-8d182f207bd3.ingest.v6
                    #         "mapping": {
                    #             "type": "date",
                    #             "ignore_malformed": True
                    #         }
                    #     }
                    # },
                    # {
                    #     "exception2-template": {
                    #         "match_mapping_type": "long",
                    #         # "match": "numero", #test-q-plus-wfs.c1b069ca-181d-4265-9838-8d182f207bd3.ingest.v6
                    #         "mapping": {
                    #             "type": "long",
                    #             "ignore_malformed": True
                    #         }
                    #     }
                    # },
                    # {
                    #     "exception3-template": {
                    #         "match_pattern": "regex",
                    #         "match": "data-fr\.properties\.address", #test-q-plus-wfs.c1b069ca-181d-4265-9838-8d182f207bd3.ingest.v6
                    #         "mapping": {
                    #           "type": "object",
                    #           "ignore_malformed": True
                    #         }
                    #     }
                    # },
                    # {
                    #     "exception4-template": {
                    #         "match_mapping_type": "object",
                    #         # "match": "numero", #test-q-plus-wfs.c1b069ca-181d-4265-9838-8d182f207bd3.ingest.v6
                    #         "mapping": {
                    #             "type": "object",
                    #             "ignore_malformed": True
                    #         }
                    #     }
                    # },
                    # {
                    #     "exception5-template": {
                    #         "match_pattern": "regex",
                    #         "match": "openinghoursspecification",
                    #         # "match": "numero", #test-q-plus-wfs.c1b069ca-181d-4265-9838-8d182f207bd3.ingest.v6
                    #         "mapping": {
                    #             "type": "nested",
                    #             "ignore_malformed": True
                    #         }
                    #     }
                    # },
                    {
                        "data-template": {
                          "match_pattern": "regex",
                          "path_match": "data-fr\.properties\..+",
                          "match_mapping_type": "string",
                          "mapping": {
                            "type":     "text",
                            # "ignore_malformed": True,
                            "analyzer": "edge_ngram_analyzer_with_asciifolding",
                            # "search_analyzer": "standard",
                            "search_analyzer": "my_search_analyzer",
                            "term_vector": "with_positions_offsets",
                            "copy_to": ["data", "data_and_metadata"],
                            "index_options": "offsets",
                            "fields": {
                              "keyword": {
                                "type": "keyword",
                                "ignore_above": 256,
                              },
                              "sort": {
                                "type": "keyword"
                              },
                              "suggest": {
                                "type": "completion",
                                "analyzer": "simple"
                                #"preserve_position_increments":
                              }
                            }
                          }
                        }
                    },
                    {
                        "string-template": {
                          "match_mapping_type": "string",
                          "mapping": {
                            "type":     "text",
                            # "ignore_malformed": True,
                            "analyzer": "edge_ngram_analyzer_with_asciifolding",
                            # "search_analyzer": "standard",
                            "search_analyzer": "my_search_analyzer",
                            "term_vector": "with_positions_offsets",
                            "copy_to": "data_and_metadata",
                            "index_options": "offsets",
                            "fields": {
                              "keyword": {
                                "type": "keyword",
                                "ignore_above": 256,
                              },
                              "sort": {
                                "type": "keyword"
                              },
                              "suggest": {
                                "type": "text",
                                "analyzer": "my_suggest_analyzer"
                                #"preserve_position_increments":
                              }
                            #   "trigram": {
                            #     "type": "text",
                            #     "analyzer": "trigram"
                            #   },
                            #   "reverse": {
                            #       "type": "text",
                            #       "analyzer": "reverse"
                            #   }
                            }
                          }
                        }
                    }
                ]
    
    DESPRES Damien's avatar
    DESPRES Damien committed
            }