# elastic_mapping_template_pages.py
# -*- coding: UTF-8 -*-

# Elasticsearch index template: index settings, custom analysis chain and
# dynamic mapping rules, kept as a plain dict.
#
# Layout:
#   * "settings.analysis" declares the token filters, the edge n-gram
#     tokenizer and the three custom analyzers referenced by the mappings.
#   * "mappings._doc.dynamic_templates" lists the rules used to map fields
#     that are not declared explicitly.  The list is ordered and the order
#     gives the priority, so the path-specific rules come before the generic
#     per-type rules.
#
# Commented-out entries are alternatives that are currently disabled.
template = {
    # "index_patterns": ["posts.v1"],
    "order": 0,
    "settings": {
        "index.mapping.total_fields.limit": 10000,
        # "index.mapping.ignore_malformed": True,
        "number_of_shards": 1,
        "number_of_replicas": 0,
        "max_ngram_diff": 100,
        "analysis": {
            "filter": {
                # ASCII folding that replaces the original token.
                "my_ascii_folding": {
                    "type": "asciifolding",
                    "preserve_original": False,
                },
                # ASCII folding that keeps the original token next to the
                # folded one.
                "my_original_preserving_ascii_folding": {
                    "type": "asciifolding",
                    "preserve_original": True,
                },
                "french_elision": {
                    "type": "elision",
                    "articles_case": True,
                    "articles": [
                        "l", "m", "t", "qu", "n", "s",
                        "j", "d", "c", "jusqu", "quoiqu",
                        "lorsqu", "puisqu",
                    ],
                },
                "french_stop": {
                    "type": "stop",
                    "stopwords": "_french_",
                },
                # Declared but not referenced by any analyzer below (its uses
                # are commented out).
                "preserving_word_delimiter": {
                    "type": "word_delimiter",
                    "preserve_original": "true",
                },
                "protect_keywords": {
                    "type": "keyword_marker",
                    "keywords": ["vélo'v"],
                },
                # "shingle": {
                #     "type": "shingle",
                #     "min_shingle_size": 2,
                #     "max_shingle_size": 3
                # },
                # "french_keywords": {
                #     "type": "keyword_marker",
                #     "keywords": ["Exemple"]
                # },
                # "french_stemmer": {
                #     "type": "stemmer",
                #     "language": "light_french"
                # },
            },
            "tokenizer": {
                "my_edge_ngram_tokenizer": {
                    "type": "edge_ngram",
                    "min_gram": 2,
                    "max_gram": 30,
                    "token_chars": [
                        "letter",
                        "digit",
                    ],
                },
            },
            "analyzer": {
                # Index-time analyzer used by the text mappings below.
                "edge_ngram_analyzer_with_asciifolding": {
                    "type": "custom",
                    "tokenizer": "my_edge_ngram_tokenizer",
                    "filter": [
                        "lowercase",
                        "protect_keywords",
                        "my_original_preserving_ascii_folding",
                        "french_elision",
                        "french_stop",
                        # "preserving_word_delimiter",
                    ],
                },
                # Search-time analyzer used by the text mappings below.
                "my_search_analyzer": {
                    "type": "custom",
                    "tokenizer": "standard",
                    "filter": [
                        "lowercase",
                        "protect_keywords",
                        # "my_ascii_folding",
                        "french_elision",
                        "french_stop",
                        # "preserving_word_delimiter",
                    ],
                },
                # Analyzer of the "suggest" sub-field of "string-template";
                # currently configured exactly like "my_search_analyzer".
                "my_suggest_analyzer": {
                    "type": "custom",
                    "tokenizer": "standard",
                    "filter": [
                        "lowercase",
                        "protect_keywords",
                        # "my_ascii_folding",
                        "french_elision",
                        "french_stop",
                        # "preserving_word_delimiter",
                    ],
                },
                # "trigram": {
                #     "type": "custom",
                #     "tokenizer": "standard",
                #     "filter": ["standard", "shingle"]
                # },
                # "reverse": {
                #     "type": "custom",
                #     "tokenizer": "standard",
                #     "filter": ["standard", "reverse"]
                # },
            },
        },
    },
    "mappings": {
        "_doc": {
            "dynamic_templates": [  # priority is given by order!
                {
                    "geoshape-template": {
                        "match_pattern": "regex",
                        # NOTE(review): with "regex" matching the dots below
                        # are unescaped, so they match any character --
                        # confirm this is intended.
                        "path_match": "metadata-fr.bbox|data-fr.geometry",
                        "mapping": {
                            "type": "geo_shape",
                            "tree": "quadtree",
                            # "index": "false"
                            "ignore_malformed": True,
                        },
                    },
                },
                {
                    "link-template": {
                        "path_match": "metadata-fr.link",
                        "mapping": {
                            # "type": "nested",
                            "index": "false",
                            # "ignore_malformed": True
                        },
                    },
                },
                # {
                #     "exception1-template": {
                #         "path_match": "data-fr.properties.date_photo",
                #         "mapping": {
                #             "type": "text"
                #             # "index": False,
                #             # "ignore_malformed": True
                #         }
                #     }
                # },
                {
                    "date-template": {
                        "match_mapping_type": "date",
                        # "path_match": "data-fr\.properties\.date.+|data-fr\.properties\.last_update.*|metadata-fr\.creationDate",
                        "mapping": {
                            "type": "date",
                            # Previously considered format suffix:
                            # ||epoch_millis||yyyy-MM-dd HH:mm:ss
                            "format": "strict_date_optional_time",
                            "fields": {
                                "sort": {
                                    "type": "date",
                                },
                            },
                            # "index": False,
                            # "ignore_malformed": True
                        },
                    },
                },
                # {
                #     "unindexed-field-template": {
                #         "match_pattern": "regex",
                #         "match": "url|href",
                #         "mapping": {
                #             # "type": "keyword",
                #             "index": False,
                #             "ignore_malformed": True
                #         }
                #     }
                # },
                {
                    # Fields matched here are mapped as text with indexing
                    # disabled.
                    "unindexed-path-template": {
                        "match_pattern": "regex",
                        "match_mapping_type": "*",
                        # Raw string: the backslashes belong to the regex and
                        # must not be read as Python escape sequences.
                        "path_match": r"metadata-fr\.href.*|metadata-fr\.idxMsg.*|data-fr\.geometry\..*|metadata-fr\.identifier.*|metadata-fr\.geonet\:info\.@xmlns:geonet|metadata-fr\.responsibleParty\.logo|metadata-fr\.image\..*|.*url|metadata-fr\.link\.name",
                        # "match": "(metadata-fr\.image.*|data-fr\.geometry.*|metadata-fr\.href.*|metadata-fr\.idxMsg.*)",
                        "mapping": {
                            "type": "text",
                            # "ignore_malformed": True
                            "index": False,
                        },
                    },
                },
                {
                    "long-template": {
                        "match_mapping_type": "long",
                        "mapping": {
                            "type": "long",
                            "fields": {
                                "sort": {
                                    "type": "long",
                                },
                            },
                        },
                    },
                },
                {
                    "double-template": {
                        "match_mapping_type": "double",
                        "mapping": {
                            "type": "double",
                            "fields": {
                                "sort": {
                                    "type": "double",
                                },
                            },
                        },
                    },
                },
                {
                    "boolean-template": {
                        "match_mapping_type": "boolean",
                        "mapping": {
                            "type": "boolean",
                            "fields": {
                                "sort": {
                                    "type": "boolean",
                                },
                            },
                        },
                    },
                },
                # {
                #     "exception1-template": {
                #         "match_pattern": "regex",
                #         "match": "data-fr.properties.datemajgraph|data-fr.properties.date_creation", #test-q-plus-wfs.c1b069ca-181d-4265-9838-8d182f207bd3.ingest.v6
                #         "mapping": {
                #             "type": "date",
                #             "ignore_malformed": True
                #         }
                #     }
                # },
                # {
                #     "exception2-template": {
                #         "match_mapping_type": "long",
                #         # "match": "numero", #test-q-plus-wfs.c1b069ca-181d-4265-9838-8d182f207bd3.ingest.v6
                #         "mapping": {
                #             "type": "long",
                #             "ignore_malformed": True
                #         }
                #     }
                # },
                # {
                #     "exception3-template": {
                #         "match_pattern": "regex",
                #         "match": "data-fr\.properties\.address", #test-q-plus-wfs.c1b069ca-181d-4265-9838-8d182f207bd3.ingest.v6
                #         "mapping": {
                #             "type": "object",
                #             "ignore_malformed": True
                #         }
                #     }
                # },
                # {
                #     "exception4-template": {
                #         "match_mapping_type": "object",
                #         # "match": "numero", #test-q-plus-wfs.c1b069ca-181d-4265-9838-8d182f207bd3.ingest.v6
                #         "mapping": {
                #             "type": "object",
                #             "ignore_malformed": True
                #         }
                #     }
                # },
                # {
                #     "exception5-template": {
                #         "match_pattern": "regex",
                #         "match": "openinghoursspecification",
                #         # "match": "numero", #test-q-plus-wfs.c1b069ca-181d-4265-9838-8d182f207bd3.ingest.v6
                #         "mapping": {
                #             "type": "nested",
                #             "ignore_malformed": True
                #         }
                #     }
                # },
                {
                    # String fields under data-fr.properties: copied to both
                    # "data" and "data_and_metadata", with a completion
                    # sub-field for suggestions.
                    "data-template": {
                        "match_pattern": "regex",
                        # Raw string: keeps the regex backslashes literal.
                        "path_match": r"data-fr\.properties\..+",
                        "match_mapping_type": "string",
                        "mapping": {
                            "type": "text",
                            # "ignore_malformed": True,
                            "analyzer": "edge_ngram_analyzer_with_asciifolding",
                            # "search_analyzer": "standard",
                            "search_analyzer": "my_search_analyzer",
                            "term_vector": "with_positions_offsets",
                            "copy_to": ["data", "data_and_metadata"],
                            "index_options": "offsets",
                            "fields": {
                                "keyword": {
                                    "type": "keyword",
                                    "ignore_above": 256,
                                },
                                "sort": {
                                    "type": "keyword",
                                },
                                "suggest": {
                                    "type": "completion",
                                    "analyzer": "simple",
                                    # "preserve_position_increments":
                                },
                            },
                        },
                    },
                },
                {
                    # Fallback for every other string field: copied to
                    # "data_and_metadata" only, with a text "suggest"
                    # sub-field.
                    "string-template": {
                        "match_mapping_type": "string",
                        "mapping": {
                            "type": "text",
                            # "ignore_malformed": True,
                            "analyzer": "edge_ngram_analyzer_with_asciifolding",
                            # "search_analyzer": "standard",
                            "search_analyzer": "my_search_analyzer",
                            "term_vector": "with_positions_offsets",
                            "copy_to": "data_and_metadata",
                            "index_options": "offsets",
                            "fields": {
                                "keyword": {
                                    "type": "keyword",
                                    "ignore_above": 256,
                                },
                                "sort": {
                                    "type": "keyword",
                                },
                                "suggest": {
                                    "type": "text",
                                    "analyzer": "my_suggest_analyzer",
                                    # "preserve_position_increments":
                                },
                                # "trigram": {
                                #     "type": "text",
                                #     "analyzer": "trigram"
                                # },
                                # "reverse": {
                                #     "type": "text",
                                #     "analyzer": "reverse"
                                # },
                            },
                        },
                    },
                },
            ],
        },
    },
}