diff --git a/config.yaml b/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..488d9e165a17e696f9ca8fd82785e9c5f803265d --- /dev/null +++ b/config.yaml @@ -0,0 +1,170 @@ +session: + #id: abcd1234 + working_directory: output + #log_level: DEBUG + +geonetwork: + url: https://download.data.grandlyon.com/catalogue/srv/fre/q + records_per_page: 100 + username: compte-technique-moteur-de-recherche-1 + password: g98VKrXJDpesa4UJ + +credentials: + download.data.grandlyon.com: + username: compte-technique-moteur-de-recherche-1 + password: g98VKrXJDpesa4UJ + +mongo: + database: indexerdb + collection: indexer_logs + user: root + password: example + host: mongo +# host: localhost + port: 27017 + +rabbitmq: +# host: localhost + host: rabbitmq + port : 5672 + user : admin + password : admin + routing_key_logs: scripts_log_key + queue_logs_name: session_logs + exchange_logs_name: download_data_grandlyon_com_logs + routing_key_1: metadata_records_to_process + queue_name_1: metadata_records_to_process + routing_key_2: doc_pages_to_index + queue_name_2: doc_pages_to_index + routing_key_3: doc_pages_to_enrich + queue_name_3: doc_pages_to_enrich + routing_key_4: reindex_tasks + queue_name_4: reindex_tasks + routing_key_5: doc_pages_to_process + queue_name_5: doc_pages_to_process + routing_key_6: sampling_tasks + queue_name_6: sampling_tasks + +postgis: + username: grandlyon + password: + host: 147.135.195.26 + databases: + grandlyon: + # whitelist: + # # # - fpc_fond_plan_communaut.fpclignefaitage + # - pvo_patrimoine_voirie.pvochausseetrottoir + #- gip_proprete.gipdecheterie_3_0_0 + rdata: + #whitelist: + tuba: + #whitelist: + ldata: + whitelist: + - bruit.stations_observatoire_acoustique + - bruit.stations_observatoire_acoustique_time + - velov.stations + - velov.stations_time + +metadata_getter: + uuids_to_get: + #- all + #- c1b069ca-181d-4265-9838-8d182f207bd3 # arbres alignement + #- 7c31bbee-9155-4a2d-964f-b13a9df5c542 + #- 
445c0ec2-7079-4a50-94bd-feafc86a7366 # sentiers nature + #- 171e406c-df1f-47e3-b241-33f938f49951 # PDF détecté comme HTML ! + #- ef6e4d78-4d8f-485f-b4d8-8b3b78047a6c # WWWW:LINK-1.0-http--link protocol ! + #- 607435ed-fb52-4022-a3a4-1132094d4e60 # KML badly handled + #- 3939483d-2261-4e03-83ec-23f9683f8cb7 # legalConstraints is not an array + #- 9bc6806d-e8a0-463b-aaa1-4364a75e44d7 # SOS badly handled + #- 4e9c6cda-450c-4e19-a9ff-97095002df5d # bad license + # --------------------------------------------------------------------------------------- + # - 8373ded5-0525-4875-971c-77adb46ad966 # legalConstraints is not an array + # - ef6e4d78-4d8f-485f-b4d8-8b3b78047a6c # legalConstraints is not an array + # - d953c691-9177-4937-9f76-b6a7ea0e62c0 # legalConstraints is not an array + #- 6a866f2d-5588-4ee0-bec1-126f70acb1db # DC, no individualName + #- ae2c2441-2afa-4f95-a8a8-b09fa449aced # DC, no individualName, / in the organisationName + # ---------------------------------------------------------------------------------------- + #- 567c8c34-51d4-425b-81f2-ab5e888c3d4c # image/tiff format + #- 34bb2853-57d3-42d5-ae32-a2c09274cd3c # Wrong table name + #- 8acfec15-1281-4af5-be75-b978bc5244bf # Points d'int\u00e9r\u00eat touristiques de la M\u00e9tropole de Lyon + - e23c6d3e-40be-4d5e-bc15-4b7e7313942f # lignes de faîtage + #- 7bb97eb0-3c88-4ae9-b6f3-39c9c881bafb + #- 285df15a-9503-4ec5-87f5-45a7c15d37da + #- c1b069ca-181d-4265-9838-8d182f207bd3 # arbres alignement + #- 96d10bfe-3459-4acc-a9fa-7d06bc0340ec + #- d570f3b4-9c91-4302-90d3-733f84299280 + #- 6cfdd8b5-7aef-4b7c-bae6-17918638e1b1 + #- 14c225e8-586c-41a2-8d6b-88c19f9ce08a + #- 2d37da57-27d3-4016-a95e-a9824d98bbd3 + #- 46eac9e4-33cb-45b7-a104-7bcc8e654c98 + #- 2a89f780-f1b8-4ba0-9c92-2f8ec89b0080 # rando, prod + #- 3851f46a-7907-41c4-834a-a6b23f7adc1b # calculateur, prod + #- e77dedb7-b30a-4d2d-b38a-2fdeab0fd4bb + #- 059d00a0-1050-4801-8869-2417aa05b2dd # <- identifiant is int + #- 
0c575de5-54e9-4e8c-8bf0-13910d7cbadb # <- identifiant is str + #- 71170457-a76e-4cd7-8bba-057681fd3f39 + #- 58071df6-80bb-4b85-a7fa-bcea082c9409 + # Tests AUTHN/AUTHZ + #- 56a61ec4-8ce7-46ab-bdc4-3365e2d95635 # Open Data + #- ea3d6ea9-0878-4ee3-a8cc-1601b919754b # TR, accès restreint + #- 249a0648-7151-406a-9f71-0b7fb903c1a1 # PRIVVENISSIEUX + #- 46eac9e4-33cb-45b7-a104-7bcc8e654c98 # PROD - Vélo'v temps réel + #- 01852a44-e2a8-4587-929f-6068349f50aa # REC - Vélo'v v. 2.0.0 (Open Data) + #- ea3d6ea9-0878-4ee3-a8cc-1601b919754b # REC - Trafic TR (accès restreint) + #- c7a04343-804b-426a-9141-e20e0fd5efb4 # REC - Candélabres de Limonest (privé) + #- b2ac0f47-83fd-4112-af37-b25712dc8477 # Wrong split in organisationName / individualName + #- f398b780-6056-4f63-85aa-b623e8a7a863 # Dalle + #- 8f5c7404-2701-49bc-bcf3-3b1e46c2ac34 # Atmo -> ATMO + # - 104be2cb-e17d-4a93-b11f-f40a1acf53bc # ortho infra 2012 + # - 86f22d7a-1d61-405b-a2f8-9ebf2ca1e8e7 # ortho infra 2015 + # - cd104534-0adb-4629-b765-75916d74bd56 # ortho infra 2009 + # - 8ad29de5-f093-42a8-96e1-3ecec541d67f # enseignement artistique + # - 4790b8b3-206c-42f9-8e08-47bdbe5d0d38 # biblio + # - 106f2299-f564-4cf3-9fd8-515c89b566e7 # biblio (encore!) 
+ # - a5aef8c7-002a-4dd5-a985-39f8f4f69265 # ortho 2012 + # #- 281c840d-8d43-4c86-877a-aafb46dc5482 # habillage topo + # - cc86940e-1326-46c7-876e-77dc889caa04 # enseignement artistique + # - 78adcd03-b95d-4c9d-bc54-0f5fa748915a # LIDAR 2015 + # - 47f840d6-b82e-4a13-bc3c-d6cd468195bd # Tronçon de la trame viaire + # - 22c64d24-0ceb-4901-a89e-3e136944315a # ORTHO 2015 + # - c95be61c-810c-45af-b71a-b00c8bb7ecfd # CONTOURS PLAN TOPO + #- 015b60fa-836f-4880-9ef2-10859e6e9c89 # Points d'arrêt du réseau Transports en Commun Lyonnais - can't do WMS queries + #- ee1242ea-ec34-4be0-a77f-b93330229d1c + # - 2ee95d28-c858-4875-a1fe-2b32a1e1e896 # Lab' LPA -> JSON erroneously interpreted as WS + #- 2f81dcbf-e0db-46d2-8f05-e1d1825de239 # RNSA (name in JSON resource) + uuids_to_filter_out: + - ef5745f1-060d-496d-8aea-2461538964a8 +# - eff0fb32-d259-45b8-b662-9eb00c64ade5 + - 104be2cb-e17d-4a93-b11f-f40a1acf53bc # ortho infra 2012 + # - 86f22d7a-1d61-405b-a2f8-9ebf2ca1e8e7 # ortho infra 2015 + # - cd104534-0adb-4629-b765-75916d74bd56 # ortho infra 2009 + # - 8ad29de5-f093-42a8-96e1-3ecec541d67f # enseignement artistique + # - 4790b8b3-206c-42f9-8e08-47bdbe5d0d38 # biblio + # - 106f2299-f564-4cf3-9fd8-515c89b566e7 # biblio (encore!) 
+ # - a5aef8c7-002a-4dd5-a985-39f8f4f69265 # ortho 2012 + # - 281c840d-8d43-4c86-877a-aafb46dc5482 # habillage topo + # - cc86940e-1326-46c7-876e-77dc889caa04 # enseignement artistique + # - 78adcd03-b95d-4c9d-bc54-0f5fa748915a # LIDAR 2015 + # - 47f840d6-b82e-4a13-bc3c-d6cd468195bd # Tronçon de la trame viaire + # - 22c64d24-0ceb-4901-a89e-3e136944315a # ORTHO 2015 + # - c95be61c-810c-45af-b71a-b00c8bb7ecfd # CONTOURS PLAN TOPO + +field_type_detector: + rewrite: true + debug: false + +indexer: + url: https://elastic2.alpha.grandlyon.com + index: download.data.grandlyon.com.ingest.test-denis + +reindexer: + source_url: https://elastic2.alpha.grandlyon.com:443 # N.B.: the port number MUST be explicit; this parameter is optional + #destination_url: https://elastic2.alpha.grandlyon.com + destination_url: https://elastic2.alpha.grandlyon.com + source_index: download.data.grandlyon.com.ingest.test-denis + destination_index: download.data.grandlyon.com.test-denis + template_name: download.data.grandlyon.com.test-denis + template_index_pattern: download.data.grandlyon.com.test-denis + number_of_shards: 24 + number_of_replicas: 0 diff --git a/workers/doc-enricher.py b/workers/doc-enricher.py index af44a6ab0d7917306d95ad491541bf9f0f129aab..4dc10c2410945975273c8577ea455c297d6d474e 100644 --- a/workers/doc-enricher.py +++ b/workers/doc-enricher.py @@ -365,6 +365,7 @@ if __name__ == '__main__': args = parser.parse_args() cfg = dict() + cfg['rabbitmq'] = dict() cfg['rabbitmq_host'] = args.host cfg['rabbitmq_port'] = args.port cfg['rabbitmq_exchange'] = args.exchange diff --git a/workers/doc-indexer.py b/workers/doc-indexer.py index 1143c99cdfbe6f8056cbe241e051b7cf2c2a9c28..7eb62f75a936ddabeda936c9ffd0840a7b1db607 100644 --- a/workers/doc-indexer.py +++ b/workers/doc-indexer.py @@ -268,6 +268,7 @@ if __name__ == '__main__': args = parser.parse_args() cfg = dict() + cfg['rabbitmq'] = dict() cfg['rabbitmq_host'] = args.host cfg['rabbitmq_port'] = args.port cfg['rabbitmq_exchange'] 
= args.exchange diff --git a/workers/doc-processor.py b/workers/doc-processor.py index 43dedf7ea866c1a861a9f3f9c46305c9a4947210..feabcc07785b7dcb4a05518a4df3741b0440ffb4 100644 --- a/workers/doc-processor.py +++ b/workers/doc-processor.py @@ -234,6 +234,7 @@ if __name__ == '__main__': args = parser.parse_args() cfg = dict() + cfg['rabbitmq'] = dict() cfg['rabbitmq_host'] = args.host cfg['rabbitmq_port'] = args.port cfg['rabbitmq_exchange'] = args.exchange diff --git a/workers/reindexer.py b/workers/reindexer.py index f20ad024aff9e75df7ffe831a56dbf42354bba92..8537a248e089d500f3136aefbda0d49da64d4150 100644 --- a/workers/reindexer.py +++ b/workers/reindexer.py @@ -285,6 +285,7 @@ if __name__ == '__main__': args = parser.parse_args() cfg = dict() + cfg['rabbitmq'] = dict() cfg['rabbitmq_host'] = args.host cfg['rabbitmq_port'] = args.port cfg['rabbitmq_exchange'] = args.exchange diff --git a/workers/sample-generator.py b/workers/sample-generator.py index c949dd856d0bf4617eb41c2d65ea69f39b0bfab3..aa90585f61eca10b8ee12b78485ac8cf37638325 100644 --- a/workers/sample-generator.py +++ b/workers/sample-generator.py @@ -326,6 +326,7 @@ if __name__ == "__main__": args = parser.parse_args() cfg = dict() + cfg['rabbitmq'] = dict() cfg['rabbitmq_host'] = args.host cfg['rabbitmq_port'] = args.port cfg['rabbitmq_exchange'] = args.exchange