From f11dfa18c0e6d47c77a82129a89376a5d958b8e9 Mon Sep 17 00:00:00 2001 From: DESPRES Damien Date: Fri, 14 Jan 2022 17:24:02 +0100 Subject: [PATCH 1/5] fix #12830 (cherry picked from commit 284c008043989d182456932a52ab89c27037a03d) --- workers/metadata_processor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workers/metadata_processor.py b/workers/metadata_processor.py index 839dd22..0b8b0cb 100644 --- a/workers/metadata_processor.py +++ b/workers/metadata_processor.py @@ -187,7 +187,7 @@ def process_record( in_record, working_directory, credentials ): out_record['metadata-fr']['license'] = 'unknown' if 'legalConstraints' in out_record['metadata-fr'].keys(): for el in out_record['metadata-fr']['legalConstraints']: - if "licence" in el.lower() or "accord" in el.lower(): + if ("licence" in el.lower() or "accord" in el.lower()) and not ("http" in el.lower()): out_record['metadata-fr']['license'] = el if 'resourceConstraints' in out_record['metadata-fr'].keys() and type(out_record['metadata-fr']['resourceConstraints']) is str: -- GitLab From 35247fae3dcb4fb4b653a7775713648027171f91 Mon Sep 17 00:00:00 2001 From: Damien DESPRES Date: Mon, 31 Jan 2022 10:47:04 +0000 Subject: [PATCH 2/5] Update .gitlab-ci.yml --- .gitlab-ci.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 2e5aa0c..6ce94b5 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -8,6 +8,7 @@ variables: sonarqube: stage: sonar-analysis + allow_failure: true tags: - build before_script: -- GitLab From a51d29e48e6e7b5fb6e20dd88aec01e73eb98467 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20DA=20ROCHA?= Date: Tue, 1 Mar 2022 11:35:19 +0000 Subject: [PATCH 3/5] Add error logs when indexing to ingest --- workers/doc_indexer.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/workers/doc_indexer.py b/workers/doc_indexer.py index cdc13be..efd34d2 100644 --- a/workers/doc_indexer.py +++ b/workers/doc_indexer.py @@ -159,6 +159,14 @@ def index_docs(channel, method, properties, body): channel.basic_nack(delivery_tag = method.delivery_tag, requeue=1) #print("") #logging.error(json.dumps(rep, indent=4)) + try: + logging.error(json.dumps([ + item.get("error", {}).get("reason") + for item in rep.get("items", []) + ], + indent=4)) + except: + pass raise Exception('Failed to push documents to Elasticsearch.') return -- GitLab From 963905872edff1ce49611137477aa00887e5338e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20DA=20ROCHA?= Date: Tue, 1 Mar 2022 13:32:30 +0000 Subject: [PATCH 4/5] fix logs doc_indexer failing + force python 3.9 --- Dockerfile | 2 +- workers/doc_indexer.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index 14c9119..3faa31c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM python:3-slim +FROM python:3.9-slim WORKDIR /app diff --git a/workers/doc_indexer.py b/workers/doc_indexer.py index efd34d2..4e80667 100644 --- a/workers/doc_indexer.py +++ b/workers/doc_indexer.py @@ -161,7 +161,7 @@ def index_docs(channel, method, properties, body): #logging.error(json.dumps(rep, indent=4)) try: logging.error(json.dumps([ - item.get("error", {}).get("reason") + item.get("index", {}).get("error", {}).get("reason") for item in rep.get("items", []) ], indent=4)) -- GitLab From 880ee41c6fee8ec650137df24110b6e01885e7c9 Mon Sep 17 00:00:00 2001 From: DESPRES Damien Date: Tue, 10 May 2022 11:01:24 +0200 Subject: [PATCH 5/5] fix #13829 --- workers/doc_indexer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workers/doc_indexer.py b/workers/doc_indexer.py index 4e80667..2a64948 100644 --- a/workers/doc_indexer.py +++ b/workers/doc_indexer.py @@ -24,7 +24,7 @@ def tag_doc( the_doc ): # tag_dict[tag] = False # isOpen? - if 'license' in the_doc['metadata-fr'].keys() and not any( [x in the_doc['metadata-fr']['license'] for x in ["Licence de réutilisation des données d'intérêt général","Accord de réutilisation de données en accès privé et exclusif"] ] ): + if 'license' in the_doc['metadata-fr'].keys() and not any( [x in the_doc['metadata-fr']['license'] for x in ["Licence Mobilités","Licence de réutilisation des données d'intérêt général","Accord de réutilisation de données en accès privé et exclusif"] ] ): tag_dict['isOpenAccess'] = True else: tag_dict['isOpenAccess'] = False -- GitLab