Commit 83d7a26e authored by Alessandro CERIONI
Browse files

- Renamed some queues.

- Moved the HEAD request in utils/fix_links.py inside the try-except block, so that the script no longer fails as soon as an invalid URL is found.
parent 94686bf4
......@@ -15,14 +15,14 @@ rabbitmq:
exchange: download.recette.data.grandlyon.com_index
routing_key_1_suffix: metadata_pages_to_process
queue_name_1_suffix: metadata_pages_to_process
- routing_key_2_suffix: docs_to_index
- queue_name_2_suffix: docs_to_index
- routing_key_3_suffix: docs_to_enrich
- queue_name_3_suffix: docs_to_enrich
+ routing_key_2_suffix: doc_pages_to_index
+ queue_name_2_suffix: doc_pages_to_index
+ routing_key_3_suffix: doc_pages_to_enrich
+ queue_name_3_suffix: doc_pages_to_enrich
routing_key_4_suffix: doc_pages_to_store_in_mongo
queue_name_4_suffix: doc_pages_to_store_in_mongo
- routing_key_5_suffix: doc_pages_process
- queue_name_5_suffix: doc_pages_process
+ routing_key_5_suffix: doc_pages_to_process
+ queue_name_5_suffix: doc_pages_to_process
routing_key_6_suffix: reindex_tasks
queue_name_6_suffix: reindex_tasks
......
......@@ -15,14 +15,14 @@ rabbitmq:
exchange: download.data.grandlyon.com_index
routing_key_1_suffix: metadata_pages_to_process
queue_name_1_suffix: metadata_pages_to_process
- routing_key_2_suffix: docs_to_index
- queue_name_2_suffix: docs_to_index
- routing_key_3_suffix: docs_to_enrich
- queue_name_3_suffix: docs_to_enrich
+ routing_key_2_suffix: doc_pages_to_index
+ queue_name_2_suffix: doc_pages_to_index
+ routing_key_3_suffix: doc_pages_to_enrich
+ queue_name_3_suffix: doc_pages_to_enrich
routing_key_4_suffix: doc_pages_to_store_in_mongo
queue_name_4_suffix: doc_pages_to_store_in_mongo
- routing_key_5_suffix: doc_pages_process
- queue_name_5_suffix: doc_pages_process
+ routing_key_5_suffix: doc_pages_to_process
+ queue_name_5_suffix: doc_pages_to_process
routing_key_6_suffix: reindex_tasks
queue_name_6_suffix: reindex_tasks
......
......@@ -37,14 +37,9 @@ def fix_links( links ):
# FIX links in which the declared protocol is as bizarre as "WWW:LINK-1.0-http--link"
if 'protocol' in link.keys() and any( [x in link['protocol'] for x in ['WWW'] ] ):
- # let's try getting the information from the Web Server...
- resp = requests.head( link['url'], allow_redirects=True )
- # print()
- # print(link['url'], resp.headers)
- # print()
try:
+ # let's try getting the information from the Web Server...
+ resp = requests.head( link['url'], allow_redirects=True )
# the presence of the content-length assures that the Web Server knows what it is talking about,
# that is why we include the following line in this try-except block
fixed_links[k]['content-length'] = resp.headers['Content-Length']
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment