diff --git a/utils/fix_links.py b/utils/fix_links.py index 0245c9a4fa8843028792807744e6ecd1d29518da..501e35353dff540680cbd69ef0cae8ca795aa26c 100644 --- a/utils/fix_links.py +++ b/utils/fix_links.py @@ -13,7 +13,7 @@ def translate_content_type( content_type ): output = content_type # TODO: complete the following list! - types = ['pdf', 'html', 'zip', 'xml', 'javascript', 'json', 'csv'] + types = ['pdf', 'html', 'zip', 'xml', 'javascript', 'json', 'csv', 'tiff'] for the_type in types: if the_type in content_type: @@ -26,6 +26,10 @@ def translate_content_type( content_type ): if content_type == 'application/vnd.ms-excel': output = 'XLS' + print('S ********************') + print(output) + print('E ********************') + return output @@ -53,6 +57,12 @@ def protocol_to_formats_and_services( links ): elif link['protocol'] == 'SOS': output[k]['formats'] = ['JSON', 'XML'] output[k]['service'] = 'SOS' + elif link['protocol'] == 'HTML': + # in order to prevent HTML resources from being deemed downloadable + pass + elif link['protocol'].startswith("WWW:"): + # in order to prevent HTML resources from being deemed downloadable + pass else: output[k]['formats'] = [ link['protocol'] ] @@ -83,7 +93,7 @@ def fix_links( links, credentials=None ): # FIX links in which the declared protocol is as bizarre as "WWW:LINK-1.0-http--link" # The KML protocol needs also to be fixed. if 'protocol' in link.keys() and any( [x in link['protocol'] for x in ['WWW', 'kml', 'html', 'null'] ] ): - print() + #print() #print(link['url']) try: # let's try getting the information from the Web Server... 
@@ -121,7 +131,7 @@ def fix_links( links, credentials=None ): except Exception as e: logging.debug(e) # ...otherwise, we make a guess on the basis of the information carried by the URL - known_formats = ['ecw', 'pdf', 'zip', 'kml', 'json', 'tif', 'tiff', 'csv', 'sos'] + known_formats = ['ecw', 'pdf', 'zip', 'kml', 'json', 'tif', 'tiff', 'csv'] # sos for known_format in known_formats: if link['url'].lower().endswith(known_format):