Skip to content
Snippets Groups Projects
Commit 49a95979 authored by Alessandro Cerioni
Browse files

Adding slugs for each dataset

parent 2cdba63b
Branches
Tags
No related merge requests found
......@@ -79,7 +79,7 @@ def send_page( the_session_id, the_geonetwork_root_url, the_dest_index, the_page
msg = {'header': {'geonetwork_root_url': the_geonetwork_root_url, 'session_id': the_session_id, 'dest_index': the_dest_index}, 'body': the_page}
the_body = msgpack.packb(msg, use_bin_type=True)
the_body = msgpack.packb(msg, use_bin_type=False)
the_channel.basic_publish( exchange=the_exchange,
routing_key=the_routing_key,
......
......@@ -6,11 +6,12 @@ import time
from dateutil.parser import parse
import hashlib
import json
from utils.exit_gracefully import exit_gracefully
import re
from utils.exit_gracefully import exit_gracefully
from utils.my_logging import logging
from utils.fix_links import fix_links
from utils.enrich_links import enrich_links
from utils.generate_slug import generate_slug
def list_to_dictlist( the_input, the_context=None ):
......@@ -142,10 +143,13 @@ def process_records( in_records, geonetwork_root_url, working_directory, credent
#print( in_records[0].keys() )
out_records = []
#out_records = []
for in_record in in_records:
the_uuid = in_record['geonet:info']['uuid']
logging.info("Processing record %s..." % the_uuid)
out_record = {}
# all the content of the original record is "mounted" at "metadata-fr"
out_record['metadata-fr'] = in_record.copy()
......@@ -264,10 +268,11 @@ def process_records( in_records, geonetwork_root_url, working_directory, credent
#pprint(out_record)
out_records.append(out_record)
#out_records.append(out_record)
#print('-'*80)
yield out_record
return out_records
#return out_records
......@@ -289,10 +294,6 @@ def process_page( channel, method, properties, body, **kwargs):
#dispatch
for metadata_record in out_records:
the_uuid = metadata_record['metadata-fr']['geonet:info']['uuid']
logging.info("Processing record %s..." % the_uuid)
# let's look for a WFS resource to potentially fetch and index...
wfs_found = False
......@@ -310,6 +311,10 @@ def process_page( channel, method, properties, body, **kwargs):
full_version = metadata_record.copy() # including metadata AND data
full_version['uuid'] = metadata_record['metadata-fr']['geonet:info']['uuid'] + '.full'
full_version['type'] = metadata_record['metadata-fr']['type']
the_full_title = metadata_record['metadata-fr']['title']
the_slug = generate_slug(the_full_title)
logging.info('Slug for "%s": %s' % (the_full_title, the_slug))
full_version['slug'] = the_slug
msg = {'header': {'wfs_info': link, 'offset': 0, 'session_id': session_id, 'dest_index': dest_index}, 'body': full_version}
......@@ -326,6 +331,10 @@ def process_page( channel, method, properties, body, **kwargs):
meta_version = metadata_record.copy() # including metadata ONLY
meta_version['uuid'] = metadata_record['metadata-fr']['geonet:info']['uuid'] + '.meta'
meta_version['type'] = metadata_record['metadata-fr']['type']
the_full_title = metadata_record['metadata-fr']['title']
the_slug = generate_slug(the_full_title)
logging.info('Slug for "%s": %s' % (the_full_title, the_slug))
meta_version['slug'] = the_slug
msg = {'header': { "index" : { "_index" : dest_index, "_type" : "_doc" } }, 'body': meta_version}
the_body = msgpack.packb(msg, use_bin_type=True)
......
......@@ -126,7 +126,7 @@ template = {
{
"keyword-template" : {
"match_pattern": "regex",
"path_match": ".*md5.*|metadata-fr\.link\.formats.*|metadata-fr\.link\.service.*|metadata-fr\.parentId.*",
"path_match": ".*md5.*|metadata-fr\.link\.formats.*|metadata-fr\.link\.service.*|metadata-fr\.parentId.*|slug",
"mapping": {
"type": "text",
"index": False,
......
......@@ -12,3 +12,5 @@ elasticsearch>=6.0.0,<7.0.0
GeoAlchemy2==0.5.0
psycopg2-binary>=2.7.0
sqlalchemy>=1.2.0,<1.3.0
python-slugify
nltk
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment