Skip to content
Snippets Groups Projects
Commit 3a24fa1e authored by Alessandro Cerioni
Browse files

Initial commit.

parents
No related branches found
No related tags found
No related merge requests found
# The base image tag can be overridden at build time, e.g.
#   docker build --build-arg PYTHON_TAG=<version>-slim .
# The default keeps the original floating "slim" tag; pin it for reproducible builds.
ARG PYTHON_TAG=slim
FROM python:${PYTHON_TAG}

WORKDIR /app

# Install dependencies before copying the application code so that a change
# to the sources alone reuses the cached dependency layer.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

COPY api.py main.py ./

# Directory the application writes its CSV files to (relative path "cache"
# resolved against WORKDIR). Created before VOLUME so it exists in the image.
RUN mkdir cache

EXPOSE 8000

# The application writes to /app/cache, not /cache, so that is the path to
# declare as a volume (it is also the path docker-compose mounts).
VOLUME /app/cache

# Exec form: gunicorn runs as PID 1 and receives SIGTERM on `docker stop`.
CMD ["gunicorn", "--workers=2", "--timeout", "300", "-b", "0.0.0.0:8000", "api:api"]
api.py 0 → 100644
from flask import Flask
from flask import request, send_file
import os.path
from apscheduler.schedulers.background import BackgroundScheduler
import atexit
import logging
from main import refresh_cache
api = Flask(__name__, static_url_path='')


@api.route("/")
def index():
    """Serve the cached catalog as a CSV attachment.

    Clients whose User-Agent contains "windows" (case-insensitive) receive
    'catalog-windows1252.csv'; every other client, including one that sends
    no User-Agent header at all, receives 'catalog-utf8.csv'. If the selected
    file is not in the cache directory yet, refresh_cache() is called first
    to generate it.

    Returns:
        The response built by flask.send_file for the selected file.
    """
    # The header is optional: fall back to '' so .lower() cannot fail on None.
    user_agent = request.headers.get('User-Agent', '')
    if 'windows' in user_agent.lower():
        filename = 'catalog-windows1252.csv'
    else:
        filename = 'catalog-utf8.csv'  # default

    # Check the file that will actually be sent, not always the UTF-8 one.
    cached_path = 'cache/%s' % filename
    if not os.path.isfile(cached_path):
        refresh_cache()

    # NOTE(review): 'attachment_filename' is the pre-Flask-2.0 spelling of
    # 'download_name'; kept as-is because the installed Flask version is not
    # visible from here.
    return send_file(cached_path,
                     mimetype='text/csv',
                     attachment_filename=filename,
                     as_attachment=True)
@api.before_first_request
def init_scheduler():
    """Start the background scheduler that periodically rebuilds the cache.

    Registered with Flask to run before the first request is handled. It
    schedules refresh_cache on a cron trigger (hour 4, minute 0) and makes
    sure the scheduler is shut down when the interpreter exits.
    """
    cache_refresher = BackgroundScheduler()
    cache_refresher.add_job(refresh_cache, trigger='cron', hour='4', minute='0')
    # For local testing, an interval trigger can be used instead:
    # cache_refresher.add_job(refresh_cache, trigger='interval', seconds=3)
    cache_refresher.start()

    # Stop the scheduler thread cleanly when the process exits.
    atexit.register(cache_refresher.shutdown)
if __name__ == '__main__':
    # Development entry point only (the container starts the app via gunicorn).
    # The Werkzeug debugger allows arbitrary code execution, so it must not be
    # switched on unconditionally while listening on all interfaces: it is now
    # opt-in through the FLASK_DEBUG environment variable.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').lower() in ('1', 'true', 'yes')
    api.run(host='0.0.0.0', port=8000, debug=debug_enabled)
# Settings read by main.py when it is imported.
geonetwork:
  # Base URL of the GeoNetwork instance. main.py appends
  # '/srv/eng/csv.search?sortBy=source' to this value, so it should not end
  # with a slash. Replace the placeholder below with the real URL.
  root_url: "<GeoNetwork's route URL, ex. https://your-domain/catalog>"
version: "3.0"
services:
  catalog:
    build: .
    ports:
      # Quoted so the mapping is always read as a string by the YAML parser.
      - "8000:8000"
    volumes:
      # The application only reads its configuration, so mount it read-only.
      - ./config.yaml:/app/config.yaml:ro
      # Named volume holding the generated CSV files.
      - catalog-cache:/app/cache
volumes:
  catalog-cache:
main.py 0 → 100644
import requests
import subprocess
import hashlib
import codecs
import os
import yaml
# Load the service configuration once, at import time; refresh_cache() reads
# cfg['geonetwork']['root_url'] from it.
# yaml.load() without an explicit Loader is deprecated since PyYAML 5.1 and
# raises TypeError from PyYAML 6.0 on. safe_load() works on every version and
# is sufficient for a plain mapping of strings.
with open('config.yaml', encoding='utf-8') as fp:
    cfg = yaml.safe_load(fp)
def refresh_cache(timeout=(10, 300)):
    """Download the catalog CSV from GeoNetwork and rebuild the cached files.

    Three files are written into the 'cache' directory, which is created if
    it does not exist:

    * 'raw_catalog_<md5 of the URL>.csv': the response text, UTF-8 encoded;
    * 'catalog-utf8.csv': the text with every '###' replaced by ',',
      UTF-8 encoded;
    * 'catalog-windows1252.csv': the same processed text encoded as cp1252,
      with unencodable characters replaced.

    Args:
        timeout: Forwarded to requests.get as its ``timeout`` argument
            (a number of seconds, or a (connect, read) tuple), so that an
            unresponsive server cannot block the caller forever.

    Raises:
        requests.RequestException: If the download fails, times out, or the
            server answers with an HTTP error status. Nothing is written to
            the cache in that case, so previously cached files stay intact.
    """
    working_directory = 'cache'
    url = cfg['geonetwork']['root_url'] + '/srv/eng/csv.search?sortBy=source'

    # The digest only serves to derive a file name from the URL.
    md5 = hashlib.md5(url.encode('utf-8')).hexdigest()
    raw_filename = os.path.join(working_directory, 'raw_catalog_%s.csv' % md5)
    utf8_filename = os.path.join(working_directory, 'catalog-utf8.csv')
    win1252_filename = os.path.join(working_directory, 'catalog-windows1252.csv')

    print('Fetching a new file...')
    res = requests.get(url, timeout=timeout)
    # Fail before touching the cache: an HTTP error page must never be
    # stored and served as if it were the catalog.
    res.raise_for_status()
    print('Done.')

    # NOTE(review): res.text relies on the encoding requests infers from the
    # response headers - confirm the server declares its charset.
    raw_text = res.text

    # Both published files carry the same content; only the encoding differs.
    catalog_text = raw_text.replace('###', ',')

    os.makedirs(working_directory, exist_ok=True)

    # newline='' writes line endings exactly as received.
    with open(raw_filename, 'w', encoding='utf-8', newline='') as fp:
        fp.write(raw_text)

    with open(utf8_filename, 'w', encoding='utf-8', newline='') as fp:
        fp.write(catalog_text)

    with open(win1252_filename, 'wb') as fp:
        fp.write(catalog_text.encode('cp1252', errors='replace'))
# Running this module directly performs a single cache rebuild.
if __name__ == '__main__':
    refresh_cache()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment