Skip to content
Snippets Groups Projects
Commit 3a24fa1e authored by Alessandro Cerioni
Browse files

Initial commit.

parents
Branches
Tags
No related merge requests found
# Image for the catalog CSV service: the Flask app `api:api` served by gunicorn.
#
# The base image is pinned to an explicit Python minor version instead of the
# floating `python:slim` tag so rebuilds are reproducible.
# NOTE(review): requirements.txt is not visible from here; 3.9 is only a default.
# Override with `--build-arg PYTHON_VERSION=...` to match what the pinned
# dependencies actually support.
ARG PYTHON_VERSION=3.9
FROM python:${PYTHON_VERSION}-slim

WORKDIR /app

# Install dependencies before copying the source so that editing api.py/main.py
# does not invalidate the (slow) pip layer. --no-cache-dir keeps pip's download
# cache out of the image.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

COPY api.py main.py ./

# The application reads and writes the relative path `cache/`, which resolves
# to /app/cache under the WORKDIR above. Create it before declaring the volume.
RUN mkdir -p cache

EXPOSE 8000

# Must match the directory the app really uses (/app/cache, also the mount
# target in docker-compose.yml). The previous `/cache` was never written to.
VOLUME /app/cache

# Exec form: gunicorn runs as PID 1 and receives SIGTERM from `docker stop`
# directly instead of being hidden behind `/bin/sh -c`.
CMD ["gunicorn", "--workers=2", "--timeout", "300", "-b", "0.0.0.0:8000", "api:api"]
api.py 0 → 100644
from flask import Flask
from flask import request, send_file
import os.path
from apscheduler.schedulers.background import BackgroundScheduler
import atexit
import logging
from main import refresh_cache
# WSGI application object; gunicorn loads it as `api:api`.
api = Flask(import_name=__name__, static_url_path='')
@api.route("/")
def index():
    """Serve the cached catalog CSV as a file download.

    Clients whose User-Agent contains "windows" (case-insensitive) receive
    'catalog-windows1252.csv'; everyone else receives 'catalog-utf8.csv'.
    If the selected file is not present in the cache directory yet,
    refresh_cache() is called first to build it.

    Returns:
        The Flask response produced by send_file().
    """
    # The User-Agent header is optional; default to '' so that clients which
    # omit it get the UTF-8 file instead of triggering an AttributeError.
    user_agent = request.headers.get('User-Agent', '')
    if 'windows' in user_agent.lower():
        filename = 'catalog-windows1252.csv'
    else:
        filename = 'catalog-utf8.csv'  # default

    # Check the file that will actually be served, not only the default one.
    if not os.path.isfile('cache/%s' % filename):
        refresh_cache()

    # NOTE(review): `attachment_filename` was renamed to `download_name` in
    # Flask 2.0 -- confirm against the Flask version pinned in requirements.txt.
    return send_file('cache/%s' % filename,
                     mimetype='text/csv',
                     attachment_filename=filename,
                     as_attachment=True)
@api.before_first_request
def init_scheduler():
    """Start a background scheduler that rebuilds the cache once a day.

    Registered with Flask's before_first_request hook. The job calls
    refresh_cache() on a cron trigger with hour='4', minute='0'.
    """
    # NOTE(review): `before_first_request` was deprecated in Flask 2.2 and
    # removed in 2.3 -- confirm the Flask version pinned in requirements.txt.
    background = BackgroundScheduler()
    background.add_job(refresh_cache, trigger='cron', hour='4', minute='0')
    background.start()

    # Stop the scheduler when the interpreter exits.
    atexit.register(background.shutdown)
if __name__ == '__main__':
    # Development entry point (the Dockerfile starts the app through gunicorn).
    # The Werkzeug debugger allows arbitrary code execution, and this server
    # binds to every interface, so debug mode is opt-in: set FLASK_DEBUG=1
    # (or "true") in the environment to enable it.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').lower() in ('1', 'true')
    api.run(host='0.0.0.0', port=8000, debug=debug_enabled)
# Settings loaded by main.py when the module is imported.
geonetwork:
  # Base URL of the GeoNetwork instance. main.py appends
  # '/srv/eng/csv.search?sortBy=source' to this value to build the export URL.
  root_url: <GeoNetwork's route URL, ex. https://your-domain/catalog>
version: "3.0"
services:
  catalog:
    build: .
    ports:
      # Quoted so YAML can never interpret the HOST:CONTAINER pair as a number.
      - "8000:8000"
    volumes:
      # The application only reads its configuration; mount it read-only.
      - ./config.yaml:/app/config.yaml:ro
      # Named volume that keeps the generated CSV files across container restarts.
      - catalog-cache:/app/cache
volumes:
  catalog-cache:
main.py 0 → 100644
import requests
import subprocess
import hashlib
import codecs
import os
import yaml
# Load the configuration once, at import time. safe_load() is used because
# yaml.load() without an explicit Loader can construct arbitrary Python objects,
# has been deprecated since PyYAML 5.1 and raises TypeError on PyYAML 6.
with open('config.yaml') as fp:
    cfg = yaml.safe_load(fp)
def _write_atomically(path, data):
    """Write the bytes `data` to `path` via a temporary file and os.replace()."""
    # The temporary file lives next to the target so os.replace() stays on one
    # filesystem; the PID suffix keeps separate worker processes from colliding.
    tmp_path = '%s.%d.tmp' % (path, os.getpid())
    with open(tmp_path, 'wb') as fp:
        fp.write(data)
    os.replace(tmp_path, path)


def refresh_cache(timeout=270):
    """Download the catalog CSV from GeoNetwork and rebuild the cached files.

    Three files are written to the 'cache' directory:
      * raw_catalog_<md5 of the URL>.csv -- the response text, UTF-8 encoded;
      * catalog-utf8.csv                 -- the text with every '###' replaced
                                            by ',', UTF-8 encoded;
      * catalog-windows1252.csv          -- the same replaced text encoded as
                                            cp1252, unencodable characters
                                            substituted (errors='replace').

    Args:
        timeout: Timeout in seconds handed to requests.get(). The default stays
            below the 300 s gunicorn worker timeout set in the Dockerfile.

    Raises:
        requests.exceptions.RequestException: if the request fails, times out,
            or the server answers with an HTTP error status. In that case no
            file is touched, so a previously cached catalog stays in place.
    """
    working_directory = 'cache'
    url = cfg['geonetwork']['root_url'] + '/srv/eng/csv.search?sortBy=source'
    md5 = hashlib.md5(url.encode("utf-8")).hexdigest()
    raw_filename = os.path.join(working_directory, 'raw_catalog_%s.csv' % md5)
    utf8_filename = os.path.join(working_directory, 'catalog-utf8.csv')
    win1252_filename = os.path.join(working_directory, 'catalog-windows1252.csv')

    print('Fetching a new file...')
    res = requests.get(url, timeout=timeout)
    # Without this check an HTTP error page would be cached and served as CSV.
    res.raise_for_status()
    print('Done.')

    os.makedirs(working_directory, exist_ok=True)

    raw_text = res.text
    _write_atomically(raw_filename, raw_text.encode('utf-8'))

    # Apply the '###' -> ',' replacement once and derive BOTH outputs from it.
    # Previously only the UTF-8 file received the replacement, so the
    # Windows-1252 download still contained the '###' markers.
    catalog = raw_text.replace('###', ',')
    _write_atomically(utf8_filename, catalog.encode('utf-8'))
    _write_atomically(win1252_filename,
                      catalog.encode('cp1252', errors='replace'))
# Running this module as a script performs a single cache refresh.
if __name__ == "__main__":
    refresh_cache()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment