Commit 684496d2 authored by Fabien FORESTIER's avatar Fabien FORESTIER

Merge branch 'master' into 'development'

# Conflicts:
#   .gitlab-ci.yml
parents 099f4ed8 2960a68d
Pipeline #3216 passed with stages in 55 seconds
@@ -4,12 +4,12 @@ WORKDIR /app
RUN mkdir cache
COPY requirements.txt .
RUN pip install -r requirements.txt
COPY api.py .
COPY main.py .
RUN pip install -r requirements.txt
EXPOSE 8000
VOLUME /cache
CMD gunicorn --workers=2 --timeout 300 -b 0.0.0.0:8000 api:api
CMD gunicorn --workers=2 --timeout 300 -b 0.0.0.0:8000 --log-level=info --preload api:api
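# Editor's sketch (not part of the commit): building and running the image locally, with
# an assumed image/volume name; the service then listens on port 8000 per EXPOSE and CMD.
#   docker build -t catalog-cache .
#   docker run -p 8000:8000 -v catalog-cache-data:/cache catalog-cache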
@@ -3,11 +3,39 @@ from flask import request, send_file
import os.path
from apscheduler.schedulers.background import BackgroundScheduler
import atexit
import random
import logging
#logging.basicConfig(format="[%(asctime)s] [%(process)d] [%(levelname)s] %(message)s", datefmt="%Y-%m-%d %H:%M:%S %z")
from main import refresh_cache
scheduler = BackgroundScheduler()
# It was observed that GeoNetwork does not handle multiple concurrent requests.
# In fact, when several instances of this service refresh the cache concurrently,
# they all receive a 502 Proxy Error response. To avoid that, we let the various
# instances of this service refresh the cache at different times, with at least
# 5 minutes between requests. The following configuration allows for up to 10
# instances.
random_minute = 5*random.randint(0, 9)
scheduler.add_job(refresh_cache, 'cron', hour=6, minute=random_minute)
#scheduler.add_job(refresh_cache, 'interval', seconds=random_minute)
scheduler.start()
# Shut down the scheduler when exiting the app
atexit.register(lambda: scheduler.shutdown())
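# Editor's sketch (not part of the commit): 5 * random.randint(0, 9) picks one of the
# ten values {0, 5, 10, ..., 45}, so each instance fires its daily cron job at 06:00,
# 06:05, ..., or 06:45 (GMT), and instances that draw different slots stay at least
# 5 minutes apart, e.g.:
#   >>> sorted({5 * random.randint(0, 9) for _ in range(1000)})
#   [0, 5, 10, 15, 20, 25, 30, 35, 40, 45]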
api = Flask(__name__, static_url_path='')
if __name__ != '__main__':
    gunicorn_logger = logging.getLogger('gunicorn.error')
    api.logger.handlers = gunicorn_logger.handlers
    api.logger.setLevel(gunicorn_logger.level)
    api.logger.info("Cache will be refreshed every day at %s minutes past 6 AM (GMT)." % random_minute)
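    # Editor's note: under gunicorn this module is imported rather than executed directly,
    # so __name__ != '__main__' holds; reusing gunicorn's handlers and level means the
    # --log-level=info flag from the Dockerfile CMD also governs these application logs.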
@api.route("/")
def index():
@@ -15,27 +43,18 @@ def index():
    if not os.path.isfile('cache/%s' % filename):
        refresh_cache()
    user_agent = request.headers.get('User-Agent')
    if 'windows' in user_agent.lower():
        filename = 'catalog-windows1252.csv'
    try:
        user_agent = request.headers.get('User-Agent')
        if 'windows' in user_agent.lower():
            filename = 'catalog-windows1252.csv'
    except:
        pass
    return send_file('cache/%s' % filename,
                     mimetype='text/csv',
                     attachment_filename=filename,
                     as_attachment=True)
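# Editor's note (assumption, not stated in the commit): the windows-1252 variant is
# presumably served because Excel on Windows defaults to that code page, so accented
# characters display correctly. A quick manual check against a local instance could be:
#   curl -OJ -A "Mozilla/5.0 (Windows NT 10.0; Win64; x64)" http://localhost:8000/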
@api.before_first_request
def init_scheduler():
    scheduler = BackgroundScheduler()
    scheduler.add_job(refresh_cache, 'cron', hour='6', minute='0')
    #scheduler.add_job(refresh_cache, 'interval', seconds=3)
    scheduler.start()
    # Shut down the scheduler when exiting the app
    atexit.register(lambda: scheduler.shutdown())
if __name__ == '__main__':
    api.run(host='0.0.0.0', port=8000, debug=True)
@@ -3,10 +3,26 @@ import subprocess
import hashlib
import codecs
import os
import yaml
import time
import logging
with open('config.yaml') as fp:
    cfg = yaml.load(fp)
from yaml import load, dump
try:
    from yaml import CLoader as Loader, CDumper as Dumper
except ImportError:
    from yaml import Loader, Dumper
# read 'n' parse the configuration
with open("config.yaml", 'r') as yamlfile:
    cfg = load(yamlfile, Loader=Loader)
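# Editor's note: CLoader/CDumper are PyYAML's LibYAML-backed C implementations and are
# noticeably faster; the ImportError branch falls back to the pure-Python Loader when
# LibYAML is not installed. Passing an explicit Loader also avoids the deprecation
# warning that the previous bare yaml.load(fp) call triggers on PyYAML >= 5.1.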
#logging.basicConfig(format="[%(asctime)s] [%(process)d] [%(levelname)s] %(message)s", datefmt="%Y-%m-%d %H:%M:%S %z")
if __name__ != '__main__':
    gunicorn_logger = logging.getLogger('gunicorn.error')
    log = logging.getLogger("cache-refresher")
    log.handlers = gunicorn_logger.handlers
    log.setLevel(gunicorn_logger.level)
@@ -28,9 +44,23 @@ def refresh_cache():
    #
    # if file_found == False or age > 86400:
    print('Fetching a new file...')
    res = requests.get(url)
    print('Done.')
    log.info('Refreshing cache...')
    done = False
    while not done:
        res = requests.get(url)
        if res.status_code != 200:
            log.error('Something went wrong when hitting the following URL: %s' % url)
            log.error('Here is the response:')
            log.error(res)
            log.error('Sleeping for 5 seconds before retrying...')
            time.sleep(5)
            done = False
        else:
            log.info('Done.')
            done = True
            break
    if not os.path.exists(working_directory):
        os.makedirs(working_directory)
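# Editor's sketch (not in the commit): the retry loop above runs until it gets a 200 and
# has no upper bound; a capped variant with the same 5-second pause could look like this
# (max_retries is an assumption):
#   for attempt in range(1, max_retries + 1):
#       res = requests.get(url)
#       if res.status_code == 200:
#           log.info('Done.')
#           break
#       log.error('Attempt %d/%d failed with status %s; retrying in 5 seconds...'
#                 % (attempt, max_retries, res.status_code))
#       time.sleep(5)
#   else:
#       raise RuntimeError('Could not refresh the cache from %s' % url)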
......