csv-catalog-downloader · Commit 684496d2
Authored Jan 21, 2020 by Fabien FORESTIER

Merge branch 'master' into 'development'

# Conflicts:
#   .gitlab-ci.yml

Parents: 099f4ed8 and 2960a68d
Pipeline #3216 passed with stages in 55 seconds
Changes: 3 · Pipelines: 1
Dockerfile

@@ -4,12 +4,12 @@ WORKDIR /app
 RUN mkdir cache
 COPY requirements.txt .
+RUN pip install -r requirements.txt
 COPY api.py .
 COPY main.py .
-RUN pip install -r requirements.txt
 EXPOSE 8000
 VOLUME /cache
-CMD gunicorn --workers=2 --timeout 300 -b 0.0.0.0:8000 api:api
+CMD gunicorn --workers=2 --timeout 300 -b 0.0.0.0:8000 --log-level=info --preload api:api
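
For readers less familiar with gunicorn's flags, the new command maps onto gunicorn's standard configuration-file settings. A minimal sketch of an equivalent gunicorn.conf.py (not part of this repository, shown only to spell out what each flag does; it would be used as gunicorn -c gunicorn.conf.py api:api):

# gunicorn.conf.py -- hypothetical equivalent of the flags in the new CMD
bind = "0.0.0.0:8000"   # -b 0.0.0.0:8000
workers = 2             # --workers=2
timeout = 300           # --timeout 300
loglevel = "info"       # --log-level=info
preload_app = True      # --preload: import the app once in the master before forking workers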
api.py

@@ -3,11 +3,39 @@ from flask import request, send_file
 import os.path
 from apscheduler.schedulers.background import BackgroundScheduler
 import atexit
+import random
+import logging
+
+#logging.basicConfig(format="[%(asctime)s] [%(process)d] [%(levelname)s] %(message)s", datefmt="%Y-%m-%d %H:%M:%S %z")

 from main import refresh_cache

+scheduler = BackgroundScheduler()
+
+# It was observed that GeoNetwork does not handle multiple concurrent requests.
+# As a matter of fact, in case multiple instances of this service refresh the cache
+# concurrently, they all receive a 502 Proxy Error response. In order to avoid that,
+# we let the various instances of this service refresh the cache at different times,
+# with at least 5 minutes between each request. The following configuration allows for
+# 10 concurrent requests.
+random_minute = 5 * random.randint(0, 9)
+scheduler.add_job(refresh_cache, 'cron', hour=6, minute=random_minute)
+#scheduler.add_job(refresh_cache, 'interval', seconds=random_minute)
+scheduler.start()
+
+# Shut down the scheduler when exiting the app
+atexit.register(lambda: scheduler.shutdown())
+
 api = Flask(__name__, static_url_path='')

+if __name__ != '__main__':
+    gunicorn_logger = logging.getLogger('gunicorn.error')
+    api.logger.handlers = gunicorn_logger.handlers
+    api.logger.setLevel(gunicorn_logger.level)
+    api.logger.info("Cache will be refreshed every day at %s minutes past 6 AM (GMT)." % random_minute)
+
 @api.route("/")
 def index():
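
A quick sketch of the arithmetic behind the comment above: random_minute = 5 * random.randint(0, 9) picks one of ten slots, 0, 5, 10, ..., 45 minutes past 6 AM, so the slots of any two instances are either identical or at least five minutes apart. Each instance draws its slot independently, so collisions remain possible; the comment describes the number of distinct slots rather than a hard guarantee.

import random

# Same staggering rule as in api.py: one of ten possible 5-minute slots.
random_minute = 5 * random.randint(0, 9)
assert random_minute in range(0, 50, 5)
print("cache refresh scheduled at 06:%02d GMT" % random_minute)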
@@ -15,27 +43,18 @@ def index():
     if not os.path.isfile('cache/%s' % filename):
         refresh_cache()

-    user_agent = request.headers.get('User-Agent')
-    if 'windows' in user_agent.lower():
-        filename = 'catalog-windows1252.csv'
+    try:
+        user_agent = request.headers.get('User-Agent')
+        if 'windows' in user_agent.lower():
+            filename = 'catalog-windows1252.csv'
+    except:
+        pass

     return send_file('cache/%s' % filename, mimetype='text/csv', attachment_filename=filename, as_attachment=True)

-@api.before_first_request
-def init_scheduler():
-    scheduler = BackgroundScheduler()
-    scheduler.add_job(refresh_cache, 'cron', hour='6', minute='0')
-    #scheduler.add_job(refresh_cache, 'interval', seconds=3)
-    scheduler.start()
-
-    # Shut down the scheduler when exiting the app
-    atexit.register(lambda: scheduler.shutdown())

 if __name__ == '__main__':
     api.run(host='0.0.0.0', port=8000, debug=True)
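
Two things change in this hunk: the per-worker @api.before_first_request scheduler set-up is dropped in favour of the module-level scheduler added above, and the User-Agent sniffing is wrapped in try/except so that a request without a User-Agent header (where request.headers.get('User-Agent') returns None) can no longer crash the endpoint. A minimal sketch of the failure the guard prevents:

# What happens without the guard when a client sends no User-Agent header:
user_agent = None          # request.headers.get('User-Agent') yields None in that case
try:
    'windows' in user_agent.lower()
except AttributeError as exc:
    print("unguarded code would fail with:", exc)   # 'NoneType' object has no attribute 'lower'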
main.py

@@ -3,10 +3,26 @@ import subprocess
 import hashlib
 import codecs
 import os
 import yaml
+import time
+import logging

-with open('config.yaml') as fp:
-    cfg = yaml.load(fp)
+from yaml import load, dump
+try:
+    from yaml import CLoader as Loader, CDumper as Dumper
+except ImportError:
+    from yaml import Loader, Dumper
+
+# read 'n' parse the configuration
+with open("config.yaml", 'r') as yamlfile:
+    cfg = load(yamlfile, Loader=Loader)
+
+#logging.basicConfig(format="[%(asctime)s] [%(process)d] [%(levelname)s] %(message)s", datefmt="%Y-%m-%d %H:%M:%S %z")
+
+if __name__ != '__main__':
+    gunicorn_logger = logging.getLogger('gunicorn.error')
+    log = logging.getLogger("cache-refresher")
+    log.handlers = gunicorn_logger.handlers
+    log.setLevel(gunicorn_logger.level)
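
The configuration is now parsed with an explicit Loader, preferring the libyaml-backed CLoader when PyYAML was built with it and falling back to the pure-Python Loader otherwise; a bare yaml.load(fp) without a Loader argument triggers a deprecation warning on recent PyYAML releases. A small sketch (not part of the repository) to check which branch of that fallback applies in a given environment:

import yaml

# The try/except above picks CLoader (libyaml) when it is available
# and silently falls back to the pure-Python Loader otherwise.
print("libyaml-backed CLoader available:", hasattr(yaml, "CLoader"))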
@@ -28,9 +44,23 @@ def refresh_cache():
     #
     # if file_found == False or age > 86400:

-    print('Fetching a new file...')
-    res = requests.get(url)
-    print('Done.')
+    log.info('Refreshing cache...')
+
+    done = False
+    while not done:
+        res = requests.get(url)
+        if res.status_code != 200:
+            log.error('Something went wrong when hitting the following URL: %s' % url)
+            log.error('Here is the response:')
+            log.error(res)
+            log.error('Sleeping for 5 seconds before retrying...')
+            time.sleep(5)
+            done = False
+        else:
+            log.info('Done.')
+            done = True
+            break

     if not os.path.exists(working_directory):
         os.makedirs(working_directory)
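
The print-based fetch is replaced by a loop that logs through the gunicorn-attached 'cache-refresher' logger and retries every five seconds until GeoNetwork answers with HTTP 200; as written it retries indefinitely. A hedged sketch of the same idea factored into a reusable helper, with an optional cap on attempts (the helper name and the cap are illustrative additions, not part of the committed code):

import time
import logging

import requests

log = logging.getLogger("cache-refresher")

def fetch_with_retry(url, delay=5, max_attempts=None):
    """Request `url` until it returns HTTP 200, mirroring the loop in refresh_cache().

    `max_attempts` is an illustrative addition; the committed code retries without limit.
    """
    attempts = 0
    while True:
        attempts += 1
        res = requests.get(url)
        if res.status_code == 200:
            log.info('Done.')
            return res
        log.error('Something went wrong when hitting the following URL: %s' % url)
        log.error('Here is the response: %s' % res)
        if max_attempts is not None and attempts >= max_attempts:
            raise RuntimeError('Giving up on %s after %d attempts' % (url, attempts))
        log.error('Sleeping for %s seconds before retrying...' % delay)
        time.sleep(delay)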