Project: web-et-numerique / web-et-numerique-internet / data.grandlyon.com / web-portal / components / indexers / editorial-content / Commits

Commit 4ffee32a ("Improved logging")
Authored Jun 15, 2019 by Alessandro CERIONI
Parent commit: 1d9f8fea
Pipeline #3231 passed in 39 seconds
Changes: 10 files, 2 pipelines
api.py
@@ -3,14 +3,28 @@ from flask import jsonify, render_template, request
 import time
 import json
-import yaml
+import logging
 from main import index_everything, add_content, update_content, delete_content
+from yaml import load, dump
+try:
+    from yaml import CLoader as Loader, CDumper as Dumper
+except ImportError:
+    from yaml import Loader, Dumper

 with open("config.yaml", 'r') as yamlfile:
-    cfg = yaml.load(yamlfile)
+    cfg = load(yamlfile, Loader=Loader)

 api = Flask(__name__, static_url_path='')

+# Many thanks to https://medium.com/@trstringer/logging-flask-and-gunicorn-the-manageable-way-2e6f0b8beb2f
+if __name__ != '__main__':
+    gunicorn_logger = logging.getLogger('gunicorn.error')
+    api.logger.handlers = gunicorn_logger.handlers
+    api.logger.setLevel(gunicorn_logger.level)

 @api.route("/")
 def hello():
     return render_template('index.html')

@@ -21,14 +35,17 @@ def hello():
 #     task_url = main(the_post_id)
 #     return jsonify( {'reindex_task_url': task_url} )

-@api.route("/test", methods=['GET', 'POST'])
+@api.route("/test", methods=['GET'])
 def test():
-    data = request.data.decode('utf-8')  # data is empty
-    #print(dir(request.data))
-    json_data = json.loads(data)
-    with open('output/output.json', 'w') as fp:
-        json.dump(json_data, fp)
-    #print( json.dumps(json_data, indent=4) )
+    api.logger.info('***TEST***')
+    test_log()
+    # data = request.data.decode('utf-8') # data is empty
+    # #print(dir(request.data))
+    # json_data = json.loads(data)
+    # with open('output/output.json', 'w') as fp:
+    #     json.dump(json_data, fp)
+    # #print( json.dumps(json_data, indent=4) )
+    # need posted data here
     return jsonify({'status': 'OK'})
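The change that repeats across every module in this commit is the gunicorn logging handoff: when the code is imported by gunicorn rather than run as a script, the Flask or module logger borrows gunicorn's handlers and level so that all application messages land in gunicorn's error log. A minimal standalone sketch of the pattern (the logger name 'gunicorn.error' is the one gunicorn actually exposes; everything else here is illustrative):

import logging

# Default configuration for the "run as a plain script" case.
logging.basicConfig()
logger = logging.getLogger(__name__)

if __name__ != '__main__':
    # Imported by a WSGI server such as gunicorn: reuse its handlers and level
    # so this module's messages share gunicorn's log output and verbosity.
    gunicorn_logger = logging.getLogger('gunicorn.error')
    logger.handlers = gunicorn_logger.handlers
    logger.setLevel(gunicorn_logger.level)

logger.info('logger wired up')  # example call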
lib/alias_setter.py
-from elasticsearch import Elasticsearch
+from elasticsearch import Elasticsearch, NotFoundError
 import json
+import logging
+
+logging.basicConfig()
+logger = logging.getLogger(__name__)
+
+if __name__ != '__main__':
+    gunicorn_logger = logging.getLogger('gunicorn.error')
+    logger.handlers = gunicorn_logger.handlers
+    logger.setLevel(gunicorn_logger.level)

 def alias_setter(cfg, content_type):
     es = Elasticsearch([cfg['reindexer']['destination_url']])

@@ -22,8 +31,10 @@ def alias_setter(cfg, content_type):
     try:
         res = es.indices.delete_alias(index='*', name=alias)
+    except NotFoundError:
+        pass
     except Exception as e:
-        print(e)
+        logger.error(e)

     res = es.indices.put_alias(index=destination_index, name=alias,
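alias_setter points a stable alias at whichever physical index was just rebuilt: it first removes the alias wherever it currently points, tolerating the case where it does not exist yet, then attaches it to the destination index. A condensed sketch of that sequence (the URL, index and alias names are placeholders, not values from the repository's config):

from elasticsearch import Elasticsearch, NotFoundError

es = Elasticsearch(['http://localhost:9200'])
alias = 'editorial-content'                  # hypothetical alias name
destination_index = 'editorial-content.v2'   # hypothetical target index

try:
    es.indices.delete_alias(index='*', name=alias)  # drop the alias from any index holding it
except NotFoundError:
    pass                                            # alias did not exist yet: nothing to clean

es.indices.put_alias(index=destination_index, name=alias)

Elasticsearch can also perform the remove/add pair in a single atomic indices.update_aliases request, which avoids a short window during which the alias resolves to nothing.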
lib/content_getter.py
 #import msgpack
 import requests
 import json
 #import pika
+import logging
+
+logging.basicConfig()
+logger = logging.getLogger(__name__)
+
+if __name__ != '__main__':
+    gunicorn_logger = logging.getLogger('gunicorn.error')
+    logger.handlers = gunicorn_logger.handlers
+    logger.setLevel(gunicorn_logger.level)

 def get_token( root_url, username, password ):

@@ -12,8 +20,6 @@ def get_token( root_url, username, password ):
     res = requests.post(url, data=payload)
-    #print(res.json())
     token = res.json()['token']
     return token

@@ -33,7 +39,6 @@ def get_post( cfg, the_post_id ):
     res = requests.get(root_url + '/wp/v2/posts/%s?_embed' % (the_post_id), headers=headers)
     return res.json()
-    # print(dir(res))

 def get_posts_by_page( cfg, no_posts_per_page=10 ):

@@ -52,17 +57,7 @@ def get_posts_by_page( cfg, no_posts_per_page=10 ):
     while page_no <= total_pages:
-        # print('*'*100)
-        # print('page_no: ', page_no)
         res = requests.get(root_url + '/wp/v2/posts?_embed&per_page=%s&page=%s&status=any' % (no_posts_per_page, page_no), headers=headers)
-        # print(dir(res))
-        # print(res.url)
-        #print( json.dumps(res.json(), indent=4) )
-        #print( res.headers )
-        #print( res.headers['X-WP-Total'], res.headers['X-WP-TotalPages'])
-        # exit(1)
         total_pages = int(res.headers['X-WP-TotalPages'])
         page_no += 1

@@ -76,4 +71,4 @@ def get_posts_by_page( cfg, no_posts_per_page=10 ):
 if __name__ == '__main__':
-    print('Nothing do. Exiting.')
+    logger.info('Nothing do. Exiting.')
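get_posts_by_page walks the WordPress REST API one page at a time and uses the X-WP-TotalPages response header to know when to stop. A minimal sketch of that loop, written as a generator (root_url and headers are placeholders; authentication is assumed to have been handled by get_token):

import requests

def iter_posts(root_url, headers, per_page=10):
    # Yield one page of posts at a time from the WordPress REST API.
    page_no, total_pages = 1, 1
    while page_no <= total_pages:
        res = requests.get(
            root_url + '/wp/v2/posts?_embed&per_page=%s&page=%s&status=any' % (per_page, page_no),
            headers=headers)
        # WordPress reports how many pages exist in a response header,
        # so the loop bound is refreshed on every request.
        total_pages = int(res.headers['X-WP-TotalPages'])
        yield res.json()
        page_no += 1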
lib/ghost_content_getter.py
 #import msgpack
 import requests
 import json
 import subprocess
 #import pika
+import logging
+
+logging.basicConfig()
+logger = logging.getLogger(__name__)
+
+if __name__ != '__main__':
+    gunicorn_logger = logging.getLogger('gunicorn.error')
+    logger.handlers = gunicorn_logger.handlers
+    logger.setLevel(gunicorn_logger.level)

 def get_token( admin_key_key ):

@@ -89,4 +98,4 @@ def get_all_content_by_page( cfg, the_content_type, no_elements_per_page=10 ):
 if __name__ == '__main__':
-    print('Nothing do. Exiting.')
+    logger.info('Nothing do. Exiting.')
lib/ghost_content_processor.py
+import logging
+
+logging.basicConfig()
+logger = logging.getLogger(__name__)
+
+if __name__ != '__main__':
+    gunicorn_logger = logging.getLogger('gunicorn.error')
+    logger.handlers = gunicorn_logger.handlers
+    logger.setLevel(gunicorn_logger.level)

 def process_posts( posts ):
     output_posts = []
lib/index_cleaner.py
 from elasticsearch import Elasticsearch, NotFoundError
+import logging
+
+logging.basicConfig()
+logger = logging.getLogger(__name__)
+
+if __name__ != '__main__':
+    gunicorn_logger = logging.getLogger('gunicorn.error')
+    logger.handlers = gunicorn_logger.handlers
+    logger.setLevel(gunicorn_logger.level)

 def index_cleaner( cfg, content_type, content_id=None ):

@@ -12,7 +23,7 @@ def index_cleaner( cfg, content_type, content_id=None ):
     # Cleaning up the ingest and digest indexes...
     if content_id == None:
-        print('Deleting the ingest and digest indexes...')
+        logger.info('Deleting the ingest and digest indexes...')
         successful = True
         try:

@@ -20,7 +31,7 @@ def index_cleaner( cfg, content_type, content_id=None ):
         except NotFoundError:
             pass
         except Exception as e:
-            print(e)
+            logger.critical(e)
             successful = False
         try:

@@ -28,12 +39,12 @@ def index_cleaner( cfg, content_type, content_id=None ):
         except NotFoundError:
             pass
         except Exception as e:
-            print(e)
+            logger.critical(e)
             successful = False
     else:
-        print('Trying to delete content with id = %s...' % content_id)
+        logging.info('Trying to delete content with id = %s...' % content_id)
         successful = True

@@ -42,7 +53,7 @@ def index_cleaner( cfg, content_type, content_id=None ):
         except NotFoundError:
             pass
         except Exception as e:
-            print(e)
+            logging.critical(e)
             successful = False
         try:

@@ -50,7 +61,7 @@ def index_cleaner( cfg, content_type, content_id=None ):
         except NotFoundError:
             pass
         except Exception as e:
-            print(e)
+            logging.critical(e)
             successful = False

     return successful
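The cleanup code treats a missing index or document as a non-event: NotFoundError is swallowed, while any other exception is logged and flips the success flag that the caller checks. A condensed sketch of that pattern (the URL and index name are placeholders):

from elasticsearch import Elasticsearch, NotFoundError
import logging

logger = logging.getLogger(__name__)
es = Elasticsearch(['http://localhost:9200'])

successful = True
try:
    es.indices.delete(index='editorial-content.ingest')  # hypothetical index name
except NotFoundError:
    pass                      # already gone: nothing to clean up
except Exception as e:
    logger.critical(e)        # anything else is a real failure
    successful = False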
lib/indexer.py
@@ -3,6 +3,16 @@ import time
 import hashlib
 from elasticsearch import Elasticsearch
+import logging
+
+logging.basicConfig()
+logger = logging.getLogger(__name__)
+
+if __name__ != '__main__':
+    gunicorn_logger = logging.getLogger('gunicorn.error')
+    logger.handlers = gunicorn_logger.handlers
+    logger.setLevel(gunicorn_logger.level)

 def index_docs( cfg, content_type, docs ):
     es_url = cfg['indexer']['url']

@@ -52,19 +62,13 @@ def index_docs( cfg, content_type, docs ):
             }
         }

-    # try:
-    #     # delete index, in case it already exists
-    #     rep = es.indices.delete(es_index)
-    # except:
-    #     pass
     try:
         # create index, in case it doesn't exist yet
         rep = es.indices.create(es_index, es_body)
     except Exception as e:
         print(e)

-    print("Pushing %i documents into Elasticsearch..." % len(docs))
+    logger.info("Pushing %i documents into Elasticsearch..." % len(docs))

     header = {
         "index": {

@@ -93,9 +97,9 @@ def index_docs( cfg, content_type, docs ):
     if rep['errors'] == False:
         #print("")
-        print("| Done in ", t2-t1, ' seconds.')
+        logging.info("| Done in ", t2-t1, ' seconds.')
         return True
     else:
         #print("")
-        print("| Failed.")
+        logging.critical("| Failed.")
         return False
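One caveat about the last hunk: the standard logging module formats its message with %-style arguments rather than print-style commas, so a call such as logging.info("| Done in ", t2 - t1, ' seconds.') is not rendered as intended (logging reports a message-formatting error instead of the timing). The idiomatic, lazily formatted equivalent would look like this (sketch; t1 and t2 stand for the timestamps already measured in index_docs):

import logging
import time

logger = logging.getLogger(__name__)
t1 = time.time()
# ... work being timed ...
t2 = time.time()

# Extra arguments are substituted into the message with the % operator,
# and only if the record is actually emitted at the current log level.
logger.info("| Done in %s seconds.", t2 - t1)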
lib/reindexer.py
 from elasticsearch import Elasticsearch
 import time
 from pprint import pprint
+import logging
+
+logging.basicConfig()
+logger = logging.getLogger(__name__)
+
+if __name__ != '__main__':
+    gunicorn_logger = logging.getLogger('gunicorn.error')
+    logger.handlers = gunicorn_logger.handlers
+    logger.setLevel(gunicorn_logger.level)

 from .elastic_mapping_template_pages import template as template_pages
 from .elastic_mapping_template_posts import template as template_posts

 def reindexer(cfg, content_type, content_id=None):
     template = eval('template_%s' % content_type)

@@ -19,27 +29,18 @@ def reindexer(cfg, content_type, content_id=None):
     try:
         rep = destin_es.indices.delete_template(cfg['reindexer']['template'][content_type])
         #print(rep)
     except Exception as e:
+        logger.error(e)
         pass

     rep = destin_es.indices.put_template(cfg['reindexer']['template'][content_type], template)
-    # rep = es.indices.get_template("template_1")
-    # print(rep)
-
-    # t1 = time.time()
-    # try:
-    #     rep = destination_es.indices.delete(cfg['reindexer']['destination_index'])
-    #     #print(rep)
-    # except:
-    #     pass
     try:
         rep = destin_es.indices.create(cfg['reindexer']['destination_index'][content_type])
         #print(rep)
     except Exception as e:
+        logger.error(e)
         pass

     body = {

@@ -66,10 +67,7 @@ def reindexer(cfg, content_type, content_id=None):
     if content_id != None:
         body['source']['query'] = { "term": { "_id": content_id }}

-    print(body)
-
-    # waiting for source index to be stable
     # waiting for source index to be stable
     count1 = 0
     count2 = 1

@@ -85,7 +83,7 @@ def reindexer(cfg, content_type, content_id=None):
     #if rep['failures'] == []:
     if 'task' in rep:
         task_url = cfg['reindexer']['destination_url'] + '/_tasks/' + rep['task']
-        print("Task URL: %s" % task_url)
+        logger.info("Task URL: %s" % task_url)
         return task_url
     else:
         return False
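reindexer kicks off an Elasticsearch _reindex and, when the response carries a task id, hands back a task URL that the caller (and ultimately the API client) can poll for progress. A minimal sketch of that flow; the URL and index names are placeholders and the body only illustrates the general shape of a reindex request, not the project's actual configuration:

from elasticsearch import Elasticsearch

destination_url = 'http://localhost:9200'             # placeholder
destin_es = Elasticsearch([destination_url])

body = {
    "source": {"index": "editorial-content.digest"},  # hypothetical source index
    "dest": {"index": "editorial-content.v2"},        # hypothetical destination index
}

# wait_for_completion=False makes the call return immediately with a task id
# instead of blocking until the reindex has finished.
rep = destin_es.reindex(body=body, wait_for_completion=False)
if 'task' in rep:
    task_url = destination_url + '/_tasks/' + rep['task']  # poll this URL for progress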
lib/content_processor.py → lib/wp_content_processor.py (renamed)
+import logging
+
+logging.basicConfig()
+logger = logging.getLogger(__name__)
+
+if __name__ != '__main__':
+    gunicorn_logger = logging.getLogger('gunicorn.error')
+    logger.handlers = gunicorn_logger.handlers
+    logger.setLevel(gunicorn_logger.level)

 def process_posts( posts ):
     output_posts = []

@@ -26,20 +36,8 @@ def process_posts( posts ):
         output_post['author'] = post['_embedded']['author'][0]['name']
         output_post['slug'] = post['slug']
-        #print(json.dumps(output_post, indent=4))
-        #exit(1)
-        #print(output_post)
-        #exit(1)
         output_posts.append(output_post)
-    #exit(1)
-    # print output
     return output_posts
main.py
 import json
 from elasticsearch import Elasticsearch
 from flask import current_app as my_app
 #from lib.content_getter import get_posts_by_page, get_post
 from lib.ghost_content_processor import process_posts, process_pages

@@ -9,44 +10,49 @@ from lib.alias_setter import alias_setter
 from lib.index_cleaner import index_cleaner
 from lib.ghost_content_getter import get_token, get_all_content_by_page, get_content
+import logging
+
+logging.root.handlers = []
+logging.basicConfig(format="%(asctime)s [%(process)d] [%(levelname)s] [%(name)s] %(message)s", datefmt="[%Y-%m-%d %H:%M:%S %z]")
+logger = logging.getLogger(__name__)

 def index_everything( cfg, content_type ):
-    print('Starting...')
+    logger.info('Starting...')
     index_cleaner(cfg, content_type)

-    print('Getting %s...' % content_type)
+    logger.info('Getting %s...' % content_type)
     content_pages = get_all_content_by_page(cfg, content_type)

     cnt = 1
     for content_page in content_pages:
-        print('Processing content page no. %i...' % cnt)
+        logging.info('Processing content page no. %i...' % cnt)
         #processed_page = process_posts(page)
         processed_content_page = eval("process_%s" % content_type)(content_page[content_type])
         successful = index_docs(cfg, content_type, processed_content_page)
         if not successful:
-            print('Something went wrong. Exiting...')
+            logger.critical('Something went wrong upon indexing docs: exiting.')
             exit(1)
         #print(processed_page)
         cnt += 1

-    print('Reindexing...')
+    logging.info('Reindexing...')
     task_url = reindexer(cfg, content_type)
     #print(task_url)

-    print('Setting aliases...')
+    logging.info('Setting aliases...')
     successful = alias_setter(cfg, content_type)
     if not successful:
-        print('Something went wrong. Exiting...')
+        logging.critical('Something went wrong upon setting aliases: exiting.')
         exit(1)

-    print('done.')
+    logging.info('done.')

     return task_url

@@ -56,27 +62,27 @@ def add_content( cfg, content_type, data ):
     # N.B. : pages -> page; posts -> post, that's why we remove the last letter from content_type
     content_id = data[content_type[0:len(content_type)-1]]['current']['id']

-    print('Getting content with id = %s...' % content_id)
+    logging.info('Getting content with id = %s...' % content_id)
     try:
         content = get_content(cfg, content_type, content_id)
     except Exception as e:
-        print(e)
+        logging.critical(e)
         return False

     processed_content = eval('process_%s' % content_type)([content])

     successful = index_cleaner(cfg, content_type, content_id)
     if not successful:
-        raise Exception('Something went wrong. Exiting...')
+        raise Exception('Something went wrong upon cleaning indices: exiting.')
         exit(1)

     successful = index_docs(cfg, content_type, processed_content)
     if not successful:
-        raise Exception('Something went wrong. Exiting...')
+        raise Exception('Something went wrong upon indexing docs: exiting.')
         exit(1)

-    print('Reindexing...')
+    logging.info('Reindexing...')
     task_url = reindexer(cfg, content_type, content_id)
     #print(task_url)

@@ -84,7 +90,7 @@ def add_content( cfg, content_type, data ):
         raise Exception('Something went wrong. Exiting...')
         exit(1)

-    print('done.')
+    logging.info('done.')

     return task_url

@@ -107,18 +113,15 @@ def update_content( cfg, content_type, data ):
 if __name__ == "__main__":
-    import yaml
+    from yaml import load, dump
+    try:
+        from yaml import CLoader as Loader, CDumper as Dumper
+    except ImportError:
+        from yaml import Loader, Dumper

     with open("config.yaml", 'r') as yamlfile:
-        cfg = yaml.load(yamlfile)
+        cfg = load(yamlfile, Loader=Loader)

     index_everything(cfg, 'posts')
     index_everything(cfg, 'pages')

     #exit(0)
-    # with open('output/post_created.json', 'r') as fp:
-    #     data = json.load(fp)
-    # add_post(cfg, data)
-    #
-    # with open('output/post_deleted.json', 'r') as fp:
-    #     data = json.load(fp)
-    # delete_post(cfg, data)
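The other change repeated in this commit is the YAML loading: yaml.load(yamlfile) becomes load(yamlfile, Loader=Loader), with CLoader preferred when PyYAML's C extension is available and the pure-Python Loader as the fallback. Recent PyYAML versions warn when yaml.load is called without an explicit Loader. For a trusted local config file like this one, a common and slightly shorter alternative would be (sketch, not the project's code):

import yaml

with open("config.yaml", "r") as yamlfile:
    # safe_load restricts loading to standard YAML tags, which is enough for a plain config file.
    cfg = yaml.safe_load(yamlfile)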