Commit 4ffee32a authored by Alessandro CERIONI

Improved logging

parent 1d9f8fea
Pipeline #3231 passed with stage in 39 seconds
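What changed: scattered print() calls across the Flask routes, content getters, indexer and reindexer are replaced with per-module loggers that hand off to Gunicorn's error log. Each touched module now carries roughly the same boilerplate; a minimal sketch of the pattern (not the exact code of any one file):

import logging

logging.basicConfig()
logger = logging.getLogger(__name__)

# When the module is imported by Gunicorn rather than run as a script,
# reuse Gunicorn's error-log handlers and level so messages from this
# module end up in the same stream as the server's own logs.
if __name__ != '__main__':
    gunicorn_logger = logging.getLogger('gunicorn.error')
    logger.handlers = gunicorn_logger.handlers
    logger.setLevel(gunicorn_logger.level)

logger.info('Starting...')  # instead of print('Starting...')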
@@ -3,14 +3,28 @@ from flask import jsonify, render_template, request
import time
import json
import yaml
import logging
from main import index_everything, add_content, update_content, delete_content
from yaml import load, dump
try:
from yaml import CLoader as Loader, CDumper as Dumper
except ImportError:
from yaml import Loader, Dumper
with open("config.yaml", 'r') as yamlfile:
cfg = yaml.load(yamlfile)
cfg = load(yamlfile, Loader=Loader)
api = Flask(__name__, static_url_path='')
# Many thanks to https://medium.com/@trstringer/logging-flask-and-gunicorn-the-manageable-way-2e6f0b8beb2f
if __name__ != '__main__':
gunicorn_logger = logging.getLogger('gunicorn.error')
api.logger.handlers = gunicorn_logger.handlers
api.logger.setLevel(gunicorn_logger.level)
@api.route("/")
def hello():
return render_template('index.html')
@@ -21,14 +35,17 @@ def hello():
# task_url = main(the_post_id)
# return jsonify( {'reindex_task_url': task_url} )
@api.route("/test", methods=['GET', 'POST'])
@api.route("/test", methods=['GET'])
def test():
data = request.data.decode('utf-8') # data is empty
#print(dir(request.data))
json_data = json.loads(data)
with open('output/output.json', 'w') as fp:
json.dump(json_data, fp)
#print( json.dumps(json_data, indent=4) )
api.logger.info('***TEST***')
test_log()
# data = request.data.decode('utf-8') # data is empty
# #print(dir(request.data))
# json_data = json.loads(data)
# with open('output/output.json', 'w') as fp:
# json.dump(json_data, fp)
# #print( json.dumps(json_data, indent=4) )
# need posted data here
return jsonify( {'status': 'OK'} )
......
from elasticsearch import Elasticsearch
from elasticsearch import Elasticsearch, NotFoundError
import json
import logging
logging.basicConfig()
logger = logging.getLogger(__name__)
if __name__ != '__main__':
gunicorn_logger = logging.getLogger('gunicorn.error')
logger.handlers = gunicorn_logger.handlers
logger.setLevel(gunicorn_logger.level)
def alias_setter(cfg, content_type):
es = Elasticsearch( [cfg['reindexer']['destination_url']] )
@@ -22,8 +31,10 @@ def alias_setter(cfg, content_type):
try:
res = es.indices.delete_alias(index='*', name=alias)
except NotFoundError:
pass
except Exception as e:
print(e)
logger.error(e)
res = es.indices.put_alias( index=destination_index,
name=alias,
......
#import msgpack
import requests
import json
#import pika
import logging
logging.basicConfig()
logger = logging.getLogger(__name__)
if __name__ != '__main__':
gunicorn_logger = logging.getLogger('gunicorn.error')
logger.handlers = gunicorn_logger.handlers
logger.setLevel(gunicorn_logger.level)
def get_token( root_url, username, password ):
@@ -12,8 +20,6 @@ def get_token( root_url, username, password ):
res = requests.post(url, data=payload)
#print(res.json())
token = res.json()['token']
return token
@@ -33,7 +39,6 @@ def get_post( cfg, the_post_id ):
res = requests.get( root_url + '/wp/v2/posts/%s?_embed' % (the_post_id), headers=headers )
return res.json()
# print(dir(res))
def get_posts_by_page( cfg, no_posts_per_page=10 ):
@@ -52,17 +57,7 @@ def get_posts_by_page( cfg, no_posts_per_page=10 ):
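# WordPress paginates its REST API responses; the X-WP-TotalPages response header read below tells us when to stop fetching.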
while page_no <= total_pages:
# print('*'*100)
# print('page_no: ', page_no)
res = requests.get( root_url + '/wp/v2/posts?_embed&per_page=%s&page=%s&status=any' % (no_posts_per_page, page_no), headers=headers )
# print(dir(res))
# print(res.url)
#print( json.dumps(res.json(), indent=4) )
#print( res.headers )
#print( res.headers['X-WP-Total'], res.headers['X-WP-TotalPages'])
# exit(1)
total_pages = int(res.headers['X-WP-TotalPages'])
page_no += 1
@@ -76,4 +71,4 @@ def get_posts_by_page( cfg, no_posts_per_page=10 ):
if __name__ == '__main__':
print('Nothing do. Exiting.')
logger.info('Nothing to do. Exiting.')
#import msgpack
import requests
import json
import subprocess
#import pika
import logging
logging.basicConfig()
logger = logging.getLogger(__name__)
if __name__ != '__main__':
gunicorn_logger = logging.getLogger('gunicorn.error')
logger.handlers = gunicorn_logger.handlers
logger.setLevel(gunicorn_logger.level)
def get_token( admin_key_key ):
@@ -89,4 +98,4 @@ def get_all_content_by_page( cfg, the_content_type, no_elements_per_page=10 ):
if __name__ == '__main__':
print('Nothing do. Exiting.')
logger.info('Nothing to do. Exiting.')
import logging
logging.basicConfig()
logger = logging.getLogger(__name__)
if __name__ != '__main__':
gunicorn_logger = logging.getLogger('gunicorn.error')
logger.handlers = gunicorn_logger.handlers
logger.setLevel(gunicorn_logger.level)
def process_posts( posts ):
output_posts = []
......
from elasticsearch import Elasticsearch, NotFoundError
import logging
logging.basicConfig()
logger = logging.getLogger(__name__)
if __name__ != '__main__':
gunicorn_logger = logging.getLogger('gunicorn.error')
logger.handlers = gunicorn_logger.handlers
logger.setLevel(gunicorn_logger.level)
def index_cleaner( cfg, content_type, content_id=None ):
@@ -12,7 +23,7 @@ def index_cleaner( cfg, content_type, content_id=None ):
# Cleaning up the ingest and digest indexes...
if content_id == None:
print('Deleting the ingest and digest indexes...')
logger.info('Deleting the ingest and digest indexes...')
successful = True
try:
@@ -20,7 +31,7 @@ def index_cleaner( cfg, content_type, content_id=None ):
except NotFoundError:
pass
except Exception as e:
print(e)
logger.critical(e)
successful = False
try:
@@ -28,12 +39,12 @@ def index_cleaner( cfg, content_type, content_id=None ):
except NotFoundError:
pass
except Exception as e:
print(e)
logger.critical(e)
successful = False
else:
print('Trying to delete content with id = %s...' % content_id )
logger.info('Trying to delete content with id = %s...' % content_id)
successful = True
@@ -42,7 +53,7 @@ def index_cleaner( cfg, content_type, content_id=None ):
except NotFoundError:
pass
except Exception as e:
print(e)
logger.critical(e)
successful = False
try:
@@ -50,7 +61,7 @@ def index_cleaner( cfg, content_type, content_id=None ):
except NotFoundError:
pass
except Exception as e:
print(e)
logger.critical(e)
successful = False
return successful
@@ -3,6 +3,16 @@ import time
import hashlib
from elasticsearch import Elasticsearch
import logging
logging.basicConfig()
logger = logging.getLogger(__name__)
if __name__ != '__main__':
gunicorn_logger = logging.getLogger('gunicorn.error')
logger.handlers = gunicorn_logger.handlers
logger.setLevel(gunicorn_logger.level)
def index_docs( cfg, content_type, docs ):
es_url = cfg['indexer']['url']
@@ -52,19 +62,13 @@ def index_docs( cfg, content_type, docs ):
}
}
# try:
# # delete index, in case it already exists
# rep = es.indices.delete(es_index)
# except:
# pass
try:
# create index, in case it doesn't exist yet
rep = es.indices.create(es_index, es_body)
except Exception as e:
print(e)
print("Pushing %i documents into Elasticsearch..." % len(docs))
logger.info("Pushing %i documents into Elasticsearch..." % len(docs))
header = {
"index" : {
@@ -93,9 +97,9 @@ def index_docs( cfg, content_type, docs ):
if rep['errors'] == False:
#print("")
print("| Done in ", t2-t1, ' seconds.')
logging.info("| Done in ", t2-t1, ' seconds.')
return True
else:
#print("")
print("| Failed.")
logging.critical("| Failed.")
return False
from elasticsearch import Elasticsearch
import time
from pprint import pprint
import logging
logging.basicConfig()
logger = logging.getLogger(__name__)
if __name__ != '__main__':
gunicorn_logger = logging.getLogger('gunicorn.error')
logger.handlers = gunicorn_logger.handlers
logger.setLevel(gunicorn_logger.level)
from .elastic_mapping_template_pages import template as template_pages
from .elastic_mapping_template_posts import template as template_posts
def reindexer(cfg, content_type, content_id=None):
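# Pick the mapping template matching the content type: template_posts or template_pages, imported above.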
template = eval('template_%s' % content_type)
@@ -19,27 +29,18 @@ def reindexer(cfg, content_type, content_id=None):
try:
rep = destin_es.indices.delete_template(cfg['reindexer']['template'][content_type])
#print(rep)
except Exception as e:
logger.error(e)
pass
rep = destin_es.indices.put_template(cfg['reindexer']['template'][content_type], template)
# rep = es.indices.get_template("template_1")
# print(rep)
# t1 = time.time()
# try:
# rep = destination_es.indices.delete(cfg['reindexer']['destination_index'])
# #print(rep)
# except:
# pass
try:
rep = destin_es.indices.create(cfg['reindexer']['destination_index'][content_type])
#print(rep)
except Exception as e:
logger.error(e)
pass
body = {
@@ -66,10 +67,7 @@ def reindexer(cfg, content_type, content_id=None):
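# When a single content_id is given, narrow the reindex request to that one document; otherwise the whole source index is reindexed.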
if content_id != None:
body['source']['query'] = {"term": {"_id": content_id}}
print(body)
# waiting for source index to be stable
# waiting for source index to be stable
count1 = 0
count2 = 1
@@ -85,7 +83,7 @@ def reindexer(cfg, content_type, content_id=None):
#if rep['failures'] == []:
if 'task' in rep:
task_url = cfg['reindexer']['destination_url'] + '/_tasks/' + rep['task']
print("Task URL: %s" % task_url)
logger.info("Task URL: %s" % task_url)
return task_url
else:
return False
import logging
logging.basicConfig()
logger = logging.getLogger(__name__)
if __name__ != '__main__':
gunicorn_logger = logging.getLogger('gunicorn.error')
logger.handlers = gunicorn_logger.handlers
logger.setLevel(gunicorn_logger.level)
def process_posts( posts ):
output_posts = []
@@ -26,20 +36,8 @@ def process_posts( posts ):
output_post['author'] = post['_embedded']['author'][0]['name']
output_post['slug'] = post['slug']
#print(json.dumps(output_post, indent=4))
#exit(1)
#print(output_post)
#exit(1)
output_posts.append(output_post)
#exit(1)
# print output
return output_posts
......
import json
from elasticsearch import Elasticsearch
from flask import current_app as my_app
#from lib.content_getter import get_posts_by_page, get_post
from lib.ghost_content_processor import process_posts, process_pages
@@ -9,44 +10,49 @@ from lib.alias_setter import alias_setter
from lib.index_cleaner import index_cleaner
from lib.ghost_content_getter import get_token, get_all_content_by_page, get_content
import logging
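# Reset any existing root handlers and log in a format close to Gunicorn's own (timestamp, pid, level), plus the logger name.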
logging.root.handlers = []
logging.basicConfig(format="%(asctime)s [%(process)d] [%(levelname)s] [%(name)s] %(message)s", datefmt="[%Y-%m-%d %H:%M:%S %z]")
logger = logging.getLogger(__name__)
def index_everything( cfg, content_type ):
print('Starting...')
logger.info('Starting...')
index_cleaner(cfg, content_type)
print('Getting %s...' % content_type)
logger.info('Getting %s...' % content_type)
content_pages = get_all_content_by_page(cfg, content_type)
cnt = 1
for content_page in content_pages:
print('Processing content page no. %i...' % cnt)
logger.info('Processing content page no. %i...' % cnt)
#processed_page = process_posts(page)
processed_content_page = eval("process_%s" % content_type)( content_page[content_type] )
successful = index_docs(cfg, content_type, processed_content_page)
if not successful:
print('Something went wrong. Exiting...')
logger.critical('Something went wrong upon indexing docs: exiting.')
exit(1)
#print(processed_page)
cnt += 1
print('Reindexing...')
logger.info('Reindexing...')
task_url = reindexer(cfg, content_type)
#print(task_url)
print('Setting aliases...')
logger.info('Setting aliases...')
successful = alias_setter(cfg, content_type)
if not successful:
print('Something went wrong. Exiting...')
logger.critical('Something went wrong upon setting aliases: exiting.')
exit(1)
print('done.')
logger.info('done.')
return task_url
@@ -56,27 +62,27 @@ def add_content( cfg, content_type, data ):
# N.B. : pages -> page; posts -> post, that's why we remove the last letter from content_type
content_id = data[ content_type[0:len(content_type)-1] ]['current']['id']
print('Getting content with id = %s...' % content_id)
logger.info('Getting content with id = %s...' % content_id)
try:
content = get_content(cfg, content_type, content_id)
except Exception as e:
print(e)
logger.critical(e)
return False
processed_content = eval('process_%s' % content_type)([content])
successful = index_cleaner(cfg, content_type, content_id)
if not successful:
raise Exception('Something went wrong. Exiting...')
raise Exception('Something went wrong upon cleaning indices: exiting.')
exit(1)
successful = index_docs(cfg, content_type, processed_content)
if not successful:
raise Exception('Something went wrong. Exiting...')
raise Exception('Something went wrong upon indexing docs: exiting.')
exit(1)
print('Reindexing...')
logger.info('Reindexing...')
task_url = reindexer(cfg, content_type, content_id)
#print(task_url)
@@ -84,7 +90,7 @@ def add_content( cfg, content_type, data ):
raise Exception('Something went wrong. Exiting...')
exit(1)
print('done.')
logger.info('done.')
return task_url
@@ -107,18 +113,15 @@ def update_content( cfg, content_type, data ):
if __name__ == "__main__":
import yaml
from yaml import load, dump
try:
from yaml import CLoader as Loader, CDumper as Dumper
except ImportError:
from yaml import Loader, Dumper
with open("config.yaml", 'r') as yamlfile:
cfg = yaml.load(yamlfile)
cfg = load(yamlfile, Loader=Loader)
index_everything(cfg, 'posts')
index_everything(cfg, 'pages')
#exit(0)
# with open('output/post_created.json', 'r') as fp:
# data = json.load(fp)
# add_post(cfg, data)
#
# with open('output/post_deleted.json', 'r') as fp:
# data = json.load(fp)
# delete_post(cfg, data)