Commit 8af1593f authored by Alessandro CERIONI

Getting posts from Ghost. Add/update/delete post methods were implemented.

parent 26d4b66a
FROM python:slim
# cf. https://github.com/nodesource/distributions/blob/master/README.md#installation-instructions
RUN apt update && apt install -y curl gnupg
RUN curl -sL https://deb.nodesource.com/setup_11.x | bash -
RUN apt install -y nodejs
WORKDIR /app
COPY requirements.txt .
COPY package.json .
COPY package-lock.json .
COPY lib/*.py ./lib/
COPY lib/jwt_factory.js ./lib/
COPY templates/*.html ./templates/
COPY api.py .
COPY main.py .
RUN pip install -r requirements.txt
RUN npm install
EXPOSE 8000
......
from flask import Flask
from flask import jsonify, render_template
from flask import jsonify, render_template, request
import time
import json
import yaml
from main import main
from main import index_everything, add_post, update_post, delete_post
with open("config.yaml", 'r') as yamlfile:
cfg = yaml.load(yamlfile)
api = Flask(__name__, static_url_path='')
@@ -10,12 +15,46 @@ api = Flask(__name__, static_url_path='')
def hello():
return render_template('index.html')
@api.route("/index/posts", defaults={'the_post_id': None})
@api.route("/index/posts/<string:the_post_id>")
def index_posts(the_post_id):
task_url = main(the_post_id)
# @api.route("/index/posts", defaults={'the_post_id': None})
# @api.route("/index/posts/<string:the_post_id>")
# def index_posts(the_post_id):
# task_url = main(the_post_id)
# return jsonify( {'reindex_task_url': task_url} )
# @api.route("/test", methods=['GET', 'POST'])
# def test():
# data = request.data # data is empty
# #print(dir(request.data))
# json_data = json.loads(data)
# with open('output/output.json', 'w') as fp:
# json.dump(json_data, fp)
# #print( json.dumps(json_data, indent=4) )
# # need posted data here
# return jsonify( {'status': 'OK'} )
@api.route("/index/posts", methods=['GET'])
def _index_posts():
task_url = index_everything(cfg)
return jsonify( {'reindex_task_url': task_url} )
@api.route("/add/post", methods=['POST'])
def _add_post():
json_data = json.loads(request.data)
task_url = add_post(cfg, json_data)
return jsonify( {'reindex_task_url': task_url} )
@api.route("/update/post", methods=['POST'])
def _update_post():
json_data = json.loads(request.data)
task_url = update_post(cfg, json_data)
return jsonify( {'reindex_task_url': task_url} )
@api.route("/delete/post", methods=['POST'])
def _delete_post():
json_data = json.loads(request.data)
status = delete_post(cfg, json_data)
return jsonify( {'status': status} )
if __name__ == '__main__':
api.run(host='0.0.0.0', port=8000, debug=True)
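For reference, a minimal sketch of exercising the new routes once the Flask app above is running; the host and port match the api.run call, the payload shape mirrors the Ghost webhook bodies consumed by add_post/update_post/delete_post, and the post id is only a placeholder:

import requests

# trigger a full reindex of all Ghost posts
print(requests.get('http://localhost:8000/index/posts').json())

# simulate the Ghost "post published" webhook; the id is a placeholder
payload = {'post': {'current': {'id': '5c7906a70a48e60001b47cc4'}}}
print(requests.post('http://localhost:8000/add/post', json=payload).json())

# simulate the "post deleted" webhook, which carries the id under 'previous'
payload = {'post': {'previous': {'id': '5c7906a70a48e60001b47cc4'}}}
print(requests.post('http://localhost:8000/delete/post', json=payload).json())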
@@ -2,6 +2,8 @@ content_getter:
wp_api: <the_wordpress_api_url>
wp_username: <the_wordpress_username>
wp_password: <the_wordpress_password>
ghost_api: <the_ghost_api_url, ex. https://your-domain/ghost/api>
ghost_admin_api_key: <the_ghost_admin_api_key>
indexer:
url: <the_elasticsearch_url>
......
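For reference, a minimal sketch of how this configuration is consumed; yaml.safe_load is used here in place of the bare yaml.load call found in the code, only to avoid the PyYAML warning:

import yaml

with open('config.yaml', 'r') as yamlfile:
    cfg = yaml.safe_load(yamlfile)

print(cfg['content_getter']['ghost_api'])
print(cfg['indexer']['url'])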
@@ -5,7 +5,7 @@ def alias_setter(cfg):
es = Elasticsearch( [cfg['reindexer']['destination_url']] )
suffixes = ['publish', 'draft', 'private']
suffixes = ['published', 'draft'] #, 'private']
successful = True
for suffix in suffixes:
@@ -19,11 +19,14 @@ def alias_setter(cfg):
}
}
#print(the_body)
try:
res = es.indices.delete_alias(index='*', name=alias)
except Exception as e:
print(e)
res = es.indices.put_alias(index=cfg['reindexer']['destination_index'],
name=alias,
body=json.dumps(the_body))
body=json.dumps(the_body))
#print(res)
successful *= res['acknowledged']
......
#import msgpack
import requests
import json
import subprocess
#import pika
def get_token( admin_api_key ):
# the Ghost Admin API key has the form <id>:<hex_secret>; the Node helper below turns it into a short-lived JWT
cmd = ['node', 'lib/jwt_factory.js']
process = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, universal_newlines=True)
JWT = process.communicate( input=admin_api_key )[0]
return JWT
def get_post( cfg, the_post_id ):
root_url = cfg['content_getter']['ghost_api']
admin_api_key = cfg['content_getter']['ghost_admin_api_key']
# username = cfg['content_getter']['wp_username']
# password = cfg['content_getter']['wp_password']
jwt = get_token(admin_api_key)
#print("JWT = ", jwt)
# cf. https://docs.ghost.org/api/admin/
headers = {}
headers['Authorization'] = 'Ghost %s' % jwt
# getting 1st page, in order to also know how many pages we have to go for...
res = requests.get( root_url + '/v2/admin/posts/%s?include=authors,tags&formats=html,plaintext' % (the_post_id), headers=headers )
if res.status_code == 200:
return res.json()['posts'][0]
else:
raise Exception('Post not found.')
# print(dir(res))
def get_posts_by_page( cfg, no_posts_per_page=10 ):
root_url = cfg['content_getter']['ghost_api']
admin_api_key = cfg['content_getter']['ghost_admin_api_key']
# username = cfg['content_getter']['wp_username']
# password = cfg['content_getter']['wp_password']
jwt = get_token(admin_api_key)
#print("JWT = ", jwt)
# cf. https://docs.ghost.org/api/admin/
headers = {}
headers['Authorization'] = 'Ghost %s' % jwt
#
page_no = 1
# getting 1st page, in order to also know how many pages we have to go for...
res = requests.get( root_url + '/v2/admin/posts?include=authors,tags&formats=html,plaintext&limit=%i&page=%i' % (no_posts_per_page, page_no), headers=headers )
yield res.json()
pagination = res.json()['meta']['pagination']
pages = pagination['pages']
page_no += 1
while page_no <= pages :
res = requests.get( root_url + '/v2/admin/posts?include=authors,tags&formats=html,plaintext&limit=%i&page=%i' % (no_posts_per_page, page_no), headers=headers )
#print(page_no, res.json()['meta'])
page_no += 1
yield res.json()
#else:
# return None
#return None
if __name__ == '__main__':
print('Nothing to do. Exiting.')
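get_posts_by_page is a generator that yields one Admin API response per page; a minimal usage sketch, assuming cfg is the dict loaded from config.yaml:

# iterate over all pages of posts, 10 posts per page by default
for page in get_posts_by_page(cfg, no_posts_per_page=10):
    for post in page['posts']:
        print(post['id'], post['title'], post['status'])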
def process_posts( posts ):
output_posts = []
for post in posts:
output_post = post.copy()
output_posts.append(output_post)
# print output
return output_posts
if __name__ == '__main__':
posts = [{u'og_image': None, u'codeinjection_foot': None, u'plaintext': u'Le contenu\n\nL\xe9gende\n--------------------------------------------------------------------------------\n\n/image \nhttps://highway-to-data.alpha.grandlyon.com/wp2-intothesky/2019/02/home-smartdata.jpg\n\nthis is another update', u'excerpt': u'Le contenu\n\nL\xe9gende\n--------------------------------------------------------------------------------\n\n/image \nhttps://highway-to-data.alpha.grandlyon.com/wp2-intothesky/2019/02/home-smartdata.jpg\n\nthis is another update', u'updated_at': u'2019-03-03T16:49:28.000Z', u'custom_template': None, u'published_at': u'2019-03-03T16:39:59.000Z', u'og_title': None, u'twitter_image': None, u'id': u'5c7906a70a48e60001b47cc4', u'twitter_description': None, u'meta_description': None, u'uuid': u'c9204cc3-da09-4759-abf9-36fab98c6e6e', u'title': u'Mon premier brouillon', u'twitter_title': None, u'html': u'<p>Le contenu</p><!--kg-card-begin: image--><figure class="kg-card kg-image-card kg-card-hascaption"><img src="https://ghost-intothesky.alpha.grandlyon.com/content/images/2019/03/Screenshot-from-2019-02-14-14-09-51.png" class="kg-image"><figcaption>L\xe9gende</figcaption></figure><!--kg-card-end: image--><!--kg-card-begin: hr--><hr><!--kg-card-end: hr--><!--kg-card-begin: markdown--><p>/image <a href="https://highway-to-data.alpha.grandlyon.com/wp2-intothesky/2019/02/home-smartdata.jpg">https://highway-to-data.alpha.grandlyon.com/wp2-intothesky/2019/02/home-smartdata.jpg</a></p>\n<!--kg-card-end: markdown--><p>this is another update</p>', u'primary_author': {u'website': None, u'bio': None, u'meta_title': None, u'name': u'Alessandro Cerioni', u'updated_at': u'2019-03-03T16:29:36.000Z', u'twitter': None, u'profile_image': None, u'accessibility': None, u'id': u'1', u'status': u'active', u'tour': u'["getting-started","featured-post"]', u'url': u'https://ghost-intothesky.alpha.grandlyon.com/author/alessandro/', u'facebook': None, u'location': None, u'cover_image': None, u'created_at': u'2019-03-01T08:26:45.000Z', u'meta_description': None, u'slug': u'alessandro', u'last_seen': u'2019-03-03T16:29:36.000Z'}, u'status': u'draft', u'primary_tag': {u'meta_description': None, u'meta_title': None, u'feature_image': None, u'name': u'test_tag', u'url': u'https://ghost-intothesky.alpha.grandlyon.com/404/', u'created_at': u'2019-03-03T16:45:44.000Z', u'updated_at': u'2019-03-03T16:45:44.000Z', u'visibility': u'public', u'id': u'5c7c04b8bf1a290001e7eb66', u'slug': u'test_tag', u'description': None}, u'meta_title': None, u'og_description': None, u'feature_image': None, u'tags': [{u'meta_description': None, u'meta_title': None, u'feature_image': None, u'name': u'test_tag', u'url': u'https://ghost-intothesky.alpha.grandlyon.com/404/', u'created_at': u'2019-03-03T16:45:44.000Z', u'updated_at': u'2019-03-03T16:45:44.000Z', u'visibility': u'public', u'id': u'5c7c04b8bf1a290001e7eb66', u'slug': u'test_tag', u'description': None}], u'comment_id': u'5c7906a70a48e60001b47cc4', u'custom_excerpt': None, u'authors': [{u'website': None, u'bio': None, u'meta_title': None, u'name': u'Alessandro Cerioni', u'updated_at': u'2019-03-03T16:29:36.000Z', u'twitter': None, u'profile_image': None, u'accessibility': None, u'id': u'1', u'status': u'active', u'tour': u'["getting-started","featured-post"]', u'url': u'https://ghost-intothesky.alpha.grandlyon.com/author/alessandro/', u'facebook': None, u'location': None, u'cover_image': None, u'created_at': u'2019-03-01T08:26:45.000Z', u'meta_description': None, u'slug': 
u'alessandro', u'last_seen': u'2019-03-03T16:29:36.000Z'}], u'slug': u'mon-premier-brouillon', u'url': u'https://ghost-intothesky.alpha.grandlyon.com/404/', u'created_at': u'2019-03-01T10:17:11.000Z', u'featured': True, u'codeinjection_head': None}]
output = process_posts(posts)
print(output)
@@ -10,38 +10,41 @@ def index_cleaner( cfg, post_id=None ):
if post_id == None:
print('Deleting the ingest and digest indexes...')
successful = True
try:
res = source_es.indices.delete(cfg['reindexer']['source_index'])
except Exception as e:
print(e)
successful = False
try:
rep = destin_es.indices.delete(cfg['reindexer']['destination_index'])
#print(rep)
except Exception as e:
print(e)
successful = False
else:
print('Deleting post with id=%s from the ingest and digest indexes...' % post_id)
the_query = {
"query": {
"match": {
"uuid.keyword": post_id
}
}
}
print('Trying to delete post with id = %s...' % post_id )
source_es = Elasticsearch([cfg['reindexer']['source_url']])
destin_es = Elasticsearch([cfg['reindexer']['destination_url']])
successful = True
try:
res = source_es.delete_by_query(index=cfg['reindexer']['source_index'], doc_type='_doc', body=the_query)
#print(res)
res = source_es.delete(index=cfg['reindexer']['source_index'], doc_type='_doc', id=post_id)
except Exception as e:
print(e)
successful = False
pass
try:
res = destin_es.delete_by_query(index=cfg['reindexer']['destination_index'], doc_type='_doc', body=the_query)
#print(res)
res = destin_es.delete(index=cfg['reindexer']['destination_index'], doc_type='_doc', id=post_id)
except Exception as e:
print(e)
#exit(1)
successful = False
pass
return
return successful
@@ -62,9 +62,7 @@ def index_docs( cfg, docs ):
# create index, in case it doesn't exist yet
rep = es.indices.create(es_index, es_body)
except Exception as e:
pass
print(e)
print("Pushing %i documents into Elasticsearch..." % len(docs))
@@ -78,11 +76,12 @@ def index_docs( cfg, docs ):
es_body = ''
for doc in docs:
try:
header['index']['_id'] = doc['uuid'] #hashlib.md5( json.dumps(doc, sort_keys=True).encode("utf-8") ).hexdigest()
#del doc['uuid']
except:
header['index']['_id'] = hashlib.md5( json.dumps(doc, sort_keys=True).encode("utf-8") ).hexdigest()
# try:
# header['index']['_id'] = doc['id'] #hashlib.md5( json.dumps(doc, sort_keys=True).encode("utf-8") ).hexdigest()
# #del doc['uuid']
# except:
# header['index']['_id'] = hashlib.md5( json.dumps(doc, sort_keys=True).encode("utf-8") ).hexdigest()
header['index']['_id'] = doc['id']
#print(header)
es_body += '{0}\n{1}\n'.format(json.dumps(header), json.dumps(doc))
......
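The loop above builds an Elasticsearch bulk body as newline-delimited JSON, one action header followed by one document, reusing the Ghost post id as the document _id. A minimal sketch of the resulting bulk call; the URL, index name and documents are placeholders, not values taken from this project:

from elasticsearch import Elasticsearch
import json

es = Elasticsearch(['http://localhost:9200'])   # placeholder URL

docs = [{'id': 'abc123', 'title': 'A post'}]    # placeholder documents
es_body = ''
for doc in docs:
    header = {'index': {'_index': 'ghost.ingest', '_type': '_doc'}}  # placeholder index name
    header['index']['_id'] = doc['id']          # Ghost post id as document id
    es_body += '{0}\n{1}\n'.format(json.dumps(header), json.dumps(doc))

res = es.bulk(body=es_body)
print(res['errors'])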
const jwt = require('jsonwebtoken');
const ByteBuffer = require("bytebuffer");
const readline = require('readline');
const rl = readline.createInterface({
input: process.stdin,
output: process.stdout
});
rl.on('line', (input) => {
// console.log(`Received: ${input}`);
const [id, hexSecret] = input.split(':');
const bb = ByteBuffer.fromHex(hexSecret);
const now = Math.floor(Date.now() / 1000); // seconds since epoch, as required for the JWT iat/exp claims
const payload = {
"exp": now + 300,
"iat": now,
"aud": "/v2/admin/"
};
const header = {
"alg": "HS256",
"typ": "JWT",
"kid": id
};
const token = jwt.sign(payload, Buffer.from(bb.toBase64(), 'base64'), { algorithm: 'HS256', header: header });
process.stdout.write(token);
rl.close();
});
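For reference, the same token can be produced without the Node helper by using PyJWT; a minimal sketch assuming the same <id>:<hex_secret> Admin API key format (PyJWT is not a dependency of this project, so this is only an illustration):

import time
import jwt  # PyJWT

def make_ghost_token(admin_api_key):
    # the Admin API key is '<key id>:<hex-encoded secret>'
    key_id, hex_secret = admin_api_key.split(':')
    iat = int(time.time())  # seconds since epoch
    payload = {'iat': iat, 'exp': iat + 300, 'aud': '/v2/admin/'}
    return jwt.encode(payload, bytes.fromhex(hex_secret),
                      algorithm='HS256', headers={'kid': key_id})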
import json
from elasticsearch import Elasticsearch
from lib.content_getter import get_posts_by_page, get_post
from lib.content_processor import process_posts
#from lib.content_getter import get_posts_by_page, get_post
from lib.ghost_content_processor import process_posts
from lib.indexer import index_docs
from lib.reindexer import reindexer
from lib.alias_setter import alias_setter
from lib.index_cleaner import index_cleaner
from lib.ghost_content_getter import get_token, get_posts_by_page, get_post
def main( post_id=None ):
import yaml
with open("config.yaml", 'r') as yamlfile:
cfg = yaml.load(yamlfile)
def index_everything( cfg ):
print('Starting...')
index_cleaner(cfg, post_id)
index_cleaner(cfg)
if post_id == None:
print('Getting posts...')
pages = get_posts_by_page(cfg)
print('Getting posts...')
pages = get_posts_by_page(cfg)
cnt = 1
for page in pages:
#print(page)
cnt = 1
for page in pages:
print('Processing page no. %i...' % cnt)
processed_page = process_posts(page)
print('Processing page no. %i...' % cnt)
#processed_page = process_posts(page)
processed_page = process_posts(page['posts'])
successful = index_docs(cfg, processed_page)
if not successful:
print('Something went wrong. Exiting...')
exit(1)
#print(processed_page)
cnt += 1
else:
print('Getting post with uuid=%s...' % post_id)
post = get_post(cfg, post_id)
processed_post = process_posts([post])
successful = index_docs(cfg, processed_post)
successful = index_docs(cfg, processed_page)
if not successful:
print('Something went wrong. Exiting...')
exit(1)
#print(processed_page)
cnt += 1
print('Reindexing...')
task_url = reindexer(cfg)
@@ -67,6 +51,69 @@ def main( post_id=None ):
return task_url
def add_post( cfg, data ):
# with open('output/post_created.json', 'r') as fp:
# data = json.load(fp)
post_id = data['post']['current']['id']
print('Getting post with id=%s...' % post_id)
try:
post = get_post(cfg, post_id)
except Exception as e:
print(e)
return False
processed_post = process_posts([post])
successful = index_docs(cfg, processed_post)
if not successful:
raise Exception('Something went wrong. Exiting...')
exit(1)
print('Reindexing...')
task_url = reindexer(cfg)
#print(task_url)
if not successful:
raise Exception('Something went wrong. Exiting...')
exit(1)
print('done.')
return task_url
def delete_post( cfg, data ):
post_id = data['post']['previous']['id']
return index_cleaner(cfg, post_id)
def update_post( cfg, data ):
# post_id = data['post']['current']['id']
# index_cleaner(cfg, post_id)
return add_post( cfg, data )
if __name__ == "__main__":
main()
import yaml
with open("config.yaml", 'r') as yamlfile:
cfg = yaml.load(yamlfile)
index_everything(cfg)
#exit(0)
# with open('output/post_created.json', 'r') as fp:
# data = json.load(fp)
# add_post(cfg, data)
#
# with open('output/post_deleted.json', 'r') as fp:
# data = json.load(fp)
# delete_post(cfg, data)
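A minimal sketch of driving these functions directly from a saved Ghost webhook payload; the output/post_created.json path mirrors the commented-out example above, and cfg is assumed to be the dict loaded from config.yaml:

import json

with open('output/post_created.json', 'r') as fp:
    data = json.load(fp)

# data['post']['current']['id'] identifies the post to fetch from Ghost and index
task_url = add_post(cfg, data)
print(task_url)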
{
"requires": true,
"lockfileVersion": 1,
"dependencies": {
"buffer-equal-constant-time": {
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/buffer-equal-constant-time/-/buffer-equal-constant-time-1.0.1.tgz",
"integrity": "sha1-+OcRMvf/5uAaXJaXpMbz5I1cyBk="
},
"bytebuffer": {
"version": "5.0.1",
"resolved": "https://registry.npmjs.org/bytebuffer/-/bytebuffer-5.0.1.tgz",
"integrity": "sha1-WC7qSxqHO20CCkjVjfhfC7ps/d0=",
"requires": {
"long": "~3"
}
},
"ecdsa-sig-formatter": {
"version": "1.0.11",
"resolved": "https://registry.npmjs.org/ecdsa-sig-formatter/-/ecdsa-sig-formatter-1.0.11.tgz",
"integrity": "sha512-nagl3RYrbNv6kQkeJIpt6NJZy8twLB/2vtz6yN9Z4vRKHN4/QZJIEbqohALSgwKdnksuY3k5Addp5lg8sVoVcQ==",
"requires": {
"safe-buffer": "^5.0.1"
}
},
"jsonwebtoken": {
"version": "8.5.0",
"resolved": "https://registry.npmjs.org/jsonwebtoken/-/jsonwebtoken-8.5.0.tgz",
"integrity": "sha512-IqEycp0znWHNA11TpYi77bVgyBO/pGESDh7Ajhas+u0ttkGkKYIIAjniL4Bw5+oVejVF+SYkaI7XKfwCCyeTuA==",
"requires": {
"jws": "^3.2.1",
"lodash.includes": "^4.3.0",
"lodash.isboolean": "^3.0.3",
"lodash.isinteger": "^4.0.4",
"lodash.isnumber": "^3.0.3",
"lodash.isplainobject": "^4.0.6",
"lodash.isstring": "^4.0.1",
"lodash.once": "^4.0.0",
"ms": "^2.1.1",
"semver": "^5.6.0"
}
},
"jwa": {
"version": "1.4.0",
"resolved": "https://registry.npmjs.org/jwa/-/jwa-1.4.0.tgz",
"integrity": "sha512-mt6IHaq0ZZWDBspg0Pheu3r9sVNMEZn+GJe1zcdYyhFcDSclp3J8xEdO4PjZolZ2i8xlaVU1LetHM0nJejYsEw==",
"requires": {
"buffer-equal-constant-time": "1.0.1",
"ecdsa-sig-formatter": "1.0.11",
"safe-buffer": "^5.0.1"
}
},
"jws": {
"version": "3.2.1",
"resolved": "https://registry.npmjs.org/jws/-/jws-3.2.1.tgz",
"integrity": "sha512-bGA2omSrFUkd72dhh05bIAN832znP4wOU3lfuXtRBuGTbsmNmDXMQg28f0Vsxaxgk4myF5YkKQpz6qeRpMgX9g==",
"requires": {
"jwa": "^1.2.0",
"safe-buffer": "^5.0.1"
}
},
"lodash.includes": {
"version": "4.3.0",
"resolved": "https://registry.npmjs.org/lodash.includes/-/lodash.includes-4.3.0.tgz",
"integrity": "sha1-YLuYqHy5I8aMoeUTJUgzFISfVT8="
},
"lodash.isboolean": {
"version": "3.0.3",
"resolved": "https://registry.npmjs.org/lodash.isboolean/-/lodash.isboolean-3.0.3.tgz",
"integrity": "sha1-bC4XHbKiV82WgC/UOwGyDV9YcPY="
},
"lodash.isinteger": {
"version": "4.0.4",
"resolved": "https://registry.npmjs.org/lodash.isinteger/-/lodash.isinteger-4.0.4.tgz",
"integrity": "sha1-YZwK89A/iwTDH1iChAt3sRzWg0M="
},
"lodash.isnumber": {
"version": "3.0.3",
"resolved": "https://registry.npmjs.org/lodash.isnumber/-/lodash.isnumber-3.0.3.tgz",
"integrity": "sha1-POdoEMWSjQM1IwGsKHMX8RwLH/w="
},
"lodash.isplainobject": {
"version": "4.0.6",
"resolved": "https://registry.npmjs.org/lodash.isplainobject/-/lodash.isplainobject-4.0.6.tgz",
"integrity": "sha1-fFJqUtibRcRcxpC4gWO+BJf1UMs="
},
"lodash.isstring": {
"version": "4.0.1",
"resolved": "https://registry.npmjs.org/lodash.isstring/-/lodash.isstring-4.0.1.tgz",
"integrity": "sha1-1SfftUVuynzJu5XV2ur4i6VKVFE="
},
"lodash.once": {
"version": "4.1.1",
"resolved": "https://registry.npmjs.org/lodash.once/-/lodash.once-4.1.1.tgz",
"integrity": "sha1-DdOXEhPHxW34gJd9UEyI+0cal6w="
},
"long": {
"version": "3.2.0",
"resolved": "https://registry.npmjs.org/long/-/long-3.2.0.tgz",
"integrity": "sha1-2CG3E4yhy1gcFymQ7xTbIAtcR0s="
},
"ms": {
"version": "2.1.1",
"resolved": "https://registry.npmjs.org/ms/-/ms-2.1.1.tgz",
"integrity": "sha512-tgp+dl5cGk28utYktBsrFqA7HKgrhgPsg6Z/EfhWI4gl1Hwq8B/GmY/0oXZ6nF8hDVesS/FpnYaD/kOWhYQvyg=="