ghost_content_processor.py 5.89 KB
Newer Older
Alessandro CERIONI's avatar
Alessandro CERIONI committed
1
2
3
4
5
6
7
8
9
10
import logging
# basicConfig() with no arguments installs a default stream handler on the
# root logger when the root logger has no handlers yet.
logging.basicConfig()
logger = logging.getLogger(__name__)

# When this file is imported rather than executed as a script, mirror the
# level and the handler list of the logger named 'gunicorn.error'
# (presumably so that records end up in gunicorn's error log -- confirm
# against the deployment).
if not __name__ == '__main__':
    gunicorn_logger = logging.getLogger('gunicorn.error')
    logger.setLevel(gunicorn_logger.level)
    # The very same list object is shared, not copied.
    logger.handlers = gunicorn_logger.handlers


11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
def find_relations( tags ):
    """Collect the slugs referenced by relation tags.

    A relation tag is a tag whose ``'name'`` starts with one of the prefixes
    ``#dataset-slug:``, ``#post-slug:`` or ``#page-slug:``; everything after
    the prefix is taken as the slug.

    :param tags: iterable of tag dicts; each one must have a ``'name'`` key.
    :return: dict with, for each kind that occurred at least once, an entry
        ``{'datasets' | 'posts' | 'pages': {'slugs': [slug, ...]}}``. Slugs
        keep the order of ``tags``. Kinds that never occur are absent, so
        the result is ``{}`` when no relation tag is present.
    :raises KeyError: if a tag has no ``'name'`` key.
    """
    # (tag-name prefix, key under which its slugs are grouped)
    prefix_to_kind = (
        ('#dataset-slug:', 'datasets'),
        ('#post-slug:', 'posts'),
        ('#page-slug:', 'pages'),
    )

    relations = dict()
    for tag in tags:
        name = tag['name']
        for prefix, kind in prefix_to_kind:
            if name.startswith(prefix):
                # Slice off the prefix instead of splitting on ':' so that a
                # value containing ':' is kept whole.
                slug = name[len(prefix):]
                relations.setdefault(kind, dict()).setdefault('slugs', []).append(slug)
                break  # the prefixes are mutually exclusive

    return relations


41
42
43
44
45
46
def process_posts( posts ):
    """Wrap each post into an output record.

    :param posts: iterable of post dicts; each one must have the keys
        ``'uuid'`` and ``'tags'``.
    :return: list with one dict per post, in input order, holding
        ``'content-fr'`` (the post dict itself, not a copy), ``'uuid'``,
        ``'type'`` (always ``'post'``) and ``'relations'`` (the result of
        ``find_relations`` applied to the post's tags).
    :raises KeyError: if a post lacks ``'uuid'`` or ``'tags'``.
    """
    output_posts = []

    for post in posts:
        output_posts.append({
            'content-fr': post,
            'uuid': post['uuid'],
            'type': 'post',
            'relations': find_relations( post['tags'] ),
        })

    return output_posts


Alessandro CERIONI's avatar
Alessandro CERIONI committed
60
61
62
63
64
65
def process_pages( pages ):
    """Wrap each page into an output record.

    :param pages: iterable of page dicts; each one must have the keys
        ``'uuid'`` and ``'tags'``.
    :return: list with one dict per page, in input order, holding
        ``'content-fr'`` (the page dict itself, not a copy), ``'uuid'``,
        ``'type'`` (always ``'page'``) and ``'relations'`` (the result of
        ``find_relations`` applied to the page's tags).
    :raises KeyError: if a page lacks ``'uuid'`` or ``'tags'``.
    """
    output_pages = []

    for page in pages:
        output_pages.append({
            'content-fr': page,
            'uuid': page['uuid'],
            'type': 'page',
            'relations': find_relations( page['tags'] ),
        })

    return output_pages


79
80
81

if __name__ == '__main__':
    # Manual smoke test: run process_posts on one hard-coded sample post
    # and pretty-print the result as JSON.

    import json

    # Sample post dict (a draft with a single, non-relation tag 'test_tag').
    posts = [{u'og_image': None, u'codeinjection_foot': None, u'plaintext': u'Le contenu\n\nL\xe9gende\n--------------------------------------------------------------------------------\n\n/image \nhttps://highway-to-data.alpha.grandlyon.com/wp2-intothesky/2019/02/home-smartdata.jpg\n\nthis is another update', u'excerpt': u'Le contenu\n\nL\xe9gende\n--------------------------------------------------------------------------------\n\n/image \nhttps://highway-to-data.alpha.grandlyon.com/wp2-intothesky/2019/02/home-smartdata.jpg\n\nthis is another update', u'updated_at': u'2019-03-03T16:49:28.000Z', u'custom_template': None, u'published_at': u'2019-03-03T16:39:59.000Z', u'og_title': None, u'twitter_image': None, u'id': u'5c7906a70a48e60001b47cc4', u'twitter_description': None, u'meta_description': None, u'uuid': u'c9204cc3-da09-4759-abf9-36fab98c6e6e', u'title': u'Mon premier brouillon', u'twitter_title': None, u'html': u'<p>Le contenu</p><!--kg-card-begin: image--><figure class="kg-card kg-image-card kg-card-hascaption"><img src="https://ghost-intothesky.alpha.grandlyon.com/content/images/2019/03/Screenshot-from-2019-02-14-14-09-51.png" class="kg-image"><figcaption>L\xe9gende</figcaption></figure><!--kg-card-end: image--><!--kg-card-begin: hr--><hr><!--kg-card-end: hr--><!--kg-card-begin: markdown--><p>/image <a href="https://highway-to-data.alpha.grandlyon.com/wp2-intothesky/2019/02/home-smartdata.jpg">https://highway-to-data.alpha.grandlyon.com/wp2-intothesky/2019/02/home-smartdata.jpg</a></p>\n<!--kg-card-end: markdown--><p>this is another update</p>', u'primary_author': {u'website': None, u'bio': None, u'meta_title': None, u'name': u'Alessandro Cerioni', u'updated_at': u'2019-03-03T16:29:36.000Z', u'twitter': None, u'profile_image': None, u'accessibility': None, u'id': u'1', u'status': u'active', u'tour': u'["getting-started","featured-post"]', u'url': u'https://ghost-intothesky.alpha.grandlyon.com/author/alessandro/', u'facebook': None, u'location': None, 
u'cover_image': None, u'created_at': u'2019-03-01T08:26:45.000Z', u'meta_description': None, u'slug': u'alessandro', u'last_seen': u'2019-03-03T16:29:36.000Z'}, u'status': u'draft', u'primary_tag': {u'meta_description': None, u'meta_title': None, u'feature_image': None, u'name': u'test_tag', u'url': u'https://ghost-intothesky.alpha.grandlyon.com/404/', u'created_at': u'2019-03-03T16:45:44.000Z', u'updated_at': u'2019-03-03T16:45:44.000Z', u'visibility': u'public', u'id': u'5c7c04b8bf1a290001e7eb66', u'slug': u'test_tag', u'description': None}, u'meta_title': None, u'og_description': None, u'feature_image': None, u'tags': [{u'meta_description': None, u'meta_title': None, u'feature_image': None, u'name': u'test_tag', u'url': u'https://ghost-intothesky.alpha.grandlyon.com/404/', u'created_at': u'2019-03-03T16:45:44.000Z', u'updated_at': u'2019-03-03T16:45:44.000Z', u'visibility': u'public', u'id': u'5c7c04b8bf1a290001e7eb66', u'slug': u'test_tag', u'description': None}], u'comment_id': u'5c7906a70a48e60001b47cc4', u'custom_excerpt': None, u'authors': [{u'website': None, u'bio': None, u'meta_title': None, u'name': u'Alessandro Cerioni', u'updated_at': u'2019-03-03T16:29:36.000Z', u'twitter': None, u'profile_image': None, u'accessibility': None, u'id': u'1', u'status': u'active', u'tour': u'["getting-started","featured-post"]', u'url': u'https://ghost-intothesky.alpha.grandlyon.com/author/alessandro/', u'facebook': None, u'location': None, u'cover_image': None, u'created_at': u'2019-03-01T08:26:45.000Z', u'meta_description': None, u'slug': u'alessandro', u'last_seen': u'2019-03-03T16:29:36.000Z'}], u'slug': u'mon-premier-brouillon', u'url': u'https://ghost-intothesky.alpha.grandlyon.com/404/', u'created_at': u'2019-03-01T10:17:11.000Z', u'featured': True, u'codeinjection_head': None}]


    output = process_posts(posts)
    print( json.dumps(output, indent=4) )