Commit 528db148 authored by Sébastien DA ROCHA's avatar Sébastien DA ROCHA
Browse files

Merge branch 'image-servie-par-minio' into 'development'

image servie par minio

See merge request !4
parents d61304fc 3f046e88
Pipeline #14413 canceled with stages
......@@ -14,3 +14,14 @@ For a local deployment:
1. `envsubst < config.template.yaml > config.yaml`
2. `docker-compose up [-d]`
## Running the app in dev
>python3 -m venv venv
>source venv/bin/activate
>pip install -r requirements.txt
>gunicorn --workers=2 -b 0.0.0.0:8001 --log-level=debug api:api
or: `python main.py`
content_getter:
ghost_api: ${GHOST_API}
ghost_admin_api_key: ${GHOST_ADMIN_API_KEY}
minio_access_key: ${MINIO_ACCESS_KEY}
minio_secret_key: ${MINIO_SECRET_KEY}
minio_write_host: ${MINIO_WRITE_HOST}
minio_read_url: ${MINIO_READ_URL}
indexer:
url: ${INDEXER__ES_URL}
......
......@@ -13,3 +13,7 @@ export REINDEXER__DESTINATION_POSTS_INDEX=<the_destination_elasticsearch_index_f
export REINDEXER__DESTINATION_PAGES_INDEX=<the_destination_elasticsearch_index_for_pages>
export REINDEXER__POSTS_TEMPLATE_NAME=<the_name_to_use_for_the_elasticsearch_dynamic_template_for_posts>
export REINDEXER__PAGES_TEMPLATE_NAME=<the_name_to_use_for_the_elasticsearch_dynamic_template_for_pages>
export MINIO_ACCESS_KEY=<the_minio_access_key>
export MINIO_SECRET_KEY=<the_minio_secret_key>
export MINIO_WRITE_HOST=<the_minio_write_host>
export MINIO_READ_URL=<the_minio_read_url>
import json
import time
import hashlib
import re
from pprint import pformat
from elasticsearch import Elasticsearch
from minio import Minio
import urllib.request
import logging
# Install the default root log handler so records are emitted even when no
# other logging configuration has been applied.
logging.basicConfig()
# Module-level logger used by the functions of this module.
logger = logging.getLogger(__name__)
# Name of the MinIO bucket the Ghost images are copied into.
bucket_name="ghost-bucket"
# When the module is imported rather than run as a script, emit this module's
# records through the handlers of the 'gunicorn.error' logger.
if __name__ != '__main__':
gunicorn_logger = logging.getLogger('gunicorn.error')
logger.handlers = gunicorn_logger.handlers
......@@ -77,6 +83,7 @@ def index_docs( cfg, content_type, docs ):
es_body = ''
minio_client = initMinioClient(cfg)
for doc in docs:
# try:
# header['index']['_id'] = doc['id'] #hashlib.md5( json.dumps(doc, sort_keys=True).encode("utf-8") ).hexdigest()
......@@ -85,7 +92,9 @@ def index_docs( cfg, content_type, docs ):
# header['index']['_id'] = hashlib.md5( json.dumps(doc, sort_keys=True).encode("utf-8") ).hexdigest()
header['index']['_id'] = doc['content-fr']['id']
#print(header)
es_body += '{0}\n{1}\n'.format(json.dumps(header), json.dumps(doc))
doc=replaceGhostUrlByMinioUrl(cfg,minio_client,doc)
jsondoc=json.dumps(doc)
es_body += '{0}\n{1}\n'.format(json.dumps(header), jsondoc)
t1 = time.time()
......@@ -101,3 +110,80 @@ def index_docs( cfg, content_type, docs ):
#print("")
logging.critical("| Failed.")
return False
def _public_read_policy(bucket):
    """Return the bucket policy document granting anonymous read access to *bucket*.

    The statements allow any principal to locate and list the bucket and to
    fetch its objects, so stored images can be served without credentials.
    """
    bucket_arn = "arn:aws:s3:::" + bucket
    return {
        "Version": "2012-10-17",
        "Statement": [
            {
                "Effect": "Allow",
                "Principal": {"AWS": ["*"]},
                "Action": ["s3:GetBucketLocation", "s3:ListBucket"],
                "Resource": [bucket_arn],
            },
            {
                "Effect": "Allow",
                "Principal": {"AWS": ["*"]},
                "Action": ["s3:GetObject"],
                "Resource": [bucket_arn + "/*", bucket_arn + "/***"],
            },
            {
                "Effect": "Allow",
                "Principal": {"AWS": ["*"]},
                "Action": ["s3:ListBucket"],
                "Resource": [bucket_arn],
                "Condition": {"StringEquals": {"s3:prefix": ["**"]}},
            },
        ],
    }


def initMinioClient(cfg):
    """Create the MinIO client and make sure the image bucket exists.

    Args:
        cfg: Configuration mapping; ``cfg['content_getter']`` must provide
            ``minio_access_key``, ``minio_secret_key`` and ``minio_write_host``.

    Returns:
        The ``Minio`` client connected to the write host.

    Raises:
        KeyError: If one of the MinIO settings is missing from *cfg*.
    """
    minio_access_key = cfg['content_getter']['minio_access_key']
    minio_secret_key = cfg['content_getter']['minio_secret_key']
    minio_write_host = cfg['content_getter']['minio_write_host']
    client = Minio(
        minio_write_host,
        access_key=minio_access_key,
        secret_key=minio_secret_key,
    )
    if client.bucket_exists(bucket_name):
        logger.debug(bucket_name+" exists")
    else:
        client.make_bucket(bucket_name)
        # Make the images readable without credentials. The policy is derived
        # from bucket_name so it always targets the bucket created above.
        # NOTE(review): the policy is applied only when the bucket is first
        # created -- confirm this matches the intended behaviour.
        policy = _public_read_policy(bucket_name)
        client.set_bucket_policy(bucket_name, json.dumps(policy))
    return client
def replaceGhostUrlByMinioUrl(cfg,client,doc):
    """Copy the Ghost-hosted images of *doc* to MinIO and point the document at the copies.

    The feature image and every ``src="https://ghost..."`` URL found in the
    French HTML body are pushed to the bucket, then their Ghost
    ``.../images`` prefix is replaced by ``<minio_read_url>/<bucket_name>``.

    Args:
        cfg: Configuration mapping providing
            ``cfg['content_getter']['minio_read_url']``.
        client: MinIO client handed over to ``pushToMinio``.
        doc: Document whose ``'content-fr'`` entry holds ``'id'``, ``'html'``
            and ``'feature_image'``; it is modified in place.

    Returns:
        The same *doc* object, with rewritten URLs.
    """
    minio_read_url = cfg['content_getter']['minio_read_url']
    fr_content = doc['content-fr']['html']
    logger.debug(pformat(doc['content-fr']['id']))
    logger.debug(pformat(doc))
    minio_base = minio_read_url + '/' + bucket_name

    feature_image = doc['content-fr']['feature_image']
    if feature_image is not None:
        logger.debug('feature_image: '+feature_image)
        segments = feature_image.split('images/')
        if len(segments) > 1:
            pushToMinio(client, bucket_name, feature_image)
            new_url = minio_base + '/' + segments[1]
            logger.debug('New URL:'+new_url)
            doc['content-fr']['feature_image'] = new_url
        else:
            # Without an 'images/' segment there is no object key to derive;
            # keep the original URL instead of aborting the whole indexing run.
            logger.warning('feature_image left unchanged (no images/ segment): %s', feature_image)

    if fr_content is None:
        # No HTML body: nothing to scan or rewrite.
        return doc

    urls = re.findall(r'src="(https:\/\/ghost[^"]*)', fr_content)
    for url in urls:
        logger.debug('URL '+url)
        if 'images/' not in url:
            # Ghost URL outside the images folder: it cannot be mapped to an
            # object key and is not touched by the substitution below.
            continue
        pushToMinio(client, bucket_name, url)
    # A callable replacement keeps the MinIO URL literal: a replacement string
    # would have its backslashes and group references interpreted by re.sub.
    newcontent = re.sub(r'(https:\/\/ghost[^"]*\/images)', lambda _match: minio_base, fr_content)
    logger.debug(newcontent)
    doc['content-fr']['html'] = newcontent
    return doc
def pushToMinio( client,bucket_name,image ):
    """Copy the image at URL *image* into *bucket_name* unless it is already stored.

    The object key is the part of the URL that follows ``images/``. The upload
    is best effort: failures are logged and never raised to the caller.

    Args:
        client: MinIO client exposing ``stat_object`` and ``put_object``.
        bucket_name: Name of the destination bucket.
        image: URL of the image to copy.

    Returns:
        Always ``True``.
    """
    segments = image.split('images/')
    if len(segments) < 2:
        # No 'images/' segment, so no object key can be derived from the URL.
        logger.warning("no images/ segment in URL, image skipped: %s", image)
        return True
    file_path = segments[1]

    # Check whether the image is already stored; any failure of the lookup is
    # treated as "not stored yet".
    try:
        client.stat_object(bucket_name, file_path)
        already_stored = True
    except Exception:
        already_stored = False

    # Nothing to do when the image already exists, otherwise send it to MinIO.
    if not already_stored:
        try:
            # The context manager closes the HTTP response even if the upload fails.
            with urllib.request.urlopen(image) as data:
                client.put_object(
                    bucket_name, file_path, data, length=-1, part_size=10*1024*1024)
        except Exception:
            logger.warning("erreur avec l'image "+image, exc_info=True)
    return True
\ No newline at end of file
......@@ -5,3 +5,4 @@ elasticsearch>=6.0.0,<7.0.0
flask
gunicorn
pyyaml
minio
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment