Skip to content
Snippets Groups Projects
Commit fe27f6c3 authored by Sébastien DA ROCHA's avatar Sébastien DA ROCHA
Browse files

Merge branch 'development' into 'master'

Deploy images to minio

See merge request !6
parents 248df9ca 3d59ec28
No related branches found
No related tags found
2 merge requests!9REDMINE 13316 : crée les index avec 1 replica,!6Deploy images to minio
Pipeline #14438 passed
......@@ -7,6 +7,8 @@ variables:
sonarqube:
stage: sonar-analysis
tags:
- build
before_script:
- export PATH=$PATH:/usr/local/bin/sonar-scanner-3.2.0.1227-linux/bin/
- export NODE_PATH=$NODE_PATH:`npm root -g`
......
......@@ -14,3 +14,14 @@ For a local deployment:
1. `envsubst < config.template.yaml > config.yaml`
2. `docker-compose up [-d]`
## Running the app in dev
>python3 -m venv venv
>source venv/bin/activate
>pip install -r requirements.txt
>gunicorn --workers=2 -b 0.0.0.0:8001 --log-level=debug api:api
or: python main.py
content_getter:
ghost_api: ${GHOST_API}
ghost_admin_api_key: ${GHOST_ADMIN_API_KEY}
minio_access_key: ${MINIO_ACCESS_KEY}
minio_secret_key: ${MINIO_SECRET_KEY}
minio_write_host: ${MINIO_WRITE_HOST}
minio_read_url: ${MINIO_READ_URL}
indexer:
url: ${INDEXER__ES_URL}
......
......@@ -13,3 +13,7 @@ export REINDEXER__DESTINATION_POSTS_INDEX=<the_destination_elasticsearch_index_f
export REINDEXER__DESTINATION_PAGES_INDEX=<the_destination_elasticsearch_index_for_pages>
export REINDEXER__POSTS_TEMPLATE_NAME=<the_name_to_use_for_the_elasticsearch_dynamic_template_for_posts>
export REINDEXER__PAGES_TEMPLATE_NAME=<the_name_to_use_for_the_elasticsearch_dynamic_template_for_pages>
export MINIO_ACCESS_KEY=<the_minio_access_key>
export MINIO_SECRET_KEY=<the_minio_secret_key>
export MINIO_WRITE_HOST=<the_minio_write_host>
export MINIO_READ_URL=<the_minio_read_url>
import json
import time
import hashlib
import re
from pprint import pformat
from elasticsearch import Elasticsearch
from minio import Minio
import urllib.request
import logging
logging.basicConfig()
logger = logging.getLogger(__name__)
bucket_name="ghost-bucket"
if __name__ != '__main__':
gunicorn_logger = logging.getLogger('gunicorn.error')
logger.handlers = gunicorn_logger.handlers
......@@ -77,6 +83,7 @@ def index_docs( cfg, content_type, docs ):
es_body = ''
minio_client = initMinioClient(cfg)
for doc in docs:
# try:
# header['index']['_id'] = doc['id'] #hashlib.md5( json.dumps(doc, sort_keys=True).encode("utf-8") ).hexdigest()
......@@ -85,7 +92,9 @@ def index_docs( cfg, content_type, docs ):
# header['index']['_id'] = hashlib.md5( json.dumps(doc, sort_keys=True).encode("utf-8") ).hexdigest()
header['index']['_id'] = doc['content-fr']['id']
#print(header)
es_body += '{0}\n{1}\n'.format(json.dumps(header), json.dumps(doc))
doc=replaceGhostUrlByMinioUrl(cfg,minio_client,doc)
jsondoc=json.dumps(doc)
es_body += '{0}\n{1}\n'.format(json.dumps(header), jsondoc)
t1 = time.time()
......@@ -101,3 +110,80 @@ def index_docs( cfg, content_type, docs ):
#print("")
logging.critical("| Failed.")
return False
def initMinioClient(cfg):
    """Create a MinIO client and make sure the image bucket exists.

    The connection settings are read from ``cfg['content_getter']``
    (``minio_write_host``, ``minio_access_key``, ``minio_secret_key``).
    If the bucket named by the module-level ``bucket_name`` does not exist
    it is created and given a policy that lets anyone read its objects, so
    that the images can be served directly.

    Args:
        cfg: Configuration mapping with a ``content_getter`` section.

    Returns:
        The ``Minio`` client.
    """
    getter_cfg = cfg['content_getter']
    client = Minio(
        getter_cfg['minio_write_host'],
        access_key=getter_cfg['minio_access_key'],
        secret_key=getter_cfg['minio_secret_key'],
    )

    if client.bucket_exists(bucket_name):
        logger.debug(bucket_name+" exists")
        return client

    client.make_bucket(bucket_name)

    # Make the images publicly readable. The ARNs are derived from
    # ``bucket_name`` so the policy always targets the bucket created above.
    # NOTE(review): the policy is only applied when the bucket is created
    # here; an already existing bucket keeps whatever policy it has.
    bucket_arn = "arn:aws:s3:::" + bucket_name
    anyone = {"AWS": ["*"]}
    policy = {
        "Version": "2012-10-17",
        "Statement": [
            {
                "Effect": "Allow",
                "Principal": anyone,
                "Action": ["s3:GetBucketLocation", "s3:ListBucket"],
                "Resource": [bucket_arn],
            },
            {
                "Effect": "Allow",
                "Principal": anyone,
                "Action": ["s3:GetObject"],
                "Resource": [bucket_arn + "/*", bucket_arn + "/***"],
            },
            # NOTE(review): s3:ListBucket is already granted by the first
            # statement; kept as-is to leave the effective policy unchanged.
            {
                "Effect": "Allow",
                "Principal": anyone,
                "Action": ["s3:ListBucket"],
                "Resource": [bucket_arn],
                "Condition": {"StringEquals": {"s3:prefix": ["**"]}},
            },
        ],
    }
    client.set_bucket_policy(bucket_name, json.dumps(policy))
    return client
def replaceGhostUrlByMinioUrl(cfg,client,doc):
    """Copy a document's Ghost images to MinIO and point the document at them.

    Two fields of ``doc['content-fr']`` are processed:

    * ``feature_image``: when set and containing an ``images/`` segment, the
      image is pushed to MinIO and the field is replaced by
      ``<minio_read_url>/<bucket_name>/<path after 'images/'>``.
    * ``html``: every ``src="https://ghost..."`` URL is pushed to MinIO, then
      every ``https://ghost.../images`` prefix in the markup is replaced by
      ``<minio_read_url>/<bucket_name>``.

    URLs without an ``images/`` segment cannot be mapped to an object key;
    they are left untouched instead of aborting the whole indexing run.

    Args:
        cfg: Configuration mapping; ``cfg['content_getter']['minio_read_url']``
            is the public base URL of the MinIO server.
        client: MinIO client handed to ``pushToMinio``.
        doc: Document with a ``content-fr`` mapping holding ``id`` and,
            optionally, ``html`` and ``feature_image``.

    Returns:
        The same ``doc`` object, modified in place.
    """
    minio_base = cfg['content_getter']['minio_read_url'] + '/' + bucket_name
    content = doc['content-fr']

    # pformat() of a whole document is expensive; only pay for it when the
    # debug output will actually be emitted.
    if logger.isEnabledFor(logging.DEBUG):
        logger.debug(pformat(content['id']))
        logger.debug(pformat(doc))

    feature_image = content.get('feature_image')
    if feature_image is not None:
        logger.debug('feature_image: %s', feature_image)
        parts = feature_image.split('images/')
        if len(parts) > 1:
            pushToMinio(client, bucket_name, feature_image)
            new_url = minio_base + '/' + parts[1]
            logger.debug('New URL:%s', new_url)
            content['feature_image'] = new_url
        else:
            logger.debug('feature_image left unchanged (no images/ segment)')

    html = content.get('html')
    if html is not None:
        for url in re.findall(r'src="(https://ghost[^"]*)', html):
            logger.debug('URL %s', url)
            # Only URLs with an images/ segment map to an object key.
            if 'images/' in url:
                pushToMinio(client, bucket_name, url)

        # A callable replacement keeps backslashes or group references in the
        # configured URL from being interpreted by re.sub.
        new_html = re.sub(
            r'(https://ghost[^"]*/images)', lambda _match: minio_base, html)
        logger.debug(new_html)
        content['html'] = new_html

    return doc
def pushToMinio( client,bucket_name,image ):
    """Upload the image found at URL *image* to MinIO unless already stored.

    The object key is the part of the URL that follows ``images/``. When an
    object with that key can already be stat'ed in the bucket nothing is
    transferred; otherwise the image is downloaded and streamed to MinIO.
    Download/upload failures are logged and do not propagate.

    Args:
        client: Object exposing ``stat_object`` and ``put_object``.
        bucket_name: Name of the destination bucket.
        image: URL of the image to copy.

    Returns:
        ``False`` when *image* has no ``images/`` segment (no key can be
        derived, nothing is done); ``True`` in every other case, including a
        logged upload failure.
    """
    parts = image.split('images/')
    if len(parts) < 2:
        logger.warning("erreur avec l'image "+image)
        return False
    file_path = parts[1]

    # Check whether the image already exists. Any stat failure is treated as
    # "missing" (best effort), in which case an upload is attempted.
    try:
        client.stat_object(bucket_name, file_path)
        already_stored = True
    except Exception:
        already_stored = False

    if already_stored:
        return True

    try:
        # The context manager closes the HTTP response even when the upload
        # fails, so no connection is leaked per image.
        with urllib.request.urlopen(image) as data:
            client.put_object(
                bucket_name, file_path, data,
                length=-1, part_size=10*1024*1024)
    except Exception:
        logger.warning("erreur avec l'image "+image, exc_info=True)
    return True
\ No newline at end of file
......@@ -5,3 +5,4 @@ elasticsearch>=6.0.0,<7.0.0
flask
gunicorn
pyyaml
minio
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment