Newer
Older
Alessandro Cerioni
committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
from elasticsearch import Elasticsearch
from pymongo import MongoClient
import time
import msgpack
import pika
from utils.my_logging import logging
from utils.exit_gracefully import exit_gracefully
class NotEmptyQueueException(Exception):
    """Signal that the source index is still receiving documents.

    Raised by ``main`` when the document count of the source index changes
    between two consecutive counts, meaning reindexing should not start yet.
    """
def main(cfg, wait=True):
    """Launch an asynchronous reindex from the source index to the destination index.

    Steps performed:
      1. Optionally (``wait=True``) check that the source index is stable,
         i.e. that its document count does not change over a 5 second window.
      2. (Re)install the index template on the destination cluster.
      3. Submit a remote ``_reindex`` request with ``wait_for_completion=False``.
      4. Store the URL of the resulting Elasticsearch task in the MongoDB
         ``indexing-sessions`` collection, keyed by ``cfg['session']['id']``.

    Args:
        cfg: Configuration mapping. Reads ``cfg['reindexer']`` (``source_url``,
            ``source_index``, ``destination_url``, ``destination_index``,
            ``template_name``), ``cfg['mongo']`` (``username``, ``password``,
            ``host``, ``port``, ``report-db``) and ``cfg['session']['id']``.
        wait: When true, verify that nothing is still being written to the
            source index before starting.

    Raises:
        NotEmptyQueueException: If ``wait`` is true and the source index
            document count changed during the observation window.
    """
    from es_template import template

    reindexer_cfg = cfg['reindexer']

    # NOTE: this mutates the object imported from es_template in place.
    template['index_patterns'] = [reindexer_cfg['destination_index']]

    if wait:
        source_index = reindexer_cfg['source_index']
        es_source = Elasticsearch([reindexer_cfg['source_url']], timeout=60)

        # Pass the index by keyword (as done for refresh below): the meaning of
        # the first positional argument of count() depends on the client version.
        count_before = es_source.count(index=source_index).get('count')

        time.sleep(5)

        es_source.indices.refresh(index=source_index)
        count_after = es_source.count(index=source_index).get('count')

        if count_after != count_before:
            raise NotEmptyQueueException('Documents are still being pushed to the source index. Waiting...')

    es = Elasticsearch([reindexer_cfg['destination_url']], timeout=60)

    # Best-effort removal of a previous template: a failure here (typically the
    # template not existing yet) must not abort the run, but it is logged rather
    # than silently discarded, and SystemExit/KeyboardInterrupt are not swallowed.
    try:
        rep = es.indices.delete_template(reindexer_cfg['template_name'])
    except Exception as exc:
        logging.debug(exc)
    else:
        logging.debug(rep)

    rep = es.indices.put_template(reindexer_cfg['template_name'], template)
    logging.debug(rep)

    body = {
        "source": {
            "remote": {"host": reindexer_cfg['source_url']},
            "index": reindexer_cfg['source_index'],
            "type": "_doc",
            "size": 100
        },
        "dest": {
            "index": reindexer_cfg['destination_index'],
            "type": "_doc"
        }
    }

    # Asynchronous request: the reply is expected to carry the id of the
    # background task instead of the reindex result.
    rep = es.reindex(body, wait_for_completion=False)

    if 'task' not in rep:
        logging.error('Reindex request did not return a task id: %s' % (rep,))
        return

    # Log the indexing session into MongoDB.
    # NOTE(review): credentials are interpolated into the URI verbatim; values
    # containing reserved characters (e.g. '@', ':', '/') would need escaping.
    mongo_client = MongoClient('mongodb://%s:%s@%s:%s/' % (cfg['mongo']['username'],
                                                           cfg['mongo']['password'],
                                                           cfg['mongo']['host'],
                                                           cfg['mongo']['port']))
    try:
        mongo_db = mongo_client[cfg['mongo']['report-db']]
        mongo_collection = mongo_db['indexing-sessions']

        reindex_task_url = "%s/_tasks/%s" % (reindexer_cfg['destination_url'], rep['task'])

        mongo_collection.find_one_and_update(
            {'_id': cfg['session']['id']},
            {'$set': {'reindex_task': reindex_task_url}},
            upsert=True
        )
    finally:
        # Release the connection pool even if the update fails.
        mongo_client.close()
if __name__ == '__main__':
    # Script entry point: load config.yaml from the working directory and run
    # main() once, logging (not propagating) any failure.

    import yaml
    import time
    import signal

    # Route Ctrl-C (SIGINT) to the project's exit handler.
    signal.signal(signal.SIGINT, exit_gracefully)

    with open("config.yaml", 'r') as yamlfile:
        # safe_load instead of yaml.load: calling yaml.load without a Loader is
        # deprecated (and rejected by recent PyYAML releases), and the default
        # loader could construct arbitrary Python objects from the file.
        cfg = yaml.safe_load(yamlfile)

    try:
        main(cfg, wait=True)
    except pika.exceptions.ChannelClosed:
        logging.info("Waiting for tasks...")
    except Exception as e:
        # Top-level boundary: report the error instead of crashing with a traceback.
        logging.error(e)