forked from Naresh-varma/hackathon23
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcreateEmbbedingForExistingData.js
121 lines (111 loc) · 3.9 KB
/
createEmbbedingForExistingData.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
const { OpenAI } = require("openai");
const BluebirdPromise = require("bluebird");
const request = require("request");
const fs = require("fs");
const MongoClient = require('mongodb').MongoClient;
const { Client } = require("@elastic/elasticsearch");
const _ = require('lodash');
const getEmbedding = require('./llmMain').getEmbeddings;
// Elasticsearch client used by makeBulkRequestToEls; targets a node on localhost:9200.
const client = new Client({
  node: 'http://localhost:9200',
});
// Load variables from a local .env file (if present) into process.env before they are read below.
require("dotenv").config();
// MongoDB connection string. Declared with `const` so it is no longer created as an
// implicit global, and overridable through the MONGO_URI environment variable.
// FIXME(security): the fallback literal embeds credentials in source control. Rotate them
// and delete the literal once MONGO_URI is provisioned wherever this script runs.
const MongoUri = process.env.MONGO_URI
  || "mongodb+srv://devadmin:eY20dy7lcdWKBwFN@development-in.qx9yt.mongodb.net/";
// NOTE(review): dbName is not referenced anywhere in the visible code; the script opens a
// differently named database when it connects. Confirm which one is intended.
const dbName = "ApplaudCloud-Naresh";
/**
 * Per-collection indexing configuration, keyed by MongoDB collection name.
 *
 * Each entry holds:
 *   - index:       name of the Elasticsearch index the documents are written to.
 *   - fields:      record fields whose values are concatenated into the text to embed.
 *   - vectorField: property under which the embedding is stored on the record.
 */
const mapper = Object.fromEntries([
  [
    '642132dd3d73be2200773da5-KnowledgeArticle',
    {
      index: 'knowledgearticle',
      fields: ['abstract', 'body'],
      vectorField: 'knowledge-vector',
    },
  ],
  [
    '642132dd3d73be2200773da5-Faq',
    {
      index: 'faqs',
      fields: ['answer'],
      vectorField: 'faqs-vector',
    },
  ],
  [
    '642132dd3d73be2200773da5-Vacancy',
    {
      index: 'vacancies',
      fields: ['jobTitle', 'yearsOfExpirence', 'jobDescription', 'skillText'],
      vectorField: 'vacancy-vector',
    },
  ],
  [
    '642132dd3d73be2200773da5-Personess',
    {
      index: 'personess',
      fields: ['firstName', 'lastName', 'jobLocation', 'skillText'],
      vectorField: 'personess-vector',
    },
  ],
]);
/**
 * Prepares MongoDB records for indexing, mutating them in place.
 *
 * For every record, one at a time and in order:
 *   - `_id` is replaced by a string `id` property;
 *   - the truthy values of the model's configured `fields` are joined (each followed
 *     by a space) into one text;
 *   - when that text is non-empty it is passed to getEmbedding, and a truthy result
 *     is stored on the record under the model's `vectorField`.
 *
 * @param {Array<Object>} data - records to enrich; they are modified in place.
 * @param {string} modelName - key into `mapper` selecting fields and vector field.
 * @returns {Promise<void>} resolves once every record has been processed.
 * @throws {TypeError} (as a rejection) when `modelName` has no entry in `mapper`.
 *   Any error raised by getEmbedding also rejects and stops the run.
 */
const processData = async (data, modelName) => {
  const modelDetails = mapper[modelName];
  if (!modelDetails) {
    // Fail with a readable message instead of "cannot read properties of undefined".
    throw new TypeError(`No mapper entry configured for model "${modelName}"`);
  }
  const { fields, vectorField } = modelDetails;

  // Deliberately sequential: only one embedding request is in flight at a time.
  for (const rec of data) {
    rec.id = `${rec._id}`;
    delete rec._id;

    const text = _.reduce(
      fields,
      (joined, field) => (rec[field] ? `${joined}${rec[field]} ` : joined),
      '',
    );
    // Nothing to embed; the record keeps its new `id` but gets no vector.
    if (!text) continue;

    console.log('text: ', text);
    const embedRes = await getEmbedding(text);
    if (embedRes) rec[vectorField] = embedRes;
  }
};
/**
 * Indexes the given documents into Elasticsearch with a single bulk request.
 *
 * Each document is preceded by an `index` action targeting `indexName`. No explicit
 * `_id` is supplied, so Elasticsearch assigns document ids.
 *
 * @param {Array<Object>} data - documents to index; empty or missing input is a no-op.
 * @param {string} indexName - name of the target Elasticsearch index.
 * @returns {Promise<void>} resolves after the bulk call returns. Per-document failures
 *   reported in the bulk response are logged but do not reject.
 * @throws Rejects with the client error when the bulk request itself fails.
 */
const makeBulkRequestToEls = async (data, indexName) => {
  // Do not send a bulk request with an empty body; there is nothing to index.
  if (!data || data.length === 0) return;

  const operations = data.flatMap((doc) => [{ index: { _index: indexName } }, doc]);

  let response;
  try {
    response = await client.bulk({ body: operations });
  } catch (err) {
    console.error('getting error while posting data to elastic search :', err);
    throw err;
  }

  // Older client releases wrap the payload as { body, statusCode, ... } while newer ones
  // resolve to the payload itself, so accept either shape.
  const wrapped = Boolean(response && response.body && response.body.items);
  const result = wrapped ? response.body : response;
  const items = (result && result.items) || [];
  if (items.length > 0) console.log('Response :', items[0].index);

  // A bulk call succeeds as a whole even when individual documents are refused;
  // surface those instead of dropping them silently.
  if (result && result.errors) {
    const failed = items.filter((item) => item.index && item.index.error);
    console.error(
      `${failed.length} of ${items.length} documents failed to index into "${indexName}" :`,
      failed.length > 0 ? failed[0].index.error : undefined,
    );
  }
};
// MongoDB collections that seedData walks through, in this order.
const collections = Array.of(
  '642132dd3d73be2200773da5-KnowledgeArticle',
  '642132dd3d73be2200773da5-Faq',
  '642132dd3d73be2200773da5-Vacancy',
  '642132dd3d73be2200773da5-Personess',
);
/**
 * Copies every configured collection from MongoDB into Elasticsearch.
 *
 * Collections are handled one at a time, in the order of `collections`. For each one
 * that has a `mapper` entry, all documents are read, enriched through processData and
 * written with makeBulkRequestToEls.
 *
 * @param {Object} db - MongoDB database handle exposing `collection(name)`.
 * @returns {Promise<void>} resolves after all collections have been handled.
 * @throws Rejects when processData or makeBulkRequestToEls fails. A failure while
 *   reading a collection is only logged and that collection is skipped.
 */
const seedData = async (db) => {
  for (const modelName of collections) {
    if (!mapper[modelName]) continue;

    const collection = db.collection(modelName);
    console.log('collection : ', collection);
    const cursor = collection.find({});

    let data;
    try {
      // Promise form of toArray: callback-style calls are not supported by newer
      // driver releases and would leave this step waiting forever.
      data = await cursor.toArray();
    } catch (err) {
      // Best effort: a collection that cannot be read does not abort the whole seed.
      console.error('Error while generating data :', err);
      continue;
    }

    if (_.isEmpty(data)) {
      console.log('no data found for model :', modelName);
      continue;
    }

    await processData(data, modelName);
    await makeBulkRequestToEls(data, mapper[modelName].index);
  }
  console.log('data generated');
};
// Script entry point: connect to MongoDB, seed Elasticsearch from it, then disconnect.
// The promise form of connect is used so a connection failure goes straight to the
// error handler instead of continuing with an undefined client.
MongoClient.connect(MongoUri)
  .then((mongoClient) => {
    // Named mongoClient so it does not shadow the module-level Elasticsearch `client`.
    const db = mongoClient.db("642132dd3d73be2200773da5");
    return seedData(db)
      .then(() => {
        console.log('seed completed');
      })
      // Close the connection on success and on failure so the process can exit.
      .finally(() => mongoClient.close());
  })
  .catch((err) => {
    console.error(err);
    // Signal the failure to the calling shell without cutting off pending output.
    process.exitCode = 1;
  });