initialize embeddings
Browse files
App/Embedding/EmbeddingRoutes.py
CHANGED
@@ -15,9 +15,18 @@ async def create_embeddings(task_id):
|
|
15 |
|
16 |
return
|
17 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
|
19 |
# search
|
20 |
# update?
|
21 |
-
@embeddigs_router.
|
22 |
-
async def search_embeddings(query, task_id):
|
23 |
return search(query=query, task_id=task_id)
|
|
|
15 |
|
16 |
return
|
17 |
|
18 |
+
@embeddigs_router.get("/create_summary")
|
19 |
+
async def create_embeddings(task_id):
|
20 |
+
item = await Transcriptions.objects.filter(task_id=task_id).first()
|
21 |
+
temp = item.content
|
22 |
+
chunks = generateChunks(temp, task_id)
|
23 |
+
encode(chunks)
|
24 |
+
|
25 |
+
return
|
26 |
+
|
27 |
|
28 |
# search
|
29 |
# update?
|
30 |
+
@embeddigs_router.post("/search_embeddings")
|
31 |
+
async def search_embeddings(query:str, task_id:str):
|
32 |
return search(query=query, task_id=task_id)
|
App/Embedding/utils/Initialize.py
CHANGED
@@ -1,14 +1,13 @@
|
|
1 |
from langchain.embeddings import HuggingFaceEmbeddings
|
2 |
from langchain.docstore.document import Document
|
3 |
from langchain.vectorstores import MongoDBAtlasVectorSearch
|
4 |
-
from langchain.vectorstores import FAISS
|
5 |
from pymongo import MongoClient
|
6 |
from motor.motor_asyncio import AsyncIOMotorClient
|
7 |
-
import os
|
8 |
|
9 |
|
10 |
-
completion_base=os.environ.get("completion_base")
|
11 |
-
openai_api_key=os.environ.get("openai_api_key")
|
12 |
mongoDB = os.environ.get("MONGO_DB")
|
13 |
template = """### Given the following context
|
14 |
### Context
|
@@ -54,9 +53,7 @@ async def fetch_data(question, context):
|
|
54 |
|
55 |
|
56 |
async def delete_documents(task_id):
|
57 |
-
client = AsyncIOMotorClient(
|
58 |
-
mongoDB
|
59 |
-
)
|
60 |
db = client["transcriptions"]
|
61 |
collection = db["videos"]
|
62 |
|
@@ -95,9 +92,7 @@ def generateChunks(chunks, task_id, n=100):
|
|
95 |
|
96 |
|
97 |
def search(query: str, task_id: str):
|
98 |
-
mongo_client = MongoClient(
|
99 |
-
mongoDB
|
100 |
-
)
|
101 |
model_name = "BAAI/bge-base-en"
|
102 |
collection = mongo_client["transcriptions"]["videos"]
|
103 |
embeddings = HuggingFaceEmbeddings(model_name=model_name)
|
@@ -120,7 +115,8 @@ def search(query: str, task_id: str):
|
|
120 |
)
|
121 |
# data =[d.dict() for d in data]
|
122 |
# print(data[0].metadata.exclude({'_id','embedding'}))
|
123 |
-
|
|
|
124 |
# agent =vectorstore.as_retriever(
|
125 |
|
126 |
# )
|
@@ -128,9 +124,7 @@ def search(query: str, task_id: str):
|
|
128 |
|
129 |
|
130 |
def encode(temp: list[Document]):
|
131 |
-
mongo_client = MongoClient(
|
132 |
-
mongoDB
|
133 |
-
)
|
134 |
model_name = "BAAI/bge-base-en"
|
135 |
collection = mongo_client["transcriptions"]["videos"]
|
136 |
embeddings = HuggingFaceEmbeddings(model_name=model_name)
|
|
|
1 |
from langchain.embeddings import HuggingFaceEmbeddings
|
2 |
from langchain.docstore.document import Document
|
3 |
from langchain.vectorstores import MongoDBAtlasVectorSearch
|
|
|
4 |
from pymongo import MongoClient
|
5 |
from motor.motor_asyncio import AsyncIOMotorClient
|
6 |
+
import os,pprint
|
7 |
|
8 |
|
9 |
+
completion_base = os.environ.get("completion_base")
|
10 |
+
openai_api_key = os.environ.get("openai_api_key")
|
11 |
mongoDB = os.environ.get("MONGO_DB")
|
12 |
template = """### Given the following context
|
13 |
### Context
|
|
|
53 |
|
54 |
|
55 |
async def delete_documents(task_id):
|
56 |
+
client = AsyncIOMotorClient(mongoDB)
|
|
|
|
|
57 |
db = client["transcriptions"]
|
58 |
collection = db["videos"]
|
59 |
|
|
|
92 |
|
93 |
|
94 |
def search(query: str, task_id: str):
|
95 |
+
mongo_client = MongoClient(mongoDB)
|
|
|
|
|
96 |
model_name = "BAAI/bge-base-en"
|
97 |
collection = mongo_client["transcriptions"]["videos"]
|
98 |
embeddings = HuggingFaceEmbeddings(model_name=model_name)
|
|
|
115 |
)
|
116 |
# data =[d.dict() for d in data]
|
117 |
# print(data[0].metadata.exclude({'_id','embedding'}))
|
118 |
+
pprint.pprint(data[0].metadata)
|
119 |
+
return [{"text": d.page_content,'start':d.metadata['start'],"end":d.metadata['end']} for d in data]
|
120 |
# agent =vectorstore.as_retriever(
|
121 |
|
122 |
# )
|
|
|
124 |
|
125 |
|
126 |
def encode(temp: list[Document]):
|
127 |
+
mongo_client = MongoClient(mongoDB)
|
|
|
|
|
128 |
model_name = "BAAI/bge-base-en"
|
129 |
collection = mongo_client["transcriptions"]["videos"]
|
130 |
embeddings = HuggingFaceEmbeddings(model_name=model_name)
|