Mbonea committed on
Commit
55174eb
·
1 Parent(s): f1e72fe

initialize embeddings

Browse files
App/Embedding/EmbeddingRoutes.py CHANGED
@@ -15,9 +15,18 @@ async def create_embeddings(task_id):
15
 
16
  return
17
 
 
 
 
 
 
 
 
 
 
18
 
19
  # search
20
  # update?
21
- @embeddigs_router.get("/search_embeddings")
22
- async def search_embeddings(query, task_id):
23
  return search(query=query, task_id=task_id)
 
15
 
16
  return
17
 
18
+ @embeddigs_router.get("/create_summary")
19
+ async def create_embeddings(task_id):
20
+ item = await Transcriptions.objects.filter(task_id=task_id).first()
21
+ temp = item.content
22
+ chunks = generateChunks(temp, task_id)
23
+ encode(chunks)
24
+
25
+ return
26
+
27
 
28
  # search
29
  # update?
30
+ @embeddigs_router.post("/search_embeddings")
31
+ async def search_embeddings(query:str, task_id:str):
32
  return search(query=query, task_id=task_id)
App/Embedding/utils/Initialize.py CHANGED
@@ -1,14 +1,13 @@
1
  from langchain.embeddings import HuggingFaceEmbeddings
2
  from langchain.docstore.document import Document
3
  from langchain.vectorstores import MongoDBAtlasVectorSearch
4
- from langchain.vectorstores import FAISS
5
  from pymongo import MongoClient
6
  from motor.motor_asyncio import AsyncIOMotorClient
7
- import os
8
 
9
 
10
- completion_base=os.environ.get("completion_base")
11
- openai_api_key=os.environ.get("openai_api_key")
12
  mongoDB = os.environ.get("MONGO_DB")
13
  template = """### Given the following context
14
  ### Context
@@ -54,9 +53,7 @@ async def fetch_data(question, context):
54
 
55
 
56
  async def delete_documents(task_id):
57
- client = AsyncIOMotorClient(
58
- mongoDB
59
- )
60
  db = client["transcriptions"]
61
  collection = db["videos"]
62
 
@@ -95,9 +92,7 @@ def generateChunks(chunks, task_id, n=100):
95
 
96
 
97
  def search(query: str, task_id: str):
98
- mongo_client = MongoClient(
99
- mongoDB
100
- )
101
  model_name = "BAAI/bge-base-en"
102
  collection = mongo_client["transcriptions"]["videos"]
103
  embeddings = HuggingFaceEmbeddings(model_name=model_name)
@@ -120,7 +115,8 @@ def search(query: str, task_id: str):
120
  )
121
  # data =[d.dict() for d in data]
122
  # print(data[0].metadata.exclude({'_id','embedding'}))
123
- return [{"text": d.page_content} for d in data]
 
124
  # agent =vectorstore.as_retriever(
125
 
126
  # )
@@ -128,9 +124,7 @@ def search(query: str, task_id: str):
128
 
129
 
130
  def encode(temp: list[Document]):
131
- mongo_client = MongoClient(
132
- mongoDB
133
- )
134
  model_name = "BAAI/bge-base-en"
135
  collection = mongo_client["transcriptions"]["videos"]
136
  embeddings = HuggingFaceEmbeddings(model_name=model_name)
 
1
  from langchain.embeddings import HuggingFaceEmbeddings
2
  from langchain.docstore.document import Document
3
  from langchain.vectorstores import MongoDBAtlasVectorSearch
 
4
  from pymongo import MongoClient
5
  from motor.motor_asyncio import AsyncIOMotorClient
6
+ import os,pprint
7
 
8
 
9
+ completion_base = os.environ.get("completion_base")
10
+ openai_api_key = os.environ.get("openai_api_key")
11
  mongoDB = os.environ.get("MONGO_DB")
12
  template = """### Given the following context
13
  ### Context
 
53
 
54
 
55
  async def delete_documents(task_id):
56
+ client = AsyncIOMotorClient(mongoDB)
 
 
57
  db = client["transcriptions"]
58
  collection = db["videos"]
59
 
 
92
 
93
 
94
  def search(query: str, task_id: str):
95
+ mongo_client = MongoClient(mongoDB)
 
 
96
  model_name = "BAAI/bge-base-en"
97
  collection = mongo_client["transcriptions"]["videos"]
98
  embeddings = HuggingFaceEmbeddings(model_name=model_name)
 
115
  )
116
  # data =[d.dict() for d in data]
117
  # print(data[0].metadata.exclude({'_id','embedding'}))
118
+ pprint.pprint(data[0].metadata)
119
+ return [{"text": d.page_content,'start':d.metadata['start'],"end":d.metadata['end']} for d in data]
120
  # agent =vectorstore.as_retriever(
121
 
122
  # )
 
124
 
125
 
126
  def encode(temp: list[Document]):
127
+ mongo_client = MongoClient(mongoDB)
 
 
128
  model_name = "BAAI/bge-base-en"
129
  collection = mongo_client["transcriptions"]["videos"]
130
  embeddings = HuggingFaceEmbeddings(model_name=model_name)