Mbonea committed on
Commit
9987bca
·
1 Parent(s): f7b04a6
Files changed (2) hide show
  1. App/Embedding/utils/Initialize.py +18 -66
  2. App/app.py +1 -1
App/Embedding/utils/Initialize.py CHANGED
@@ -1,74 +1,24 @@
1
  from langchain.embeddings import HuggingFaceEmbeddings
2
  from langchain.docstore.document import Document
3
- from langchain.vectorstores import MongoDBAtlasVectorSearch
4
- from pymongo import MongoClient
5
- from motor.motor_asyncio import AsyncIOMotorClient
6
- import os,pprint
7
 
 
 
 
8
 
9
- completion_base = os.environ.get("completion_base")
10
- openai_api_key = os.environ.get("openai_api_key")
11
- mongoDB = os.environ.get("MONGO_DB")
12
- template = """### Given the following context
13
- ### Context
14
- {context}
15
 
16
- ### Use it to explain the question: {question}
17
- """
 
 
 
 
18
 
 
19
 
20
- async def fetch_data(question, context):
21
- url = completion_base
22
-
23
- payload = json.dumps(
24
- {
25
- "messages": [
26
- {
27
- "role": "system",
28
- "content": "### You provide explanations based on the provided context",
29
- },
30
- {
31
- "role": "user",
32
- "content": template.format(context=context, question=question),
33
- },
34
- ],
35
- "model": "gpt-3.5-turbo",
36
- "temperature": 1,
37
- "presence_penalty": 0,
38
- "top_p": 0.95,
39
- "frequency_penalty": 0,
40
- "stream": False,
41
- }
42
- )
43
-
44
- headers = {
45
- "Content-Type": "application/json",
46
- "Authorization": f"Bearer {openai_api_key}",
47
- }
48
-
49
- async with aiohttp.ClientSession() as session:
50
- async with session.post(url, headers=headers, data=payload) as response:
51
- response = await response.json()
52
- return response["choices"][0]["message"]["content"]
53
-
54
-
55
- async def delete_documents(task_id):
56
- client = AsyncIOMotorClient(mongoDB)
57
- db = client["transcriptions"]
58
- collection = db["videos"]
59
-
60
- result = await collection.delete_many({"task_id": task_id})
61
- print(f"Deleted {result.deleted_count} document(s)")
62
-
63
-
64
- # mongo_client = MongoClient(
65
- # mongoDB
66
- # )
67
- # model_name = "BAAI/bge-base-en"
68
- # collection = mongo_client["transcriptions"]["videos"]
69
- # embeddings = HuggingFaceEmbeddings(model_name=model_name)
70
- # index_name = "test_embeddings"
71
- # vectorstore = MongoDBAtlasVectorSearch(collection, embeddings, index_name=index_name)
72
 
73
 
74
  def generateChunks(chunks, task_id, n=100):
@@ -103,7 +53,6 @@ def search(query: str, task_id: str):
103
  embedding=embeddings,
104
  index_name="test_embedding",
105
  )
106
-
107
  data = vectorstore.similarity_search(
108
  query=query,
109
  pre_filter={"text": {"path": "task_id", "query": task_id}},
@@ -116,7 +65,10 @@ def search(query: str, task_id: str):
116
  # data =[d.dict() for d in data]
117
  # print(data[0].metadata.exclude({'_id','embedding'}))
118
  # pprint.pprint(data[0].metadata)
119
- return [{"text": d.page_content,'start':d.metadata['start'],"end":d.metadata['end']} for d in data]
 
 
 
120
  # agent =vectorstore.as_retriever(
121
 
122
  # )
 
1
  from langchain.embeddings import HuggingFaceEmbeddings
2
  from langchain.docstore.document import Document
3
+ from langchain.vectorstores import Pinecone
4
+ import os
5
+ import pinecone
 
6
 
7
+ index_name = "movie-recommender-fast"
8
+ model_name = "thenlper/gte-base"
9
+ embeddings = HuggingFaceEmbeddings(model_name=model_name)
10
 
 
 
 
 
 
 
11
 
12
+ # get api key from app.pinecone.io
13
+ PINECONE_API_KEY = (
14
+ os.environ.get("PINECONE_API_KEY") or "0712a5e4-bcf3-4152-a726-27ee3a2676bb"
15
+ )
16
+ # find your environment next to the api key in pinecone console
17
+ PINECONE_ENV = os.environ.get("PINECONE_ENVIRONMENT") or "us-west4-gcp-free"
18
 
19
+ pinecone.init(api_key=PINECONE_API_KEY, environment=PINECONE_ENV)
20
 
21
+ docsearch = Pinecone.from_existing_index(index_name, embeddings)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
 
23
 
24
  def generateChunks(chunks, task_id, n=100):
 
53
  embedding=embeddings,
54
  index_name="test_embedding",
55
  )
 
56
  data = vectorstore.similarity_search(
57
  query=query,
58
  pre_filter={"text": {"path": "task_id", "query": task_id}},
 
65
  # data =[d.dict() for d in data]
66
  # print(data[0].metadata.exclude({'_id','embedding'}))
67
  # pprint.pprint(data[0].metadata)
68
+ return [
69
+ {"text": d.page_content, "start": d.metadata["start"], "end": d.metadata["end"]}
70
+ for d in data
71
+ ]
72
  # agent =vectorstore.as_retriever(
73
 
74
  # )
App/app.py CHANGED
@@ -47,7 +47,7 @@ def authjwt_exception_handler(request: Request, exc: AuthJWTException):
47
 
48
  @app.on_event("startup")
49
  async def startup_event():
50
- # await bot.start(bot_token="6183919505:AAEhHFt4mI18bQeAf2Lj7AePXFRPVLrOFM8")
51
  # await upload_bot.start()
52
  # await models.create_all()
53
  # models.metadata.create_all()
 
47
 
48
  @app.on_event("startup")
49
  async def startup_event():
50
+ await bot.start(bot_token="6183919505:AAEhHFt4mI18bQeAf2Lj7AePXFRPVLrOFM8")
51
  # await upload_bot.start()
52
  # await models.create_all()
53
  # models.metadata.create_all()