Get lucky
Browse files- App/Embedding/utils/Initialize.py +18 -66
- App/app.py +1 -1
App/Embedding/utils/Initialize.py
CHANGED
@@ -1,74 +1,24 @@
|
|
1 |
from langchain.embeddings import HuggingFaceEmbeddings
|
2 |
from langchain.docstore.document import Document
|
3 |
-
from langchain.vectorstores import
|
4 |
-
|
5 |
-
|
6 |
-
import os,pprint
|
7 |
|
|
|
|
|
|
|
8 |
|
9 |
-
# Deployment settings read from the process environment. Each name is None
# when its variable is not set.
completion_base, openai_api_key, mongoDB = (
    os.environ.get(variable)
    for variable in ("completion_base", "openai_api_key", "MONGO_DB")
)
|
12 |
-
template = """### Given the following context
|
13 |
-
### Context
|
14 |
-
{context}
|
15 |
|
16 |
-
|
17 |
-
|
|
|
|
|
|
|
|
|
18 |
|
|
|
19 |
|
20 |
-
|
21 |
-
url = completion_base
|
22 |
-
|
23 |
-
payload = json.dumps(
|
24 |
-
{
|
25 |
-
"messages": [
|
26 |
-
{
|
27 |
-
"role": "system",
|
28 |
-
"content": "### You provide explanations based on the provided context",
|
29 |
-
},
|
30 |
-
{
|
31 |
-
"role": "user",
|
32 |
-
"content": template.format(context=context, question=question),
|
33 |
-
},
|
34 |
-
],
|
35 |
-
"model": "gpt-3.5-turbo",
|
36 |
-
"temperature": 1,
|
37 |
-
"presence_penalty": 0,
|
38 |
-
"top_p": 0.95,
|
39 |
-
"frequency_penalty": 0,
|
40 |
-
"stream": False,
|
41 |
-
}
|
42 |
-
)
|
43 |
-
|
44 |
-
headers = {
|
45 |
-
"Content-Type": "application/json",
|
46 |
-
"Authorization": f"Bearer {openai_api_key}",
|
47 |
-
}
|
48 |
-
|
49 |
-
async with aiohttp.ClientSession() as session:
|
50 |
-
async with session.post(url, headers=headers, data=payload) as response:
|
51 |
-
response = await response.json()
|
52 |
-
return response["choices"][0]["message"]["content"]
|
53 |
-
|
54 |
-
|
55 |
-
async def delete_documents(task_id):
    """Delete every document in ``transcriptions.videos`` with this ``task_id``.

    Args:
        task_id: Value matched against the ``task_id`` field of each document.

    Returns:
        None. The number of deleted documents is printed.
    """
    # A client is created for each call, so it is closed again once the
    # delete has finished (or failed) instead of being left open.
    client = AsyncIOMotorClient(mongoDB)
    try:
        collection = client["transcriptions"]["videos"]
        result = await collection.delete_many({"task_id": task_id})
        print(f"Deleted {result.deleted_count} document(s)")
    finally:
        client.close()
|
62 |
-
|
63 |
-
|
64 |
-
# mongo_client = MongoClient(
|
65 |
-
# mongoDB
|
66 |
-
# )
|
67 |
-
# model_name = "BAAI/bge-base-en"
|
68 |
-
# collection = mongo_client["transcriptions"]["videos"]
|
69 |
-
# embeddings = HuggingFaceEmbeddings(model_name=model_name)
|
70 |
-
# index_name = "test_embeddings"
|
71 |
-
# vectorstore = MongoDBAtlasVectorSearch(collection, embeddings, index_name=index_name)
|
72 |
|
73 |
|
74 |
def generateChunks(chunks, task_id, n=100):
|
@@ -103,7 +53,6 @@ def search(query: str, task_id: str):
|
|
103 |
embedding=embeddings,
|
104 |
index_name="test_embedding",
|
105 |
)
|
106 |
-
|
107 |
data = vectorstore.similarity_search(
|
108 |
query=query,
|
109 |
pre_filter={"text": {"path": "task_id", "query": task_id}},
|
@@ -116,7 +65,10 @@ def search(query: str, task_id: str):
|
|
116 |
# data =[d.dict() for d in data]
|
117 |
# print(data[0].metadata.exclude({'_id','embedding'}))
|
118 |
# pprint.pprint(data[0].metadata)
|
119 |
-
return [
|
|
|
|
|
|
|
120 |
# agent =vectorstore.as_retriever(
|
121 |
|
122 |
# )
|
|
|
1 |
from langchain.embeddings import HuggingFaceEmbeddings
|
2 |
from langchain.docstore.document import Document
|
3 |
+
from langchain.vectorstores import Pinecone
|
4 |
+
import os
|
5 |
+
import pinecone
|
|
|
6 |
|
7 |
+
# Name of the Pinecone index handed to Pinecone.from_existing_index below.
index_name = "movie-recommender-fast"

# Hugging Face model identifier used to build the embeddings object.
model_name = "thenlper/gte-base"
embeddings = HuggingFaceEmbeddings(model_name=model_name)
|
10 |
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
|
12 |
+
# Get the API key from app.pinecone.io and supply it through the
# PINECONE_API_KEY environment variable.
# SECURITY: no literal fallback key is kept in source. Credentials must not be
# committed to version control, and any key that was ever committed should be
# rotated. When the variable is unset or empty this is None.
# NOTE(review): downstream Pinecone calls are expected to reject a missing
# key - confirm the failure is surfaced clearly at startup.
PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY") or None
|
16 |
+
# find your environment next to the api key in pinecone console
|
17 |
+
PINECONE_ENV = os.environ.get("PINECONE_ENVIRONMENT")
if not PINECONE_ENV:
    # Unset or empty variable: use the default environment name.
    PINECONE_ENV = "us-west4-gcp-free"
|
18 |
|
19 |
+
# Configure the pinecone client with the API key and environment resolved above.
pinecone.init(api_key=PINECONE_API_KEY, environment=PINECONE_ENV)
|
20 |
|
21 |
+
# Build the module-level LangChain Pinecone vector store from the index named
# by index_name, using the module's embeddings object.
docsearch = Pinecone.from_existing_index(index_name, embeddings)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
22 |
|
23 |
|
24 |
def generateChunks(chunks, task_id, n=100):
|
|
|
53 |
embedding=embeddings,
|
54 |
index_name="test_embedding",
|
55 |
)
|
|
|
56 |
data = vectorstore.similarity_search(
|
57 |
query=query,
|
58 |
pre_filter={"text": {"path": "task_id", "query": task_id}},
|
|
|
65 |
# data =[d.dict() for d in data]
|
66 |
# print(data[0].metadata.exclude({'_id','embedding'}))
|
67 |
# pprint.pprint(data[0].metadata)
|
68 |
+
return [
|
69 |
+
{"text": d.page_content, "start": d.metadata["start"], "end": d.metadata["end"]}
|
70 |
+
for d in data
|
71 |
+
]
|
72 |
# agent =vectorstore.as_retriever(
|
73 |
|
74 |
# )
|
App/app.py
CHANGED
@@ -47,7 +47,7 @@ def authjwt_exception_handler(request: Request, exc: AuthJWTException):
|
|
47 |
|
48 |
@app.on_event("startup")
|
49 |
async def startup_event():
|
50 |
-
|
51 |
# await upload_bot.start()
|
52 |
# await models.create_all()
|
53 |
# models.metadata.create_all()
|
|
|
47 |
|
48 |
@app.on_event("startup")
|
49 |
async def startup_event():
|
50 |
+
await bot.start(bot_token="6183919505:AAEhHFt4mI18bQeAf2Lj7AePXFRPVLrOFM8")
|
51 |
# await upload_bot.start()
|
52 |
# await models.create_all()
|
53 |
# models.metadata.create_all()
|