create an index if none exists
Browse files
App/Embedding/utils/Initialize.py
CHANGED
@@ -7,23 +7,37 @@ import os
|
|
7 |
|
8 |
|
9 |
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY")
|
14 |
# find your environment next to the api key in pinecone console
|
15 |
PINECONE_ENV = os.environ.get("PINECONE_ENVIRONMENT")
|
16 |
|
17 |
|
18 |
index_name = "transcript-bits"
|
19 |
-
model_name = "thenlper/gte-base"
|
20 |
embeddings = HuggingFaceEmbeddings(model_name=model_name)
|
21 |
spec = PodSpec()
|
22 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
23 |
|
24 |
-
|
25 |
-
|
26 |
-
|
|
|
|
|
27 |
|
28 |
docsearch.delete(
|
29 |
filter={
|
@@ -54,20 +68,7 @@ def generateChunks(chunks, task_id, n=100):
|
|
54 |
|
55 |
|
56 |
def search(query: str, task_id: str):
|
57 |
-
|
58 |
-
PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY")
|
59 |
-
# find your environment next to the api key in pinecone console
|
60 |
-
PINECONE_ENV = os.environ.get("PINECONE_ENVIRONMENT")
|
61 |
-
|
62 |
-
|
63 |
-
index_name = "transcript-bits"
|
64 |
-
model_name = "thenlper/gte-base"
|
65 |
-
embeddings = HuggingFaceEmbeddings(model_name=model_name)
|
66 |
-
|
67 |
-
|
68 |
-
pc=pinecone.Pinecone(api_key=PINECONE_API_KEY, environment=PINECONE_ENV)
|
69 |
-
vector_index = pc.Index(index_name)
|
70 |
-
docsearch = Pinecone.from_existing_index(index_name, embeddings)
|
71 |
|
72 |
filtering_conditions = {
|
73 |
"task_id": {"$eq": task_id},
|
@@ -81,18 +82,10 @@ def search(query: str, task_id: str):
|
|
81 |
|
82 |
|
83 |
def encode(temp: list[Document]):
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
|
89 |
-
index_name = "transcript-bits"
|
90 |
-
model_name = "thenlper/gte-base"
|
91 |
-
embeddings = HuggingFaceEmbeddings(model_name=model_name)
|
92 |
|
93 |
|
94 |
-
|
95 |
-
vector_index = pc.Index(index_name)
|
96 |
-
docsearch = Pinecone.from_existing_index(index_name, embeddings)
|
97 |
-
docsearch.add_documents(temp)
|
98 |
-
# return embeddings.embed_documents(texts = [d.page_content for d in temp])
|
|
|
7 |
|
8 |
|
9 |
|
10 |
+
def initDocument():
    """Return a vector store bound to the "transcript-bits" Pinecone index.

    Reads ``PINECONE_API_KEY`` (defaulting to an empty string) and
    ``PINECONE_ENVIRONMENT`` from the process environment, builds a Pinecone
    client, and creates the index (768 dimensions, cosine metric) when looking
    it up fails. The index is then wrapped together with the Hugging Face
    embedding model.

    Returns:
        The object produced by ``Pinecone.from_existing_index`` for the index
        and the embedding model.

    Raises:
        Whatever the Pinecone client constructor, ``create_index`` or
        ``from_existing_index`` raise; these are no longer masked by a
        secondary error from the fallback path.
    """
    # get api key from app.pinecone.io
    api_key = os.environ.get("PINECONE_API_KEY", "")
    # find your environment next to the api key in pinecone console
    environment = os.environ.get("PINECONE_ENVIRONMENT")

    index_name = "transcript-bits"
    model_name = "Alibaba-NLP/gte-base-en-v1.5"
    embeddings = HuggingFaceEmbeddings(model_name=model_name)
    # NOTE(review): PodSpec is built without arguments, as before; confirm the
    # installed pinecone client accepts that.
    spec = PodSpec()

    # Build the client outside the try block: if construction fails there is
    # no client to create an index with, so the error must surface as-is
    # instead of turning into an unbound-name failure in the handler.
    pc = pinecone.Pinecone(
        api_key=api_key,
        environment=environment,
        spec=spec,
    )

    try:
        # The lookup is only an existence probe; its result is not needed.
        pc.Index(index_name)
    except Exception:
        # Broad on purpose (the exact "not found" exception class depends on
        # the client version), but no longer a bare except, so
        # KeyboardInterrupt / SystemExit propagate.
        # NOTE(review): 768 is presumably the embedding width of model_name;
        # confirm if the model changes.
        pc.create_index(
            name=index_name,
            dimension=768,
            metric="cosine",
            spec=spec,
        )

    return Pinecone.from_existing_index(index_name, embeddings)
|
35 |
|
36 |
+
|
37 |
+
|
38 |
+
|
39 |
+
async def delete_documents(task_id):
|
40 |
+
docsearch=initDocument()
|
41 |
|
42 |
docsearch.delete(
|
43 |
filter={
|
|
|
68 |
|
69 |
|
70 |
def search(query: str, task_id: str):
|
71 |
+
docsearch=initDocument()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
72 |
|
73 |
filtering_conditions = {
|
74 |
"task_id": {"$eq": task_id},
|
|
|
82 |
|
83 |
|
84 |
def encode(temp: list[Document]):
    """Add the documents in ``temp`` to the store returned by ``initDocument()``.

    Nothing is returned; the work is done by the store's ``add_documents``.
    """
    store = initDocument()
    store.add_documents(temp)
|
|
|
88 |
|
|
|
|
|
|
|
89 |
|
90 |
|
91 |
+
# NOTE(review): appears to be a module-level call, so initDocument() runs when
# this module is imported and creates the index if the lookup fails - confirm.
initDocument()
|
|
|
|
|
|
|
|