Spaces:
Sleeping
Sleeping
jeevan
committed on
Commit
·
4c501f4
1
Parent(s):
637aeec
working local version
Browse files
- RagPipeline.py +1 -1
- aimakerspace/vectordatabase.py +45 -12
RagPipeline.py
CHANGED
@@ -23,7 +23,7 @@ class RetrievalAugmentedQAPipeline:
|
|
23 |
context_list = self.vector_db_retriever.search_by_text(user_query, k=4)
|
24 |
|
25 |
context_prompt = ""
|
26 |
-
for context in context_list
|
27 |
context_prompt += context[0] + "\n"
|
28 |
|
29 |
formatted_system_prompt = self.system_role_prompt.create_message()
|
|
|
23 |
context_list = self.vector_db_retriever.search_by_text(user_query, k=4)
|
24 |
|
25 |
context_prompt = ""
|
26 |
+
for context in context_list:
|
27 |
context_prompt += context[0] + "\n"
|
28 |
|
29 |
formatted_system_prompt = self.system_role_prompt.create_message()
|
aimakerspace/vectordatabase.py
CHANGED
@@ -6,7 +6,7 @@ from typing import List, Tuple, Callable
|
|
6 |
from aimakerspace.openai_utils.embedding import EmbeddingModel
|
7 |
import asyncio
|
8 |
from qdrant_client import models, QdrantClient
|
9 |
-
from qdrant_client.models import PointStruct,VectorParams,Distance
|
10 |
|
11 |
collection_name = "embedding_collection"
|
12 |
|
@@ -76,20 +76,20 @@ class VectorDatabase:
|
|
76 |
self.qdrant_client = QdrantClient(":memory:")
|
77 |
vector_params = VectorParams(
|
78 |
size=embedding_model.dimensions, # vector size
|
79 |
-
distance=
|
80 |
)
|
81 |
-
self.qdrant_client.
|
82 |
collection_name=collection_name,
|
83 |
-
vectors_config={"
|
84 |
)
|
85 |
|
86 |
-
def insert(self, key: str,
|
87 |
idx = str(uuid.uuid4())
|
88 |
payload = {"text": key}
|
89 |
|
90 |
point = PointStruct(
|
91 |
id=idx,
|
92 |
-
vector={"default":
|
93 |
payload=payload
|
94 |
)
|
95 |
# Insert the vector into Qdrant with the associated document
|
@@ -97,9 +97,25 @@ class VectorDatabase:
|
|
97 |
collection_name=collection_name,
|
98 |
points=[point]
|
99 |
)
|
100 |
-
print(f"Inserted vector with ID {idx}: {vector}")
|
101 |
-
|
102 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
103 |
def search(
|
104 |
self,
|
105 |
query_vector: np.array,
|
@@ -114,7 +130,7 @@ class VectorDatabase:
|
|
114 |
|
115 |
search_results = self.qdrant_client.search(
|
116 |
collection_name=collection_name,
|
117 |
-
query_vector=query_vector,
|
118 |
limit=k
|
119 |
)
|
120 |
return [(result.payload['text'], result.score) for result in search_results]
|
@@ -136,8 +152,25 @@ class VectorDatabase:
|
|
136 |
|
137 |
async def abuild_from_list(self, list_of_text: List[str]) -> "VectorDatabase":
|
138 |
embeddings = await self.embedding_model.async_get_embeddings(list_of_text)
|
139 |
-
|
140 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
141 |
return self
|
142 |
|
143 |
|
|
|
6 |
from aimakerspace.openai_utils.embedding import EmbeddingModel
|
7 |
import asyncio
|
8 |
from qdrant_client import models, QdrantClient
|
9 |
+
from qdrant_client.models import PointStruct,VectorParams,Distance,Batch,VectorStruct,Payload
|
10 |
|
11 |
collection_name = "embedding_collection"
|
12 |
|
|
|
76 |
self.qdrant_client = QdrantClient(":memory:")
|
77 |
vector_params = VectorParams(
|
78 |
size=embedding_model.dimensions, # vector size
|
79 |
+
distance=Distance.COSINE
|
80 |
)
|
81 |
+
self.qdrant_client.create_collection(
|
82 |
collection_name=collection_name,
|
83 |
+
vectors_config={"text": vector_params},
|
84 |
)
|
85 |
|
86 |
+
def insert(self, key: str, vectors: np.array) -> None:
|
87 |
idx = str(uuid.uuid4())
|
88 |
payload = {"text": key}
|
89 |
|
90 |
point = PointStruct(
|
91 |
id=idx,
|
92 |
+
vector={"default": vectors.tolist()},
|
93 |
payload=payload
|
94 |
)
|
95 |
# Insert the vector into Qdrant with the associated document
|
|
|
97 |
collection_name=collection_name,
|
98 |
points=[point]
|
99 |
)
|
100 |
+
# print(f"Inserted vector with ID {idx}: {vector}")
|
101 |
+
# self.qdrant_client.upsert(
|
102 |
+
# collection_name=collection_name,
|
103 |
+
# points= [
|
104 |
+
# [PointStruct(
|
105 |
+
# id=idx,
|
106 |
+
# vector=vector,
|
107 |
+
# payload={"text": key}
|
108 |
+
# )]
|
109 |
+
# for idx, vector in enumerate(vectors)
|
110 |
+
# ])
|
111 |
+
# self.qdrant_client.add(
|
112 |
+
# collection_name=collection_name,
|
113 |
+
# documents=[key],
|
114 |
+
# metadata=[],
|
115 |
+
# ids=str(uuid.uuid4())
|
116 |
+
# )
|
117 |
+
|
118 |
+
|
119 |
def search(
|
120 |
self,
|
121 |
query_vector: np.array,
|
|
|
130 |
|
131 |
search_results = self.qdrant_client.search(
|
132 |
collection_name=collection_name,
|
133 |
+
query_vector=('text',query_vector),
|
134 |
limit=k
|
135 |
)
|
136 |
return [(result.payload['text'], result.score) for result in search_results]
|
|
|
152 |
|
153 |
async def abuild_from_list(self, list_of_text: List[str]) -> "VectorDatabase":
|
154 |
embeddings = await self.embedding_model.async_get_embeddings(list_of_text)
|
155 |
+
# vs = VectorStruct()
|
156 |
+
# VectorStruct = Union[
|
157 |
+
# List[StrictFloat],
|
158 |
+
# List[List[StrictFloat]],
|
159 |
+
# Dict[StrictStr, Vector],
|
160 |
+
points = [
|
161 |
+
models.PointStruct(
|
162 |
+
id=str(uuid.uuid4()),
|
163 |
+
vector={ 'text': embedding},
|
164 |
+
payload={
|
165 |
+
"text": text
|
166 |
+
}
|
167 |
+
)
|
168 |
+
for text, embedding in zip(list_of_text, embeddings)
|
169 |
+
]
|
170 |
+
self.qdrant_client.upsert(
|
171 |
+
collection_name=collection_name,
|
172 |
+
points=points
|
173 |
+
)
|
174 |
return self
|
175 |
|
176 |
|