jeevan committed on
Commit
4c501f4
·
1 Parent(s): 637aeec

working local version

Browse files
Files changed (2) hide show
  1. RagPipeline.py +1 -1
  2. aimakerspace/vectordatabase.py +45 -12
RagPipeline.py CHANGED
@@ -23,7 +23,7 @@ class RetrievalAugmentedQAPipeline:
23
  context_list = self.vector_db_retriever.search_by_text(user_query, k=4)
24
 
25
  context_prompt = ""
26
- for context in context_list[0]:
27
  context_prompt += context[0] + "\n"
28
 
29
  formatted_system_prompt = self.system_role_prompt.create_message()
 
23
  context_list = self.vector_db_retriever.search_by_text(user_query, k=4)
24
 
25
  context_prompt = ""
26
+ for context in context_list:
27
  context_prompt += context[0] + "\n"
28
 
29
  formatted_system_prompt = self.system_role_prompt.create_message()
aimakerspace/vectordatabase.py CHANGED
@@ -6,7 +6,7 @@ from typing import List, Tuple, Callable
6
  from aimakerspace.openai_utils.embedding import EmbeddingModel
7
  import asyncio
8
  from qdrant_client import models, QdrantClient
9
- from qdrant_client.models import PointStruct,VectorParams,Distance
10
 
11
  collection_name = "embedding_collection"
12
 
@@ -76,20 +76,20 @@ class VectorDatabase:
76
  self.qdrant_client = QdrantClient(":memory:")
77
  vector_params = VectorParams(
78
  size=embedding_model.dimensions, # vector size
79
- distance="Cosine" # distance metric
80
  )
81
- self.qdrant_client.recreate_collection(
82
  collection_name=collection_name,
83
- vectors_config={"default": vector_params},
84
  )
85
 
86
- def insert(self, key: str, vector: np.array) -> None:
87
  idx = str(uuid.uuid4())
88
  payload = {"text": key}
89
 
90
  point = PointStruct(
91
  id=idx,
92
- vector={"default": vector.tolist()},
93
  payload=payload
94
  )
95
  # Insert the vector into Qdrant with the associated document
@@ -97,9 +97,25 @@ class VectorDatabase:
97
  collection_name=collection_name,
98
  points=[point]
99
  )
100
- print(f"Inserted vector with ID {idx}: {vector}")
101
-
102
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
103
  def search(
104
  self,
105
  query_vector: np.array,
@@ -114,7 +130,7 @@ class VectorDatabase:
114
 
115
  search_results = self.qdrant_client.search(
116
  collection_name=collection_name,
117
- query_vector=query_vector,
118
  limit=k
119
  )
120
  return [(result.payload['text'], result.score) for result in search_results]
@@ -136,8 +152,25 @@ class VectorDatabase:
136
 
137
  async def abuild_from_list(self, list_of_text: List[str]) -> "VectorDatabase":
138
  embeddings = await self.embedding_model.async_get_embeddings(list_of_text)
139
- for text, embedding in zip(list_of_text, embeddings):
140
- self.insert(text, np.array(embedding))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
141
  return self
142
 
143
 
 
6
  from aimakerspace.openai_utils.embedding import EmbeddingModel
7
  import asyncio
8
  from qdrant_client import models, QdrantClient
9
+ from qdrant_client.models import PointStruct,VectorParams,Distance,Batch,VectorStruct,Payload
10
 
11
  collection_name = "embedding_collection"
12
 
 
76
  self.qdrant_client = QdrantClient(":memory:")
77
  vector_params = VectorParams(
78
  size=embedding_model.dimensions, # vector size
79
+ distance=Distance.COSINE
80
  )
81
+ self.qdrant_client.create_collection(
82
  collection_name=collection_name,
83
+ vectors_config={"text": vector_params},
84
  )
85
 
86
+ def insert(self, key: str, vectors: np.array) -> None:
87
  idx = str(uuid.uuid4())
88
  payload = {"text": key}
89
 
90
  point = PointStruct(
91
  id=idx,
92
+ vector={"default": vectors.tolist()},
93
  payload=payload
94
  )
95
  # Insert the vector into Qdrant with the associated document
 
97
  collection_name=collection_name,
98
  points=[point]
99
  )
100
+ # print(f"Inserted vector with ID {idx}: {vector}")
101
+ # self.qdrant_client.upsert(
102
+ # collection_name=collection_name,
103
+ # points= [
104
+ # [PointStruct(
105
+ # id=idx,
106
+ # vector=vector,
107
+ # payload={"text": key}
108
+ # )]
109
+ # for idx, vector in enumerate(vectors)
110
+ # ])
111
+ # self.qdrant_client.add(
112
+ # collection_name=collection_name,
113
+ # documents=[key],
114
+ # metadata=[],
115
+ # ids=str(uuid.uuid4())
116
+ # )
117
+
118
+
119
  def search(
120
  self,
121
  query_vector: np.array,
 
130
 
131
  search_results = self.qdrant_client.search(
132
  collection_name=collection_name,
133
+ query_vector=('text',query_vector),
134
  limit=k
135
  )
136
  return [(result.payload['text'], result.score) for result in search_results]
 
152
 
153
  async def abuild_from_list(self, list_of_text: List[str]) -> "VectorDatabase":
154
  embeddings = await self.embedding_model.async_get_embeddings(list_of_text)
155
+ # vs = VectorStruct()
156
+ # VectorStruct = Union[
157
+ # List[StrictFloat],
158
+ # List[List[StrictFloat]],
159
+ # Dict[StrictStr, Vector],
160
+ points = [
161
+ models.PointStruct(
162
+ id=str(uuid.uuid4()),
163
+ vector={ 'text': embedding},
164
+ payload={
165
+ "text": text
166
+ }
167
+ )
168
+ for text, embedding in zip(list_of_text, embeddings)
169
+ ]
170
+ self.qdrant_client.upsert(
171
+ collection_name=collection_name,
172
+ points=points
173
+ )
174
  return self
175
 
176