File size: 1,676 Bytes
eb66dcb |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 |
from faiss import IndexFlatL2,write_index,read_index
import numpy as np
from utils.convert_embedding import GetEmbedding
class VectorStore:
def __init__(self):
pass
def store_vectors(self,data:list,embedding_space_name:str = 'faiss_index.index',numpy_emb_space:str = 'embeddings.npy' ):
try:
embeddings = GetEmbedding(data=data).convert_emb()
diamension = embeddings.shape[1]
print("Diamension",diamension)
# Create L2 distance index
index = IndexFlatL2(diamension)
index.add(embeddings)
write_index(index, embedding_space_name)
# Save embeddings to file
np.save(numpy_emb_space, embeddings)
return True
except Exception as e:
print(e)
return False
def get_similary_search(self,query,embedding_space_name:str = 'faiss_index.index',numpy_emb_space:str = 'embeddings.npy',K:int= 1):
# Load the FAISS index
index = read_index('faiss_index.index')
# Load the embeddings
embeddings_np = np.load('embeddings.npy')
# Now you can perform similarity searches on the index
query = "What is photosynthesis?"
query_embedding = GetEmbedding([query]).convert_emb()
query_embedding = query_embedding.detach().numpy()
# query_embedding = np.array(query_embedding) # Convert to numpy array
# query_embedding = query_embedding.reshape(1, -1)
# print("shape")
# print(query_embedding.shape)
# Perform search
distances, indices = index.search(query_embedding, k = K)
return indices
|