# gen-ai-project / build_retriever.py
# Moha782's picture
# Create build_retriever.py
# 3e40587 verified
# raw
# history blame
# 595 Bytes
# build_retriever.py
import faiss
import json
from sentence_transformers import SentenceTransformer
# Build a FAISS retriever over the documents in apexcustoms.json.
#
# Steps: load the documents, embed them with a sentence-transformer model,
# add the embeddings to a flat L2 FAISS index, then write the index and the
# model to disk.

# Load the extracted text. JSON is UTF-8 by specification, so decode it
# explicitly instead of relying on the platform's default encoding.
with open("apexcustoms.json", "r", encoding="utf-8") as f:
    documents = json.load(f)

# Fail early with a clear message: with nothing to embed there is no
# embedding dimension to size the index with, and the script would
# otherwise fail (or write a useless index) further down.
if not documents:
    raise ValueError("apexcustoms.json contains no documents to index")

# Load a pre-trained sentence transformer model and embed every document.
# NOTE(review): assumes `documents` is a list of strings -- confirm against
# whatever produces apexcustoms.json.
model = SentenceTransformer('paraphrase-MiniLM-L6-v2')
document_embeddings = model.encode(documents)

# Create a FAISS index sized to the embedding dimension (second axis of the
# embedding matrix) and add all document vectors to it.
index = faiss.IndexFlatL2(document_embeddings.shape[1])
index.add(document_embeddings)

# Save the FAISS index and the model. The embeddings themselves are not
# written separately; they are stored in the index.
faiss.write_index(index, "apexcustoms_index.faiss")
model.save("sentence_transformer_model")