Use the multi-qa-MiniLM-L6-cos-v1 embedding model and the cosine distance strategy in the vector store retriever
Browse files
app.py
CHANGED
@@ -3,6 +3,7 @@ import gradio as gr
|
|
3 |
from langchain.text_splitter import CharacterTextSplitter
|
4 |
from langchain_community.document_loaders import UnstructuredFileLoader
|
5 |
from langchain.vectorstores.faiss import FAISS
|
|
|
6 |
from langchain_community.embeddings import HuggingFaceEmbeddings
|
7 |
|
8 |
from langchain.chains import RetrievalQA
|
@@ -55,8 +56,12 @@ def prepare_vector_store_retriever(filename):
|
|
55 |
documents = text_splitter.split_documents(raw_documents)
|
56 |
|
57 |
# Creating a vectorstore
|
58 |
-
embeddings = HuggingFaceEmbeddings(
|
59 |
-
|
|
|
|
|
|
|
|
|
60 |
|
61 |
return VectorStoreRetriever(vectorstore=vectorstore, search_kwargs={"k": 2})
|
62 |
|
|
|
3 |
from langchain.text_splitter import CharacterTextSplitter
|
4 |
from langchain_community.document_loaders import UnstructuredFileLoader
|
5 |
from langchain.vectorstores.faiss import FAISS
|
6 |
+
from langchain.vectorstores.utils import DistanceStrategy
|
7 |
from langchain_community.embeddings import HuggingFaceEmbeddings
|
8 |
|
9 |
from langchain.chains import RetrievalQA
|
|
|
56 |
documents = text_splitter.split_documents(raw_documents)
|
57 |
|
58 |
# Creating a vectorstore
|
59 |
+
embeddings = HuggingFaceEmbeddings(
|
60 |
+
model_name="sentence-transformers/multi-qa-MiniLM-L6-cos-v1",
|
61 |
+
model_kwargs={'device': 'cpu'},
|
62 |
+
encode_kwargs={'normalize_embeddings': False}
|
63 |
+
)
|
64 |
+
vectorstore = FAISS.from_documents(documents, embeddings, distance_strategy=DistanceStrategy.COSINE)
|
65 |
|
66 |
return VectorStoreRetriever(vectorstore=vectorstore, search_kwargs={"k": 2})
|
67 |
|