SEA-dog / chatbot.py
kevinconka's picture
Save query + result in dataset
273f5f9
from langchain.chains import RetrievalQA
from langchain_community.document_loaders import UnstructuredHTMLLoader
from langchain_openai import OpenAIEmbeddings
from langchain_openai import ChatOpenAI
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.vectorstores import Chroma
def get_retrieval_qa(filename):
# load documents
loader = UnstructuredHTMLLoader(filename)
documents = loader.load()
# split the documents into chunks
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
texts = text_splitter.split_documents(documents)
# select which embeddings we want to use
embeddings = OpenAIEmbeddings()
# create the vectorestore to use as the index
db = Chroma.from_documents(texts, embeddings)
# expose this index in a retriever interface
retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": 2})
# create a chain to answer questions
return RetrievalQA.from_chain_type(
llm=ChatOpenAI(),
chain_type="stuff",
retriever=retriever,
return_source_documents=True,
verbose=True,
)