from langchain.chains import RetrievalQA
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.document_loaders import UnstructuredHTMLLoader
from langchain_community.vectorstores import Chroma
from langchain_openai import ChatOpenAI
from langchain_openai import OpenAIEmbeddings


def get_retrieval_qa(
    filename,
    *,
    chunk_size: int = 1000,
    chunk_overlap: int = 0,
    k: int = 2,
) -> RetrievalQA:
    """Build a RetrievalQA chain over the contents of one HTML file.

    The file is loaded with ``UnstructuredHTMLLoader``, split into chunks,
    embedded with ``OpenAIEmbeddings`` and indexed in a ``Chroma`` vector
    store.  The store is exposed as a similarity retriever and wired to a
    ``ChatOpenAI`` model through a ``"stuff"`` chain.

    Args:
        filename: Path of the HTML file to load.
        chunk_size: ``chunk_size`` passed to ``CharacterTextSplitter``.
        chunk_overlap: ``chunk_overlap`` passed to ``CharacterTextSplitter``.
        k: Value of the retriever's ``"k"`` search argument, i.e. how many
            chunks the similarity search returns per query.

    Returns:
        A ``RetrievalQA`` chain created with ``return_source_documents=True``
        and ``verbose=True``.
    """
    # Load the HTML file into LangChain documents.
    documents = UnstructuredHTMLLoader(filename).load()

    # Split the documents into chunks.
    text_splitter = CharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    texts = text_splitter.split_documents(documents)

    # Select which embeddings to use (default OpenAI configuration).
    embeddings = OpenAIEmbeddings()

    # Create the vector store used as the index; no persist directory is
    # passed, so nothing here asks Chroma to save the index explicitly.
    db = Chroma.from_documents(texts, embeddings)

    # Expose the index through a retriever interface.
    retriever = db.as_retriever(
        search_type="similarity",
        search_kwargs={"k": k},
    )

    # Create the chain that answers questions from the retrieved chunks.
    return RetrievalQA.from_chain_type(
        llm=ChatOpenAI(),
        chain_type="stuff",
        retriever=retriever,
        return_source_documents=True,
        verbose=True,
    )