import urllib.request from langchain.chains import RetrievalQA from langchain_community.document_loaders import UnstructuredHTMLLoader from langchain_openai import OpenAIEmbeddings from langchain_openai import ChatOpenAI from langchain.text_splitter import CharacterTextSplitter from langchain_community.vectorstores import Chroma import gradio as gr # get the html data and save it to a file url = "https://sea.ai/faq" html = urllib.request.urlopen(url).read() with open("FAQ_SEA.AI.html", "wb") as f: f.write(html) # load documents loader = UnstructuredHTMLLoader("FAQ_SEA.AI.html") documents = loader.load() # split the documents into chunks text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0) texts = text_splitter.split_documents(documents) # select which embeddings we want to use embeddings = OpenAIEmbeddings() # create the vectorestore to use as the index db = Chroma.from_documents(texts, embeddings) # expose this index in a retriever interface retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": 2}) # create a chain to answer questions qa = RetrievalQA.from_chain_type( llm=ChatOpenAI(), chain_type="stuff", retriever=retriever, return_source_documents=True, verbose=True, ) def answer_question(message, history, system): # unwind the history of last 2 messages history = " ".join(f"{user} {bot}" for user, bot in history[-2:]) # concatenate the history, message and system query = " ".join([history, message, system]) retrieval_qa = qa.invoke(query) result = retrieval_qa["result"] result = result.replace('"', "").strip() # clean up the result # query = retrieval_qa["query"] # source_documents = retrieval_qa["source_documents"] return result title = "✨ SEA Dog" description = """
I have memorized the entire SEA.AI FAQ page. Ask me anything about it! 🧠
You can modify my response by using the SYSTEM
input under
Additional Inputs
.