ATDCChatbot / app.py
CSAle's picture
Releasing RubiChat App
d2f126b
raw
history blame
2.77 kB
import chainlit as cl
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.document_loaders import BSHTMLLoader
from langchain.embeddings import CacheBackedEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.storage import LocalFileStore
from langchain.prompts.chat import (
ChatPromptTemplate,
SystemMessagePromptTemplate,
HumanMessagePromptTemplate,
)
import chainlit as cl
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
system_template = """
Use the following pieces of context to answer the users question.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
ALWAYS return a "SOURCES" part in your answer.
The "SOURCES" part should be a reference to the source of the document from which you got your answer.
Example of your response should be:
```
The answer is foo
SOURCES: xyz
```
Begin!
----------------
{context}"""
messages = [
SystemMessagePromptTemplate.from_template(system_template),
HumanMessagePromptTemplate.from_template("{question}"),
]
prompt = ChatPromptTemplate(messages=messages)
chain_type_kwargs = {"prompt": prompt}
@cl.on_chat_start
async def init():
msg = cl.Message(content=f"Building Index...")
await msg.send()
# build FAISS index from csv
loader = BSHTMLLoader(file_path="./data/Rubilabs.html")
data = loader.load()
documents = text_splitter.transform_documents(data)
store = LocalFileStore("./cache/")
core_embeddings_model = OpenAIEmbeddings()
embedder = CacheBackedEmbeddings.from_bytes_store(
core_embeddings_model, store, namespace=core_embeddings_model.model
)
# make async docsearch
docsearch = await cl.make_async(FAISS.from_documents)(documents, embedder)
chain = RetrievalQA.from_chain_type(
ChatOpenAI(model="gpt-3.5-turbo", temperature=0, streaming=True),
chain_type="stuff",
return_source_documents=True,
retriever=docsearch.as_retriever(),
chain_type_kwargs=chain_type_kwargs,
)
msg.content = f"Index built!"
await msg.send()
cl.user_session.set("chain", chain)
@cl.on_message
async def main(message):
chain = cl.user_session.get("chain")
cb = cl.AsyncLangchainCallbackHandler(
stream_final_answer=True, answer_prefix_tokens=["FINAL", "ANSWER"]
)
cb.answer_reached = True
res = await chain.acall(message, callbacks=[cb])
answer = res["result"]
if cb.has_streamed_final_answer:
await cb.final_stream.update()
else:
await cl.Message(content=answer).send()