|
import chainlit as cl |
|
from langchain.embeddings.openai import OpenAIEmbeddings |
|
from langchain.document_loaders.csv_loader import CSVLoader |
|
from langchain.embeddings import CacheBackedEmbeddings |
|
from langchain.text_splitter import RecursiveCharacterTextSplitter |
|
from langchain.vectorstores import FAISS |
|
from langchain.chains import RetrievalQA |
|
from langchain.chat_models import ChatOpenAI |
|
from langchain.storage import LocalFileStore |
|
from langchain.prompts.chat import ( |
|
ChatPromptTemplate, |
|
SystemMessagePromptTemplate, |
|
HumanMessagePromptTemplate, |
|
) |
|
import chainlit as cl |
|
|
|
# Shared splitter applied to the loaded CSV documents before they are
# embedded: chunk_size=1000 with chunk_overlap=100 between adjacent chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=100,
)
|
|
|
# System prompt text. The single placeholder, {summaries}, is where the
# retrieved context is substituted; the model is told to always finish its
# reply with a "SOURCES" part.
system_template = """
Use the following pieces of context to answer the users question.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
ALWAYS return a "SOURCES" part in your answer.
The "SOURCES" part should be a reference to the source of the document from which you got your answer.

Example of your response should be:

```
The answer is foo
SOURCES: xyz
```

Begin!
----------------
{summaries}"""
|
|
|
# Chat prompt assembly: a system message built from `system_template`
# followed by a human message that carries the `{question}` placeholder.
system_message_prompt = SystemMessagePromptTemplate.from_template(system_template)
human_message_prompt = HumanMessagePromptTemplate.from_template("{question}")
messages = [system_message_prompt, human_message_prompt]

prompt = ChatPromptTemplate(messages=messages)

# Keyword arguments intended for a chain constructor's `chain_type_kwargs`.
chain_type_kwargs = dict(prompt=prompt)
|
|
|
|
|
@cl.on_chat_start
async def init():
    """Build the review index and QA chain when a chat session starts.

    Steps:
      1. Post a "Building Index..." status message.
      2. Load ``./data/barbie.csv`` (``Review_Url`` is used as each document's
         source) and split it with the module-level ``text_splitter``.
      3. Embed the chunks with ``OpenAIEmbeddings`` wrapped in a
         ``CacheBackedEmbeddings`` backed by ``LocalFileStore("./cache/")``.
      4. Build a FAISS index and a ``RetrievalQA`` "stuff" chain on top of it.
      5. Store the chain in the user session under ``"chain"`` and edit the
         status message to "Index built!".
    """
    msg = cl.Message(content="Building Index...")
    await msg.send()

    loader = CSVLoader(file_path="./data/barbie.csv", source_column="Review_Url")
    data = loader.load()
    documents = text_splitter.transform_documents(data)

    store = LocalFileStore("./cache/")
    core_embeddings_model = OpenAIEmbeddings()
    # Namespacing the cache by model name keeps vectors produced by different
    # embedding models from being mixed in the same store.
    embedder = CacheBackedEmbeddings.from_bytes_store(
        core_embeddings_model, store, namespace=core_embeddings_model.model
    )

    # FAISS.from_documents is synchronous; run it through cl.make_async so
    # the coroutine awaits it instead of calling it directly.
    docsearch = await cl.make_async(FAISS.from_documents)(documents, embedder)

    # NOTE(review): the module-level `chain_type_kwargs` (custom prompt) is
    # not passed here, so the chain runs with its default prompt. The custom
    # template uses a `{summaries}` placeholder - confirm it matches the
    # chain's document variable before wiring it in.
    chain = RetrievalQA.from_chain_type(
        ChatOpenAI(model="gpt-3.5-turbo", temperature=0, streaming=True),
        chain_type="stuff",
        return_source_documents=True,
        retriever=docsearch.as_retriever(),
    )

    # Publish the chain before announcing readiness so a message sent right
    # after "Index built!" always finds it in the session.
    cl.user_session.set("chain", chain)

    # Edit the existing status message in place rather than sending it again.
    msg.content = "Index built!"
    await msg.update()
|
|
|
|
|
@cl.on_message
async def main(message):
    """Answer a user message with the session's QA chain and attach sources.

    Args:
        message: The incoming user message. Either the query string itself or
            an object exposing the query text as ``.content``.

    The chain stored under ``"chain"`` in the user session is called with the
    query. The ``source`` metadata of every returned document is de-duplicated
    (first occurrence wins), prefixed with ``https://www.imdb.com`` and
    attached to the reply as ``cl.Text`` elements.
    """
    chain = cl.user_session.get("chain")
    cb = cl.AsyncLangchainCallbackHandler(
        stream_final_answer=True, answer_prefix_tokens=["FINAL", "ANSWER"]
    )
    # NOTE(review): presumably forces streaming to start immediately instead
    # of waiting for the "FINAL ANSWER" prefix tokens - confirm against the
    # Chainlit callback handler.
    cb.answer_reached = True

    # Accept both a plain string and a message object carrying `.content`.
    query = getattr(message, "content", message)
    res = await chain.acall(query, callbacks=[cb])

    answer = res["result"]

    # dict.fromkeys drops duplicate sources while keeping retrieval order.
    unique_sources = dict.fromkeys(
        doc.metadata["source"] for doc in res["source_documents"]
    )
    source_urls = ["https://www.imdb.com" + source for source in unique_sources]
    source_elements = [
        cl.Text(content=url, name="Review URL") for url in source_urls
    ]

    # Debug output retained from the original implementation.
    print(source_elements)

    # Join the URL strings directly instead of reading them back from the
    # elements: element content may be str or bytes depending on the
    # Chainlit version, and calling .decode() on a str raises AttributeError.
    if source_urls:
        answer += f"\nSources: {', '.join(source_urls)}"
    else:
        answer += "\nNo sources found"

    if cb.has_streamed_final_answer:
        # NOTE(review): only the elements are attached to the streamed
        # message; the "Sources" line appended to `answer` is not written to
        # it in this branch.
        cb.final_stream.elements = source_elements
        await cb.final_stream.update()
    else:
        await cl.Message(content=answer, elements=source_elements).send()
|
|