import streamlit as st
from dotenv import load_dotenv
from langchain.chat_models import ChatOpenAI
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains import RetrievalQA, ConversationalRetrievalChain
from langchain.prompts import PromptTemplate
from langchain.document_loaders import AsyncHtmlLoader
from langchain.document_transformers import Html2TextTransformer
from langchain.callbacks import get_openai_callback
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.memory import ConversationBufferWindowMemory
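# Streamlit re-runs this entire script on every interaction, so the chain and
# the message history are kept in st.session_state to survive reruns.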
if 'conversation' not in st.session_state:
    st.session_state['conversation'] = None
if 'messages' not in st.session_state:
    st.session_state['messages'] = []
st.sidebar.title("URL")
input_url = st.sidebar.text_input("Enter URL:")

# Load the OpenAI API key from a .env file
load_dotenv()

st.title("Chat with your website 🤖")
question = st.text_area("Ask here:")
# Build the main prompt
prompt_template = """Instruction: You are a website agent that is talking with a human. Use only the chat history and the following information:
{context}
to answer the question in a helpful manner. If you don't know the answer, say that you don't know.
Keep your replies short, compassionate and informative.
{chat_history}
### Input: {question}
### Response:
"""
prompt = PromptTemplate(
    template=prompt_template, input_variables=["context", "question", "chat_history"]
)
memory = ConversationBufferWindowMemory(
    memory_key="chat_history",
    ai_prefix="### Response",
    human_prefix="### Input",
    output_key="answer",
    return_messages=True,
)
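# The "### Input"/"### Response" prefixes mirror the markers in the prompt
# template, and output_key="answer" matches the chain's answer field.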
if st.button("Send", type="primary"):
    # Fetch the page and strip the HTML down to plain text
    loader = AsyncHtmlLoader(input_url)
    data = loader.load()
    html2text = Html2TextTransformer()
    docs_transformed = html2text.transform_documents(data)
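    # Split the page text into overlapping chunks; sizes are in characters,
    # so 2000/200 keeps each chunk well within the embedding context window.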
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=2000,
        chunk_overlap=200,
        separators=["\n"],  # separators must be a list of strings
    )
    docs = text_splitter.split_documents(docs_transformed)
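    # Build an in-memory FAISS index over the chunks; it is rebuilt on every
    # click, so each question re-fetches and re-embeds the page.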
    # gpt-3.5-turbo is a chat model, not an embedding model; use the default
    # OpenAI embedding model (text-embedding-ada-002) instead
    openai_embeddings = OpenAIEmbeddings()
    vectordb = FAISS.from_documents(
        documents=docs,
        embedding=openai_embeddings)
    retriever = vectordb.as_retriever(search_kwargs={"k": 2})
    relevant_docs = retriever.get_relevant_documents(question)
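    # Create the chain once and cache it in session_state so the window
    # memory keeps the chat history across Streamlit reruns.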
    if st.session_state['conversation'] is None:
        llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)
        st.session_state['conversation'] = ConversationalRetrievalChain.from_llm(
            llm,
            chain_type='stuff',
            retriever=retriever,
            memory=memory,
            combine_docs_chain_kwargs={"prompt": prompt},
            verbose=True
        )
        # Memory-free alternative:
        # qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=retriever, chain_type_kwargs={"prompt": prompt})
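    # get_openai_callback tracks token usage and cost for the OpenAI calls made
    # inside the with-block; printing cb logs them to the server console.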
    # Write the answer and show the retrieved source chunks
    answer = st.empty()
    with get_openai_callback() as cb:
        # Run the chain to generate the response
        response = st.session_state['conversation'](question)
        print(cb)
    answer.write(response["answer"])
    st.write(relevant_docs)