"""Streamlit app that answers questions about the content of a web page.

Flow: the user enters a URL and a question; the page is downloaded, converted
to plain text, split into chunks, embedded into an in-memory FAISS index, and
a RetrievalQA chain answers the question from the most relevant chunks.
"""
import asyncio
import os
from urllib.parse import urlparse

import streamlit as st
from dotenv import load_dotenv
from langchain.callbacks import get_openai_callback
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.docstore.document import Document
from langchain.document_loaders import WebBaseLoader, AsyncHtmlLoader
from langchain.document_transformers import Html2TextTransformer
from langchain.embeddings import OpenAIEmbeddings
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS

# Load environment variables (e.g. the OpenAI API key) from a local .env file.
load_dotenv()


def _is_valid_url(url):
    """Return True when *url* is an absolute http(s) URL with a host part."""
    parsed = urlparse(url)
    return parsed.scheme in ("http", "https") and bool(parsed.netloc)


st.title("🤖 Chat with your website 🤖")

input_url = st.text_input("Inserisci url:")
question = st.text_area("Chiedi pure:")

# Main QA prompt. The text (including whitespace) is kept exactly as it was,
# so the request sent to the model is unchanged.
prompt_template = (
    "Use the following pieces of context to answer the question at the end. "
    "If you don't know the answer, just say that you don't know, "
    "don't try to make up an answer. \n"
    "{context} Question: {question} Answer: "
)
PROMPT = PromptTemplate(
    template=prompt_template,
    input_variables=["context", "question"],
)

if st.button("Invia", type="primary"):
    url = input_url.strip()

    # Validate the inputs before any network or paid API call is made.
    if not _is_valid_url(url):
        st.warning("Inserisci un URL valido (http:// o https://).")
        st.stop()
    if not question.strip():
        st.warning("Inserisci una domanda.")
        st.stop()

    # Download the page and convert its HTML to plain text.
    loader = AsyncHtmlLoader(url)
    data = loader.load()
    html2text = Html2TextTransformer()
    docs_transformed = html2text.transform_documents(data)

    # `separators` must be a list of strings; a bare string only worked by
    # accident because it happened to be a single character.
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=2000,
        chunk_overlap=200,
        separators=["\n"],
    )
    docs = text_splitter.split_documents(docs_transformed)

    # An empty chunk list would make the FAISS index build fail with an
    # opaque error, so report the problem to the user instead.
    if not docs:
        st.error("Impossibile estrarre testo dalla pagina indicata.")
        st.stop()

    # Use the default embedding model: "gpt-3.5-turbo" is a chat model, not an
    # embedding model, and `model_name` is not an OpenAIEmbeddings field.
    openai_embeddings = OpenAIEmbeddings()
    vectordb = FAISS.from_documents(documents=docs, embedding=openai_embeddings)
    retriever = vectordb.as_retriever(search_kwargs={"k": 3})

    llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)
    # Ask the chain to return the documents it used, rather than running a
    # second, separate retrieval just to display them.
    qa = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=retriever,
        chain_type_kwargs={"prompt": PROMPT},
        return_source_documents=True,
    )

    # Placeholder that is filled with the answer, followed by the sources.
    answer = st.empty()
    with get_openai_callback() as cb:
        # Run the chain and generate the response while tracking token usage.
        response = qa({"query": question})
    # Token/cost summary goes to the server console, not to the page.
    print(cb)

    answer.write(response["result"])
    st.write(response["source_documents"])