File size: 3,398 Bytes
848e3eb
 
 
 
 
 
84bd684
848e3eb
 
 
 
 
84bd684
8744ba9
848e3eb
 
 
 
 
 
 
 
8744ba9
 
 
 
 
 
 
 
 
 
 
848e3eb
 
 
 
581d78f
848e3eb
8744ba9
848e3eb
581d78f
848e3eb
 
84bd684
848e3eb
 
 
84bd684
 
 
 
 
 
848e3eb
 
84bd684
 
 
 
 
 
 
 
 
 
 
 
848e3eb
 
84bd684
8744ba9
 
 
 
848e3eb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
581d78f
8744ba9
848e3eb
84bd684
8744ba9
 
84bd684
8744ba9
 
 
 
 
 
 
 
84bd684
 
848e3eb
 
 
 
 
 
8744ba9
848e3eb
 
8744ba9
84bd684
848e3eb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
import streamlit as st
from urllib.parse import urlparse
from langchain.chat_models import ChatOpenAI
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain.chains import ConversationalRetrievalChain
from langchain.prompts import PromptTemplate
from langchain.document_loaders import WebBaseLoader, AsyncHtmlLoader
from langchain.document_transformers import Html2TextTransformer
from langchain.callbacks import get_openai_callback
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.memory import ConversationBufferWindowMemory
from streamlit_chat import message

import asyncio
from langchain.docstore.document import Document

import os
from dotenv import load_dotenv



# Seed Streamlit session state with the keys this app relies on, so that
# reruns of the script don't wipe the ongoing conversation or history.
_state_defaults = {
    'conversation': None,  # lazily-built ConversationalRetrievalChain
    'messages': [],        # accumulated chat messages for display
}
for _key, _default in _state_defaults.items():
    if _key not in st.session_state:
        st.session_state[_key] = _default



# Sidebar: the user supplies the URL of the website to chat about.
st.sidebar.title("URL")
input_url = st.sidebar.text_input("Inserisci url:")

# Load OPENAI_API_KEY (and any other secrets) from a local .env file
# into the environment before any OpenAI-backed object is created.
load_dotenv()


st.title("Chat with your website 🤖")


# Main input: the question to ask about the loaded website.
question = st.text_area("Ask here:")

# Build the main prompt fed to the combine-docs ("stuff") chain.
# It receives the retrieved context, the running chat history from
# memory, and the user's question.
# FIX: corrected the typo "Intruction" -> "Instruction" in the prompt
# text sent to the model.
prompt_template = """Instruction: You are a website agent that is talking with a human. Use only the chat history and the following information:

{context}

to answer in a helpful manner to the question. If you don't know the answer - say that you don't know. 
Keep your replies short, compassionate and informative. 
{chat_history}

### Input: {question}
### Response:
"""

# The three variables must match what ConversationalRetrievalChain
# injects: retrieved docs (context), memory (chat_history), and the
# user input (question).
prompt = PromptTemplate(
    template=prompt_template, input_variables=["context", "question", "chat_history"]
)


# Windowed conversation memory shared with the retrieval chain.
# The prefixes mirror the "### Input" / "### Response" markers used in
# the prompt template, and `output_key="answer"` tells the memory which
# field of the chain's result to store.
_memory_config = dict(
    memory_key="chat_history",
    ai_prefix="### Response",
    human_prefix="### Input",
    output_key="answer",
    return_messages=True,
)
memory = ConversationBufferWindowMemory(**_memory_config)


    



if st.button("Invia", type="primary"):

    # Guard: don't hit the network or the OpenAI API with empty inputs.
    if not input_url or not question:
        st.warning("Please provide both a URL and a question.")
        st.stop()

    # Fetch the page and strip the HTML down to plain text documents.
    loader = AsyncHtmlLoader(input_url)
    data = loader.load()
    html2text = Html2TextTransformer()
    docs_transformed = html2text.transform_documents(data)

    # Split the page text into overlapping chunks for embedding.
    # FIX: `separators` expects a list of strings, not a bare string.
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=2000,
        chunk_overlap=200,
        separators=["\n"],
    )
    docs = text_splitter.split_documents(docs_transformed)

    # FIX: "gpt-3.5-turbo" is a chat model, not an embedding model, and
    # OpenAIEmbeddings takes `model`, not `model_name`. Use the default
    # OpenAI embedding model instead.
    openai_embeddings = OpenAIEmbeddings()

    # In-memory FAISS index over the page chunks.
    vectordb = FAISS.from_documents(
        documents=docs,
        embedding=openai_embeddings)

    retriever = vectordb.as_retriever(search_kwargs={"k": 2})

    # Fetched separately only so the sources can be displayed below the
    # answer; the chain performs its own retrieval internally.
    relevant_docs = retriever.get_relevant_documents(question)

    # Build the conversational chain once per session; memory carries
    # the chat history across reruns.
    if st.session_state['conversation'] is None:
        llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)

        st.session_state['conversation'] = ConversationalRetrievalChain.from_llm(
            llm,
            chain_type='stuff',
            retriever=retriever,
            memory=memory,
            combine_docs_chain_kwargs={"prompt": prompt},
            verbose=True
        )

    # Placeholder so the answer renders above the source documents.
    answer = st.empty()

    # Track token usage / cost for this call (printed to the console).
    with get_openai_callback() as cb:
        response = st.session_state['conversation'](question)
        print(cb)

    answer.write(response["answer"])
    st.write(relevant_docs)