File size: 4,870 Bytes
cbcf653
 
 
29002d7
1da8d89
cbcf653
 
 
 
f8d0caa
 
 
 
cbcf653
29002d7
cbcf653
 
f366e0e
29002d7
cbcf653
 
f8d0caa
16d0d57
4d60b1c
16d0d57
 
cbcf653
16d0d57
4d60b1c
16d0d57
 
 
 
 
4d60b1c
 
cbcf653
16d0d57
cbcf653
 
16d0d57
 
cbcf653
 
4d60b1c
cbcf653
 
 
 
 
 
 
 
29002d7
cbcf653
16d0d57
 
 
 
82966fe
16d0d57
 
 
 
 
 
cbcf653
 
 
29002d7
 
d8ad835
cbcf653
 
 
 
 
 
 
 
 
 
 
29002d7
 
f8d0caa
 
82966fe
29002d7
 
 
 
 
 
 
 
 
f8d0caa
57e0e7e
 
 
 
 
 
 
 
 
 
 
f8d0caa
 
29002d7
 
5fd8183
 
 
29002d7
1da8d89
f8d0caa
1da8d89
5fd8183
a8b594a
1da8d89
 
 
a8b594a
1da8d89
 
 
 
29002d7
cbcf653
 
 
 
 
 
 
 
 
 
 
 
29002d7
 
cbcf653
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
import streamlit as st
import os
from streamlit_chat import message
from PyPDF2 import PdfReader
import bs4
import google.generativeai as genai
from langchain.prompts import PromptTemplate
from langchain import LLMChain
from langchain_google_genai import ChatGoogleGenerativeAI
import nest_asyncio
from langchain.document_loaders import WebBaseLoader

# nest_asyncio patches asyncio in place so an event loop can be re-entered.
# NOTE(review): presumably required by the async URL loader used below - confirm.
nest_asyncio.apply()

# Read the key once and fail with a readable message when it is missing.
# Assigning os.getenv(...) straight back into os.environ is a no-op when the
# variable exists and raises "TypeError: str expected, not NoneType" when it
# does not, which hides the real problem from whoever deploys the app.
_google_api_key = os.getenv("GOOGLE_API_KEY")
if not _google_api_key:
    st.error("GOOGLE_API_KEY environment variable is not set.")
    st.stop()
genai.configure(api_key=_google_api_key)

# Chat model shared by the whole script.
llm = ChatGoogleGenerativeAI(model="gemini-pro",
                             temperature=0.4)



# Prompt text for the assistant. The four {placeholders} are filled in on
# every call: prior conversation, uploaded document text, text extracted from
# a URL, and the user's current question.
template = """
You are CRETA, a friendly and knowledgeable chatbot created by Suriya, an AI enthusiast. Your job is to help answer questions using provided documents and website content effectively.

Previous Conversation:
{chat_history}

Document Content Provided:
{provided_docs}

Extracted URL Text:
{extracted_text}

When the question pertains to the content of the URL, try to answer using the extracted text primarily.

H: {human_input}
Chatbot: """

# Wrap the raw text in a PromptTemplate that declares its placeholders.
prompt = PromptTemplate(
    template=template,
    input_variables=["chat_history", "human_input", "provided_docs", "extracted_text"],
)

# Chain that renders the prompt and sends it to the model; verbose=True is
# passed through to LLMChain unchanged.
llm_chain = LLMChain(prompt=prompt, llm=llm, verbose=True)


# Module-level placeholders retained so the names still exist for any code
# that refers to them; conversational_chat works on local values and no
# longer rebinds them.
previous_response = ""
provided_docs = ""


def conversational_chat(query):
    """Answer ``query`` with the LLM chain and record the exchange.

    The prompt context is assembled from Streamlit session state: earlier
    (question, answer) pairs stored under ``history``, and the text stored
    under ``docs`` and ``extracted_text``.

    Args:
        query: The user's question.

    Returns:
        The value returned by ``llm_chain.predict`` for this question.
    """
    state = st.session_state

    # Separate turns with a newline; joining on "" glued each answer to the
    # following "Human:" line in the prompt.
    chat_history = "\n".join(
        f"Human: {turn[0]}\nChatbot: {turn[1]}"
        for turn in state['history']
        if turn is not None
    )
    # Plain concatenation on purpose: the stored items may be whole texts or
    # smaller fragments, so no separator is inserted between them.
    docs_text = "".join(doc for doc in state["docs"] if doc is not None)
    url_text = "".join(text for text in state["extracted_text"] if text is not None)

    result = llm_chain.predict(
        chat_history=chat_history,
        human_input=query,
        provided_docs=docs_text,
        extracted_text=url_text,
    )
    # Remember the exchange so the next call sees it as prior conversation.
    state['history'].append((query, result))
    return result

# Page header.
st.title("Chat Bot:")
st.text("I am CRETA Your Friendly Assitant")
st.markdown("Built by [Suriya❤️](https://github.com/theSuriya)")

# Seed session state with starting values; keys that are already present are
# left alone. 'generated' and 'past' each start with one entry (a greeting
# and a blank placeholder respectively).
_session_defaults = {
    'history': [],
    'generated': ["Hello ! Ask me anything"],
    'past': [" "],
    'docs': [],
    "extracted_text": [],
}
for _state_key, _initial_value in _session_defaults.items():
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _initial_value
    
def get_pdf_text(pdf_docs):
    """Return the concatenated text of every page of every PDF in ``pdf_docs``.

    Args:
        pdf_docs: Iterable of objects that ``PdfReader`` can open.

    Returns:
        The page texts joined in document order, then page order, with no
        separator. An empty iterable yields ``""``.
    """
    page_texts = []
    for pdf in pdf_docs:
        reader = PdfReader(pdf)
        # Defensive: treat a missing extraction result as empty text rather
        # than failing the whole upload on one page.
        page_texts.extend(page.extract_text() or "" for page in reader.pages)
    # Single join instead of repeated string +=.
    return "".join(page_texts)

def get_url_text(url_link):
    """Load ``url_link`` with ``WebBaseLoader`` and return its combined text.

    Best-effort: any exception raised while loading is printed and an empty
    string is returned instead.

    Args:
        url_link: Value passed to ``WebBaseLoader``.

    Returns:
        The ``page_content`` of every loaded document concatenated in order,
        or ``""`` on failure.
    """
    try:
        web_loader = WebBaseLoader(url_link)
        web_loader.requests_per_second = 1
        loaded_pages = web_loader.aload()
        return "".join(doc.page_content for doc in loaded_pages)
    except Exception as e:
        print(f"Error fetching or processing URL: {e}")
        return ""
    

# Sidebar: lets the user add PDF text and/or web-page text to session state.
with st.sidebar:
    st.title("Add a file for CRETA memory:")
    uploaded_files = st.file_uploader("Upload your PDF Files and Click on the Submit & Process Button", accept_multiple_files=True)
    uploaded_url = st.text_area("Please upload a URL:")
    # A text area easily picks up stray spaces or a trailing newline.
    url_link = uploaded_url.strip() if uploaded_url else ""

    if st.button("Submit & Process"):
        if uploaded_files or url_link:
            with st.spinner("Processing..."):
                if uploaded_files:
                    # Parse once and store the result as a single item.
                    # (list += str would extend the list character by character.)
                    pdf_text = get_pdf_text(uploaded_files)
                    if pdf_text:
                        st.session_state["docs"].append(pdf_text)

                if url_link:
                    # Fetch once; previously the page was downloaded twice
                    # and the first result was discarded.
                    url_text = get_url_text(url_link)
                    if url_text:
                        st.session_state["extracted_text"].append(url_text)

                st.success("Processing complete!")
        else:
            st.error("Please upload at least one PDF file or provide a URL.")
    
# Two containers, created in display order: the transcript first, then the
# one used while handling new input.
response_container = st.container()
container = st.container()

# Read the user's question; when one was submitted, get an answer and append
# the question and the answer to the two parallel transcript lists.
user_input = st.chat_input("Ask Your Questions 👉..")
with container:
    if user_input:
        output = conversational_chat(user_input)
        st.session_state['past'].append(user_input)
        st.session_state['generated'].append(output)

# Render the transcript. Index 0 holds only the bot greeting, so the user
# message is skipped for that entry.
if st.session_state['generated']:
    with response_container:
        for idx, bot_reply in enumerate(st.session_state['generated']):
            if idx > 0:
                message(
                    st.session_state["past"][idx],
                    is_user=True,
                    key=f"{idx}_user",
                    avatar_style="adventurer",
                )
            message(bot_reply, key=f"{idx}", avatar_style="bottts")