"""Streamlit front end for CRETA, a Gemini-backed chatbot.

The app keeps three pieces of per-session context in ``st.session_state``:
the chat history, text extracted from uploaded PDF files, and text extracted
from a user-supplied URL. All three are injected into a single prompt that is
sent to the model on every user message.
"""

import logging
import os

import bs4
import google.generativeai as genai
import nest_asyncio
import streamlit as st
from langchain import LLMChain
from langchain.document_loaders import WebBaseLoader
from langchain.prompts import PromptTemplate
from langchain_google_genai import ChatGoogleGenerativeAI
from PyPDF2 import PdfReader
from streamlit_chat import message

logger = logging.getLogger(__name__)

nest_asyncio.apply()

# Fail with a readable message instead of the TypeError that assigning
# ``None`` into ``os.environ`` would raise when the key is missing.
_api_key = os.getenv("GOOGLE_API_KEY")
if not _api_key:
    st.error("The GOOGLE_API_KEY environment variable is not set.")
    st.stop()
genai.configure(api_key=_api_key)

llm = ChatGoogleGenerativeAI(model="gemini-pro", temperature=0.4)

# The prompt text is kept exactly as it was; only the literal is split across
# lines for readability.
template = (
    " You are CRETA, a friendly and knowledgeable chatbot created by Suriya, "
    "an AI enthusiast. You are designed to assist by providing information "
    "based on the content from provided documents and the text extracted "
    "from URLs. Previous Conversations: {chat_history} "
    "Document Content Provided: {provided_docs} "
    "Extracted URL Text: {extracted_text} "
    "If the human asks about the content from a URL, please use the "
    "information from the 'Extracted URL Text' to provide detailed answers. "
    "If the question pertains to general knowledge or other information, "
    "use what you know. \n"
    "Human: {human_input} Chatbot: "
)

prompt = PromptTemplate(
    input_variables=[
        "chat_history",
        "human_input",
        "provided_docs",
        "extracted_text",
    ],
    template=template,
)

llm_chain = LLMChain(
    llm=llm,
    prompt=prompt,
    verbose=True,
)


def conversational_chat(query):
    """Send *query* to the model together with the session context.

    The chat history, stored document text and stored URL text are read from
    ``st.session_state`` and passed to the chain. The ``(query, result)``
    pair is appended to ``st.session_state['history']``.

    Args:
        query: The user's message.

    Returns:
        The value returned by ``llm_chain.predict``.
    """
    # Newline-separate the turns so consecutive exchanges do not run together.
    chat_history = "\n".join(
        f"Human: {question}\nChatbot: {answer}"
        for question, answer in st.session_state["history"]
    )
    provided_docs = "\n".join(st.session_state["docs"])
    extracted_text = "\n".join(st.session_state["extracted_text"])

    result = llm_chain.predict(
        chat_history=chat_history,
        human_input=query,
        provided_docs=provided_docs,
        extracted_text=extracted_text,
    )
    st.session_state["history"].append((query, result))
    return result


def get_pdf_text(pdf_docs):
    """Return the concatenated text of every page of every PDF in *pdf_docs*.

    Args:
        pdf_docs: Iterable of objects accepted by ``PdfReader``.

    Returns:
        One string containing all extracted page text, in order.
    """
    page_texts = []
    for pdf in pdf_docs:
        reader = PdfReader(pdf)
        # Fall back to "" so a page that yields no text cannot break the join.
        page_texts.extend(page.extract_text() or "" for page in reader.pages)
    return "".join(page_texts)


def get_url_text(url_link):
    """Return the text content loaded from *url_link*, or ``""`` on failure.

    Args:
        url_link: Address handed to ``WebBaseLoader``.

    Returns:
        The concatenated ``page_content`` of the loaded documents. Any
        exception is logged and results in an empty string.
    """
    try:
        loader = WebBaseLoader(url_link)
        loader.requests_per_second = 1
        documents = loader.aload()
        return "".join(document.page_content for document in documents)
    except Exception:
        # Best-effort boundary: the caller treats "" as "nothing extracted".
        logger.exception("Error fetching or processing URL: %s", url_link)
        return ""


def _remember(store, text):
    """Append *text* to the list *store* unless it is empty or already there."""
    if text and text not in store:
        store.append(text)


st.title("Chat Bot:")
st.text("I am CRETA Your Friendly Assitant")
st.markdown("Built by [Suriya❤️](https://github.com/theSuriya)")

# Initialise per-session state. 'past' starts with a blank placeholder so it
# stays index-aligned with 'generated', whose first entry is the greeting.
if "history" not in st.session_state:
    st.session_state["history"] = []
if "generated" not in st.session_state:
    st.session_state["generated"] = ["Hello ! \nAsk me anything"]
if "past" not in st.session_state:
    st.session_state["past"] = [" "]
if "docs" not in st.session_state:
    st.session_state["docs"] = []
if "extracted_text" not in st.session_state:
    st.session_state["extracted_text"] = []

with st.sidebar:
    st.title("Add a file for CRETA memory:")
    uploaded_files = st.file_uploader(
        "Upload your PDF Files and Click on the Submit & Process Button",
        accept_multiple_files=True,
    )
    uploaded_url = st.text_area("Please upload a URL:")

    if st.button("Submit & Process"):
        url = uploaded_url.strip()
        if uploaded_files or url:
            url_ok = True
            with st.spinner("Processing..."):
                # Each source is processed once and stored as a single
                # string per submission.
                if uploaded_files:
                    _remember(
                        st.session_state["docs"],
                        get_pdf_text(uploaded_files),
                    )
                if url:
                    url_text = get_url_text(url)
                    url_ok = bool(url_text)
                    _remember(st.session_state["extracted_text"], url_text)
            if url_ok:
                st.success("Processing complete!")
            else:
                st.warning("Could not extract any text from the provided URL.")
        else:
            st.error("Please upload at least one PDF file or provide a URL.")

# Containers: the history is rendered above the input area.
response_container = st.container()
container = st.container()

user_input = st.chat_input("Ask Your Questions 👉..")

with container:
    if user_input:
        output = conversational_chat(user_input)
        st.session_state["past"].append(user_input)
        st.session_state["generated"].append(output)

# Display chat history. Index 0 is the greeting, which has no user message.
with response_container:
    for index, reply in enumerate(st.session_state["generated"]):
        if index != 0:
            message(
                st.session_state["past"][index],
                is_user=True,
                key=str(index) + "_user",
                avatar_style="adventurer",
            )
        message(reply, key=str(index), avatar_style="bottts")