Spaces:

DrishtiSharma
/

chat-w-docs-via-speech-or-text

Sleeping

App Files Files Community

DrishtiSharma committed on Dec 9, 2024

Commit

c320ec9

verified ·

1 Parent(s): fadbcd4

Update app.py

Browse files

Files changed (1) hide show

app.py +39 -34

app.py CHANGED Viewed

@@ -5,6 +5,7 @@ from langchain_chroma import Chroma
 from langchain_groq import ChatGroq
 from langchain.memory import ConversationBufferMemory
 from langchain.chains import ConversationalRetrievalChain
 # Ensure required environment variables are set
 GROQ_API_KEY = os.getenv("GROQ_API_KEY")
@@ -12,19 +13,17 @@ if not GROQ_API_KEY:
     st.error("GROQ_API_KEY is not set. Please configure it in Hugging Face Spaces secrets.")
     st.stop()
-# Function to set up the vectorstore
-def setup_vectorstore():
-    working_dir = os.path.dirname(os.path.abspath(__file__))
-    persist_directory = f"{working_dir}/vector_db_dir"
-    # Initialize HuggingFace Embeddings
     embeddings = HuggingFaceEmbeddings()
-    # Initialize Chroma vectorstore
-    vectorstore = Chroma(
-        persist_directory=persist_directory,
-        embedding_function=embeddings
-    )
     return vectorstore
 # Function to set up the chat chain
@@ -32,7 +31,6 @@ def chat_chain(vectorstore):
     llm = ChatGroq(model="llama-3.1-70b-versatile",
                    temperature=0,
                    groq_api_key=GROQ_API_KEY)
     retriever = vectorstore.as_retriever()
     memory = ConversationBufferMemory(
         llm=llm,
@@ -60,15 +58,20 @@ st.set_page_config(
 st.title("📚 Multi Documents Chatbot")
-# Initialize session state variables
-if "chat_history" not in st.session_state:
-    st.session_state.chat_history = []
-if "vectorstore" not in st.session_state:
-    st.session_state.vectorstore = setup_vectorstore()
-if "conversational_chain" not in st.session_state:
-    st.session_state.conversational_chain = chat_chain(st.session_state.vectorstore)
 # Display chat history
 for message in st.session_state.chat_history:
@@ -76,18 +79,20 @@ for message in st.session_state.chat_history:
         st.markdown(message["content"])
 # User input
-user_input = st.chat_input("Ask AI...")
-if user_input:
-    st.session_state.chat_history.append({"role": "user", "content": user_input})
-    with st.chat_message("user"):
-        st.markdown(user_input)
-    with st.chat_message("assistant"):
-        # Generate response
-        response = st.session_state.conversational_chain({"question": user_input})
-        assistant_response = response["answer"]
-        st.markdown(assistant_response)
-        st.session_state.chat_history.append({"role": "assistant", "content": assistant_response})

 from langchain_groq import ChatGroq
 from langchain.memory import ConversationBufferMemory
 from langchain.chains import ConversationalRetrievalChain
+from PyPDF2 import PdfReader
 # Ensure required environment variables are set
 GROQ_API_KEY = os.getenv("GROQ_API_KEY")
     st.error("GROQ_API_KEY is not set. Please configure it in Hugging Face Spaces secrets.")
     st.stop()
+# Function to process PDFs and set up the vectorstore
+def process_and_store_pdfs(uploaded_files):
+    """Extract page text from the uploaded PDFs and index it in Chroma.
+
+    Args:
+        uploaded_files: Iterable of file-like objects accepted by
+            ``PdfReader`` (here, the files returned by ``st.file_uploader``).
+
+    Returns:
+        A Chroma vectorstore holding one entry per non-blank PDF page.
+
+    Raises:
+        ValueError: If none of the PDFs contains any extractable text
+            (for example, scanned/image-only documents).
+    """
+    texts = []
+    for uploaded_file in uploaded_files:
+        reader = PdfReader(uploaded_file)
+        for page in reader.pages:
+            # Pages without a text layer yield no usable text; skip them so
+            # blank entries are never embedded or retrieved.
+            page_text = page.extract_text() or ""
+            if page_text.strip():
+                texts.append(page_text)
+    # Fail early with a readable message instead of handing an empty
+    # collection to the vectorstore.
+    if not texts:
+        raise ValueError("No extractable text was found in the uploaded PDF files.")
+    # Combine and embed the texts
     embeddings = HuggingFaceEmbeddings()
+    vectorstore = Chroma.from_texts(texts, embedding=embeddings)
     return vectorstore
 # Function to set up the chat chain
     llm = ChatGroq(model="llama-3.1-70b-versatile",
                    temperature=0,
                    groq_api_key=GROQ_API_KEY)
     retriever = vectorstore.as_retriever()
     memory = ConversationBufferMemory(
         llm=llm,
 st.title("📚 Multi Documents Chatbot")
+# File uploader for PDFs
+uploaded_files = st.file_uploader("Upload PDF files", accept_multiple_files=True, type=["pdf"])
+# Process PDFs and initialize the vectorstore.
+# Streamlit re-runs this script on every interaction (including each chat
+# message), so rebuild the index and the chain only when the set of uploaded
+# files changes. Rebuilding on every run would re-embed all PDFs and replace
+# the chain, which also throws away the chain's conversation memory.
+if uploaded_files:
+    # Cheap fingerprint of the current upload set (name and size of each file).
+    upload_signature = tuple((f.name, f.size) for f in uploaded_files)
+    if st.session_state.get("upload_signature") != upload_signature:
+        with st.spinner("Processing files..."):
+            vectorstore = process_and_store_pdfs(uploaded_files)
+            st.session_state.vectorstore = vectorstore
+            st.session_state.conversational_chain = chat_chain(vectorstore)
+        # Record the fingerprint only after processing succeeded, so a failed
+        # attempt is retried on the next run.
+        st.session_state.upload_signature = upload_signature
+    st.success("Files successfully processed! You can now chat with your documents.")
+# Initialize chat history
+if "chat_history" not in st.session_state:
+    st.session_state.chat_history = []
 # Display chat history
 for message in st.session_state.chat_history:
         st.markdown(message["content"])
 # User input
+# The chat box is only offered once a chain has been built from uploaded PDFs.
+if "conversational_chain" in st.session_state:
+    user_input = st.chat_input("Ask AI...")
+    if user_input:
+        # Record and echo the user's message.
+        st.session_state.chat_history.append({"role": "user", "content": user_input})
+        with st.chat_message("user"):
+            st.markdown(user_input)
+        with st.chat_message("assistant"):
+            # Generate response; invoke() replaces the deprecated practice of
+            # calling the chain object directly.
+            response = st.session_state.conversational_chain.invoke({"question": user_input})
+            assistant_response = response["answer"]
+            st.markdown(assistant_response)
+            st.session_state.chat_history.append({"role": "assistant", "content": assistant_response})
+else:
+    st.info("Please upload PDF files to start chatting.")