Spaces:

DrishtiSharma
/

chat-w-docs-via-speech-or-text

Sleeping

App Files Files Community

DrishtiSharma committed on Dec 9, 2024

Commit

bd82d31

verified ·

1 Parent(s): a92a1de

Create interim.py

Browse files

Files changed (1) hide show

interim.py +105 -0

interim.py ADDED Viewed

	@@ -0,0 +1,105 @@

+#ref: https://www.youtube.com/watch?v=3ZDVmzlM6Nc
+import os
+import chromadb
+from chromadb import Client, Settings
+import streamlit as st
+from langchain_huggingface import HuggingFaceEmbeddings
+from langchain_chroma import Chroma
+from langchain_groq import ChatGroq
+from langchain.memory import ConversationBufferMemory
+from langchain.chains import ConversationalRetrievalChain
+from PyPDF2 import PdfReader
+# Clear ChromaDB cache to fix tenant issue
+chromadb.api.client.SharedSystemClient.clear_system_cache()
+# Ensure required environment variables are set
+GROQ_API_KEY = os.getenv("GROQ_API_KEY")
+if not GROQ_API_KEY:
+    st.error("GROQ_API_KEY is not set. Please configure it in Hugging Face Spaces secrets.")
+    st.stop()
+# Function to process PDFs and set up the vectorstore
+def process_and_store_pdfs(uploaded_files):
+    texts = []
+    for uploaded_file in uploaded_files:
+        reader = PdfReader(uploaded_file)
+        for page in reader.pages:
+            texts.append(page.extract_text())
+    # Combine and embed the texts
+    embeddings = HuggingFaceEmbeddings()
+    vectorstore = Chroma.from_texts(texts, embedding=embeddings)
+    return vectorstore
+# Function to set up the chat chain
+def chat_chain(vectorstore):
+    llm = ChatGroq(model="llama-3.1-70b-versatile",
+                   temperature=0,
+                   groq_api_key=GROQ_API_KEY)
+    retriever = vectorstore.as_retriever()
+    memory = ConversationBufferMemory(
+        llm=llm,
+        output_key="answer",
+        memory_key="chat_history",
+        return_messages=True
+    )
+    chain = ConversationalRetrievalChain.from_llm(
+        llm=llm,
+        retriever=retriever,
+        chain_type="stuff",
+        memory=memory,
+        verbose=True,
+        return_source_documents=True
+    )
+    return chain
+# Streamlit UI configuration
+st.set_page_config(
+    page_title="Multi Doc Chat",
+    page_icon="📚",
+    layout="centered"
+)
+st.title("Chat with Your Docs📚")
+# File uploader for PDFs
+uploaded_files = st.file_uploader("Upload PDF files", accept_multiple_files=True, type=["pdf"])
+# Process PDFs and initialize the vectorstore
+if uploaded_files:
+    with st.spinner("Processing files..."):
+        vectorstore = process_and_store_pdfs(uploaded_files)
+        st.session_state.vectorstore = vectorstore
+        st.session_state.conversational_chain = chat_chain(vectorstore)
+    st.success("Files successfully processed! You can now chat with your documents.")
+# Initialize chat history
+if "chat_history" not in st.session_state:
+    st.session_state.chat_history = []
+# Display chat history
+for message in st.session_state.chat_history:
+    with st.chat_message(message["role"]):
+        st.markdown(message["content"])
+# User input
+if "conversational_chain" in st.session_state:
+    user_input = st.chat_input("Ask AI...")
+    if user_input:
+        st.session_state.chat_history.append({"role": "user", "content": user_input})
+        with st.chat_message("user"):
+            st.markdown(user_input)
+        with st.chat_message("assistant"):
+            # Generate response
+            response = st.session_state.conversational_chain({"question": user_input})
+            assistant_response = response["answer"]
+            st.markdown(assistant_response)
+            st.session_state.chat_history.append({"role": "assistant", "content": assistant_response})
+else:
+    st.info("Please upload PDF files to start chatting.")