DrishtiSharma committed
Commit 130b915 · verified
1 Parent(s): bd82d31

Update app.py

Files changed (1)
  1. app.py +97 -56
app.py CHANGED
@@ -2,14 +2,17 @@
 
 import os
 import chromadb
-from chromadb import Client, Settings
 import streamlit as st
+from base64 import b64decode
 from langchain_huggingface import HuggingFaceEmbeddings
 from langchain_chroma import Chroma
 from langchain_groq import ChatGroq
 from langchain.memory import ConversationBufferMemory
 from langchain.chains import ConversationalRetrievalChain
 from PyPDF2 import PdfReader
+from gtts import gTTS
+from pydub import AudioSegment
+from pydub.playback import play
 
 # Clear ChromaDB cache to fix tenant issue
 chromadb.api.client.SharedSystemClient.clear_system_cache()
@@ -28,23 +31,15 @@ def process_and_store_pdfs(uploaded_files):
         for page in reader.pages:
             texts.append(page.extract_text())
 
-    # Combine and embed the texts
     embeddings = HuggingFaceEmbeddings()
-    vectorstore = Chroma.from_texts(texts, embedding=embeddings)
+    vectorstore = Chroma.from_texts(texts, embedding=embeddings, persist_directory="vector_db_dir")
     return vectorstore
 
 # Function to set up the chat chain
 def chat_chain(vectorstore):
-    llm = ChatGroq(model="llama-3.1-70b-versatile",
-                   temperature=0,
-                   groq_api_key=GROQ_API_KEY)
+    llm = ChatGroq(model="llama-3.1-70b-versatile", temperature=0, groq_api_key=GROQ_API_KEY)
     retriever = vectorstore.as_retriever()
-    memory = ConversationBufferMemory(
-        llm=llm,
-        output_key="answer",
-        memory_key="chat_history",
-        return_messages=True
-    )
+    memory = ConversationBufferMemory(output_key="answer", memory_key="chat_history", return_messages=True)
 
     chain = ConversationalRetrievalChain.from_llm(
         llm=llm,
@@ -56,50 +51,96 @@ def chat_chain(vectorstore):
     )
     return chain
 
-# Streamlit UI configuration
-st.set_page_config(
-    page_title="Multi Doc Chat",
-    page_icon="📚",
-    layout="centered"
-)
-
-st.title("Chat with Your Docs📚")
-
-# File uploader for PDFs
-uploaded_files = st.file_uploader("Upload PDF files", accept_multiple_files=True, type=["pdf"])
-
-# Process PDFs and initialize the vectorstore
+# Function to record audio using JavaScript
+RECORD_JS = """
+const sleep = time => new Promise(resolve => setTimeout(resolve, time));
+const b2text = blob => new Promise(resolve => {
+    const reader = new FileReader();
+    reader.onloadend = e => resolve(e.srcElement.result);
+    reader.readAsDataURL(blob);
+});
+var record = time => new Promise(async resolve => {
+    stream = await navigator.mediaDevices.getUserMedia({ audio: true });
+    recorder = new MediaRecorder(stream);
+    chunks = [];
+    recorder.ondataavailable = e => chunks.push(e.data);
+    recorder.start();
+    await sleep(time);
+    recorder.onstop = async () => {
+        blob = new Blob(chunks);
+        text = await b2text(blob);
+        resolve(text);
+    };
+    recorder.stop();
+});
+"""
+
+def record_audio(seconds=5):
+    """Record audio via JavaScript and save it as a .wav file."""
+    st.write("Recording...")
+    from streamlit.components.v1 import html
+    html(f'<script>{RECORD_JS}</script>', height=0)
+    b64_audio = st.experimental_js("record", seconds * 1000)
+    audio_bytes = b64decode(b64_audio.split(",")[1])
+    with open("recorded_audio.wav", "wb") as f:
+        f.write(audio_bytes)
+    st.success("Audio recorded and saved!")
+    return "recorded_audio.wav"
+
+# Transcribe audio using Groq Whisper
+from groq import Groq
+def transcribe_audio(filepath):
+    client = Groq(api_key=GROQ_API_KEY)
+    with open(filepath, "rb") as file:
+        transcription = client.audio.transcriptions.create(
+            file=(filepath, file.read()),
+            model="distil-whisper-large-v3-en",
+            response_format="json",
+            language="en"
+        )
+    return transcription.text
+
+# Text-to-Speech Function
+def text_to_speech(response):
+    tts = gTTS(text=response, lang='en')
+    tts.save("response.mp3")
+    sound = AudioSegment.from_file("response.mp3")
+    play(sound)
+
+# Streamlit UI
+st.title("Chat with PDFs via Audio 🎙️📚")
+
+uploaded_files = st.file_uploader("Upload PDF Files", accept_multiple_files=True, type=["pdf"])
 if uploaded_files:
-    with st.spinner("Processing files..."):
-        vectorstore = process_and_store_pdfs(uploaded_files)
-        st.session_state.vectorstore = vectorstore
-        st.session_state.conversational_chain = chat_chain(vectorstore)
-        st.success("Files successfully processed! You can now chat with your documents.")
-
-    # Initialize chat history
-    if "chat_history" not in st.session_state:
-        st.session_state.chat_history = []
-
-    # Display chat history
-    for message in st.session_state.chat_history:
-        with st.chat_message(message["role"]):
-            st.markdown(message["content"])
-
-    # User input
-    if "conversational_chain" in st.session_state:
-        user_input = st.chat_input("Ask AI...")
-        if user_input:
-            st.session_state.chat_history.append({"role": "user", "content": user_input})
-
-            with st.chat_message("user"):
-                st.markdown(user_input)
-
-            with st.chat_message("assistant"):
-                # Generate response
-                response = st.session_state.conversational_chain({"question": user_input})
-                assistant_response = response["answer"]
-
-                st.markdown(assistant_response)
-                st.session_state.chat_history.append({"role": "assistant", "content": assistant_response})
+    vectorstore = process_and_store_pdfs(uploaded_files)
+    chain = chat_chain(vectorstore)
+    st.success("PDFs processed! Ready to chat.")
+
+    # User options for input
+    input_mode = st.radio("Choose input method:", ["Text", "Audio"])
+
+    # Text input
+    if input_mode == "Text":
+        user_input = st.text_input("Ask your question:")
+        if user_input:
+            with st.spinner("Thinking..."):
+                response = chain({"question": user_input})["answer"]
+            st.write(f"**Response:** {response}")
+            text_to_speech(response)
+
+    # Audio input
+    elif input_mode == "Audio":
+        if st.button("Record Audio"):
+            audio_file = record_audio(5)
+            st.audio(audio_file)
+
+            st.write("Transcribing audio...")
+            question = transcribe_audio(audio_file)
+            st.write(f"**You said:** {question}")
+
+            with st.spinner("Thinking..."):
+                response = chain({"question": question})["answer"]
+            st.write(f"**Response:** {response}")
+            text_to_speech(response)
 else:
     st.info("Please upload PDF files to start chatting.")
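Note on the new recording path: st.experimental_js is not part of Streamlit's documented API, so the record_audio helper added above may fail at runtime. A minimal sketch of an alternative, assuming a recent Streamlit release where the built-in st.audio_input widget is available; that widget choice is an assumption, not part of this commit:

import streamlit as st

# Assumed alternative to record_audio(): st.audio_input records from the
# browser microphone and returns an UploadedFile-like object (or None).
audio = st.audio_input("Record your question")
if audio is not None:
    with open("recorded_audio.wav", "wb") as f:
        f.write(audio.getvalue())  # persist for the transcribe_audio() call
    st.audio(audio)  # let the user replay the capture

This keeps capture in the browser and removes the custom MediaRecorder bridge entirely.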
 
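A second caveat: pydub.playback.play(sound) in text_to_speech plays audio on the machine running the app, which a remote browser never hears on a hosted Space, and it requires ffmpeg/simpleaudio to be installed. A minimal sketch, assuming it is acceptable to stream the gTTS output to the client with st.audio instead:

import streamlit as st
from gtts import gTTS

def text_to_speech(response):
    # Synthesize speech server-side, then stream the file to the browser
    # with st.audio instead of playing it on the server's audio device.
    tts = gTTS(text=response, lang="en")
    tts.save("response.mp3")
    st.audio("response.mp3", format="audio/mp3")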