#ref: https://www.youtube.com/watch?v=3ZDVmzlM6Nc
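#
# How to run (a minimal sketch; the app.py filename and the dependency list
# below are assumptions inferred from the imports, not from the source):
#   export GROQ_API_KEY="your-key-here"
#   pip install streamlit chromadb langchain langchain-huggingface \
#       langchain-chroma langchain-groq PyPDF2 groq sentence-transformers
#   streamlit run app.py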

import os
import chromadb
import streamlit as st
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_chroma import Chroma
from langchain_groq import ChatGroq
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
from PyPDF2 import PdfReader
from groq import Groq

# Work around Chroma's "could not connect to tenant" error on Streamlit
# reruns by clearing the shared system client cache
chromadb.api.client.SharedSystemClient.clear_system_cache()

# Ensure required environment variables are set
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
if not GROQ_API_KEY:
    st.error("GROQ_API_KEY is not set. Please configure it in environment variables.")
    st.stop()

# Initialize the Groq client (for transcription) and the chat LLM
groq_client = Groq(api_key=GROQ_API_KEY)
# Note: Groq retires model IDs over time; if llama-3.1-70b-versatile is no
# longer served, substitute a current model such as llama-3.3-70b-versatile
llm = ChatGroq(model="llama-3.1-70b-versatile", temperature=0, groq_api_key=GROQ_API_KEY)

# Process uploaded PDFs and build a Chroma vectorstore from their text
def process_and_store_pdfs(uploaded_files):
    texts = []
    for uploaded_file in uploaded_files:
        reader = PdfReader(uploaded_file)
        for page in reader.pages:
            # extract_text() can return None or "" for image-only pages; skip those
            text = page.extract_text()
            if text and text.strip():
                texts.append(text)

    embeddings = HuggingFaceEmbeddings()
    vectorstore = Chroma.from_texts(texts, embedding=embeddings, persist_directory="vector_db_dir")
    return vectorstore
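
# Optional variant (a sketch, not part of the original script): whole pages can
# overflow the "stuff" chain's context window, so splitting them into
# overlapping chunks usually improves retrieval. The chunk_size/chunk_overlap
# values here are illustrative assumptions.
def process_and_store_pdfs_chunked(uploaded_files, chunk_size=1000, chunk_overlap=100):
    from langchain_text_splitters import RecursiveCharacterTextSplitter
    splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
    chunks = []
    for uploaded_file in uploaded_files:
        reader = PdfReader(uploaded_file)
        for page in reader.pages:
            text = page.extract_text()
            if text and text.strip():
                # Split each page into overlapping chunks before embedding
                chunks.extend(splitter.split_text(text))
    embeddings = HuggingFaceEmbeddings()
    return Chroma.from_texts(chunks, embedding=embeddings, persist_directory="vector_db_dir")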

# Function to set up the chat chain
def chat_chain(vectorstore):
    retriever = vectorstore.as_retriever()
    memory = ConversationBufferMemory(output_key="answer", memory_key="chat_history", return_messages=True)

    chain = ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=retriever,
        chain_type="stuff",
        memory=memory,
        verbose=True,
        return_source_documents=True
    )
    return chain
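
# Usage note (hedged): because return_source_documents=True, the chain's output
# dict also carries the retrieved pages alongside the answer, e.g.:
#   result = chain.invoke({"question": "What does chapter 1 cover?"})
#   answer, sources = result["answer"], result["source_documents"]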

# Transcribe audio using Groq's hosted Whisper model
def transcribe_audio(file_path):
    """Transcribe an audio file with Groq's Whisper endpoint and return the text."""
    with open(file_path, "rb") as file:
        # The SDK accepts a (filename, bytes) tuple; the filename's extension
        # helps the endpoint detect the audio container format
        transcription = groq_client.audio.transcriptions.create(
            file=(file_path, file.read()),
            model="distil-whisper-large-v3-en",
            response_format="json",
            language="en"
        )
    # The response object exposes the transcript on its .text attribute
    return transcription.text

# Streamlit UI
st.title("Chat with PDFs via Audio πŸŽ™οΈπŸ“š")

uploaded_files = st.file_uploader("Upload PDF Files", accept_multiple_files=True, type=["pdf"])

if uploaded_files:
    # Cache the chain in session state: Streamlit reruns the whole script on
    # every interaction, which would otherwise rebuild the index and wipe the
    # conversation memory each time.
    if "chain" not in st.session_state:
        st.session_state.chain = chat_chain(process_and_store_pdfs(uploaded_files))
    chain = st.session_state.chain
    st.success("PDFs processed! Ready to chat.")

    input_method = st.radio("Choose Input Method", ["Text Input", "Audio File Upload"])

    # Text Input Mode
    if input_method == "Text Input":
        query = st.text_input("Ask your question:")
        if query:
            with st.spinner("Thinking..."):
                response = chain.invoke({"question": query})["answer"]
                st.write(f"**Response:** {response}")

    # Audio Input Mode (File Upload)
    elif input_method == "Audio File Upload":
        uploaded_audio = st.file_uploader("Upload an audio file (.wav, .mp3)", type=["wav", "mp3"])
        if uploaded_audio:
            # Keep the original extension; saving an .mp3 under a .wav name
            # misreports the format to both the player and the transcriber
            ext = os.path.splitext(uploaded_audio.name)[1].lower() or ".wav"
            audio_file_path = f"uploaded_audio{ext}"
            with open(audio_file_path, "wb") as f:
                f.write(uploaded_audio.read())

            st.audio(audio_file_path, format=f"audio/{ext.lstrip('.')}")
            st.write("Transcribing audio...")
            transcription = transcribe_audio(audio_file_path)
            st.write(f"**You said:** {transcription}")

            with st.spinner("Generating response..."):
                response = chain.invoke({"question": transcription})["answer"]
                st.write(f"**Response:** {response}")
else:
    st.info("Please upload PDF files to start chatting.")