File size: 5,376 Bytes
ef7ea32
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
#ref: https://www.youtube.com/watch?v=3ZDVmzlM6Nc

import os
import queue

import av
import chromadb
import streamlit as st
from groq import Groq
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from langchain_chroma import Chroma
from langchain_groq import ChatGroq
from langchain_huggingface import HuggingFaceEmbeddings
from PyPDF2 import PdfReader
from streamlit_webrtc import webrtc_streamer, AudioProcessorBase, WebRtcMode

# Clear ChromaDB cache to fix tenant issue
# (works around the "could not connect to tenant" error that occurs when
# Streamlit re-runs the script and a stale Chroma system client survives)
chromadb.api.client.SharedSystemClient.clear_system_cache()

# Ensure required environment variables are set.
# Fail fast with a visible error instead of crashing later on the first API call.
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
if not GROQ_API_KEY:
    st.error("GROQ_API_KEY is not set. Please configure it in environment variables.")
    st.stop()

# Initialize Groq Client for transcription and LLM
# groq_client is used by transcribe_audio(); llm is used by chat_chain().
# NOTE(review): "llama-3.1-70b-versatile" may have been retired by Groq — confirm
# the model ID is still served before deploying.
groq_client = Groq(api_key=GROQ_API_KEY)
llm = ChatGroq(model="llama-3.1-70b-versatile", temperature=0, groq_api_key=GROQ_API_KEY)

# Function to process PDFs and set up the vectorstore
def process_and_store_pdfs(uploaded_files):
    """Extract text from the uploaded PDFs and index it in a Chroma vectorstore.

    Args:
        uploaded_files: iterable of file-like objects (Streamlit UploadedFile)
            readable by PyPDF2's PdfReader.

    Returns:
        A Chroma vectorstore persisted to ``vector_db_dir``, one entry per
        non-empty page of text.

    Raises:
        ValueError: if no page in any PDF yields extractable text (e.g. all
            pages are scanned images) — Chroma cannot index an empty corpus.
    """
    texts = []
    for uploaded_file in uploaded_files:
        reader = PdfReader(uploaded_file)
        for page in reader.pages:
            # extract_text() may return None or "" for image-only pages;
            # passing those through would break Chroma.from_texts.
            text = page.extract_text()
            if text and text.strip():
                texts.append(text)

    if not texts:
        raise ValueError("No extractable text found in the uploaded PDFs.")

    # Default HuggingFace sentence-embedding model; downloaded on first use.
    embeddings = HuggingFaceEmbeddings()
    vectorstore = Chroma.from_texts(texts, embedding=embeddings, persist_directory="vector_db_dir")
    return vectorstore

# Function to set up the chat chain
def chat_chain(vectorstore):
    """Wire the module-level LLM to *vectorstore* as a conversational RAG chain.

    The chain keeps its own chat history in a buffer memory (exposed under
    the ``chat_history`` key) and returns source documents with each answer.
    """
    conversation_memory = ConversationBufferMemory(
        output_key="answer",
        memory_key="chat_history",
        return_messages=True,
    )

    return ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=vectorstore.as_retriever(),
        chain_type="stuff",
        memory=conversation_memory,
        verbose=True,
        return_source_documents=True,
    )

# Transcribe audio using Groq Whisper
def transcribe_audio(file_path):
    """Transcribe the audio file at *file_path* via Groq's Whisper model.

    Reads the file fully into memory, uploads it to the Groq transcription
    endpoint, and returns the plain transcript text (English only).
    """
    with open(file_path, "rb") as audio_file:
        audio_bytes = audio_file.read()

    transcription = groq_client.audio.transcriptions.create(
        file=(file_path, audio_bytes),
        model="distil-whisper-large-v3-en",
        response_format="json",
        language="en",
    )
    return transcription.text

# Audio Processor Class for Recording
class AudioProcessor(AudioProcessorBase):
    """Pass-through audio processor: frames are returned unmodified.

    Required by streamlit-webrtc so the component forwards raw audio frames
    to the receiver queue; all capture logic lives in the UI code below.
    """

    def recv(self, frame: av.AudioFrame) -> av.AudioFrame:
        # No transformation — just hand each frame back to the framework.
        return frame

# Streamlit UI
st.title("Chat with PDFs via Speech/Text πŸŽ™οΈπŸ“πŸ“š")

uploaded_files = st.file_uploader("Upload PDF Files", accept_multiple_files=True, type=["pdf"])

if uploaded_files:
    vectorstore = process_and_store_pdfs(uploaded_files)
    chain = chat_chain(vectorstore)
    st.success("PDFs processed! Ready to chat.")

    input_method = st.radio("Choose Input Method", ["Text Input", "Record Audio", "Upload Audio File"])

    # Text Input Mode
    if input_method == "Text Input":
        query = st.text_input("Ask your question:")
        if query:
            with st.spinner("Thinking..."):
                response = chain({"question": query})["answer"]
                st.write(f"**Response:** {response}")

    # Record Audio
    elif input_method == "Record Audio":
        st.write("Record your audio query:")
        webrtc_ctx = webrtc_streamer(
            key="record",
            mode=WebRtcMode.SENDONLY,
            audio_receiver_size=1024,
            audio_processor_factory=AudioProcessor,
            media_stream_constraints={"audio": True, "video": False},
        )

        if webrtc_ctx.audio_receiver:
            st.write("Recording...")
            audio_frames = []
            # NOTE(review): capturing only ~6 frames yields a very short clip
            # (tens of milliseconds); a real stop button would be better — TODO.
            while len(audio_frames) <= 5:
                try:
                    # recv() raises queue.Empty when no frame arrives in time;
                    # the original unbounded loop would crash on that.
                    frame = webrtc_ctx.audio_receiver.recv()
                except queue.Empty:
                    break
                audio_frames.append(frame)

            if audio_frames:
                # Save the recorded audio.
                # Fix: av containers have no .write(frame) method — frames must
                # be encoded into packets via the stream and then muxed.
                audio_file_path = "recorded_audio.wav"
                with av.open(audio_file_path, mode="w") as container:
                    out_stream = container.add_stream(
                        "pcm_s16le", rate=audio_frames[0].sample_rate
                    )
                    for frame in audio_frames:
                        for packet in out_stream.encode(frame):
                            container.mux(packet)
                    # Flush any buffered packets out of the encoder.
                    for packet in out_stream.encode(None):
                        container.mux(packet)
                st.success("Recording complete!")

                # Transcribe and Generate Response
                st.write("Transcribing audio...")
                transcription = transcribe_audio(audio_file_path)
                st.write(f"**You said:** {transcription}")

                with st.spinner("Generating response..."):
                    response = chain({"question": transcription})["answer"]
                    st.write(f"**Response:** {response}")
            else:
                st.warning("No audio captured. Please try recording again.")

    # Upload Audio File Mode
    elif input_method == "Upload Audio File":
        uploaded_audio = st.file_uploader("Upload an audio file (.wav, .mp3)", type=["wav", "mp3"])
        if uploaded_audio:
            # Persist the upload to disk so the Groq client can re-read it.
            audio_file_path = "uploaded_audio.wav"
            with open(audio_file_path, "wb") as f:
                f.write(uploaded_audio.read())

            st.audio(audio_file_path, format="audio/wav")
            st.write("Transcribing audio...")
            transcription = transcribe_audio(audio_file_path)
            st.write(f"**You said:** {transcription}")

            with st.spinner("Generating response..."):
                response = chain({"question": transcription})["answer"]
                st.write(f"**Response:** {response}")
else:
    st.info("Please upload PDF files to start chatting.")