|
|
|
|
|
import os
import queue

import av
import chromadb
import streamlit as st
from groq import Groq
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from langchain_chroma import Chroma
from langchain_groq import ChatGroq
from langchain_huggingface import HuggingFaceEmbeddings
from PyPDF2 import PdfReader
from streamlit_webrtc import webrtc_streamer, AudioProcessorBase, WebRtcMode
|
|
|
|
|
# Clear Chroma's shared system-client cache before any vector store is built.
# NOTE(review): presumably this avoids stale-client errors across Streamlit
# reruns -- confirm against the chromadb version in use.
chromadb.api.client.SharedSystemClient.clear_system_cache()

# The Groq API key is read from the environment; without it neither the chat
# model nor the transcription client can be created, so the app halts here.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
if GROQ_API_KEY is None or GROQ_API_KEY == "":
    st.error("GROQ_API_KEY is not set. Please configure it in environment variables.")
    st.stop()

# Raw Groq client (used for audio transcription) and the LangChain chat model
# (used by the retrieval chain) share the same key.
groq_client = Groq(api_key=GROQ_API_KEY)
llm = ChatGroq(
    model="llama-3.1-70b-versatile",
    temperature=0,
    groq_api_key=GROQ_API_KEY,
)
|
|
|
|
|
def process_and_store_pdfs(uploaded_files):
    """Extract text from PDFs and index it in a persisted Chroma vector store.

    Each page that yields text becomes one entry in the store, which is
    persisted under ``vector_db_dir``.

    Args:
        uploaded_files: Iterable of PDF sources, each passed to ``PdfReader``.

    Returns:
        The ``Chroma`` vector store built from the extracted page texts.

    Raises:
        ValueError: If no page in any of the files yields text.
    """
    texts = []
    for uploaded_file in uploaded_files:
        reader = PdfReader(uploaded_file)
        for page in reader.pages:
            page_text = page.extract_text()
            # Pages without extractable text (e.g. scanned images) add nothing
            # useful to the index, so they are skipped instead of embedded.
            if page_text and page_text.strip():
                texts.append(page_text)

    # Fail early with a clear message, before the embedding model is loaded.
    if not texts:
        raise ValueError("No extractable text was found in the uploaded PDF files.")

    embeddings = HuggingFaceEmbeddings()
    vectorstore = Chroma.from_texts(
        texts,
        embedding=embeddings,
        persist_directory="vector_db_dir",
    )
    return vectorstore
|
|
|
|
|
def chat_chain(vectorstore):
    """Build a conversational retrieval chain over *vectorstore*.

    The chain uses the module-level ``llm``, a retriever obtained from
    ``vectorstore.as_retriever()``, and a ``ConversationBufferMemory`` that
    stores the chain's ``"answer"`` output under ``"chat_history"``.

    Args:
        vectorstore: Object exposing ``as_retriever()``.

    Returns:
        The chain created by ``ConversationalRetrievalChain.from_llm``.
    """
    doc_retriever = vectorstore.as_retriever()
    history = ConversationBufferMemory(
        memory_key="chat_history",
        output_key="answer",
        return_messages=True,
    )
    chain_options = {
        "llm": llm,
        "retriever": doc_retriever,
        "chain_type": "stuff",
        "memory": history,
        "verbose": True,
        "return_source_documents": True,
    }
    return ConversationalRetrievalChain.from_llm(**chain_options)
|
|
|
|
|
def transcribe_audio(file_path):
    """Transcribe the audio file at *file_path* with Groq's Whisper model.

    Args:
        file_path: Path of the audio file to read; it is also sent as the
            file name in the transcription request.

    Returns:
        The ``text`` attribute of the transcription response.
    """
    with open(file_path, "rb") as audio_handle:
        audio_bytes = audio_handle.read()

    # The request asks for an English transcription in JSON form.
    result = groq_client.audio.transcriptions.create(
        file=(file_path, audio_bytes),
        model="distil-whisper-large-v3-en",
        response_format="json",
        language="en",
    )
    return result.text
|
|
|
|
|
class AudioProcessor(AudioProcessorBase):
    """Audio processor that passes every frame through untouched."""

    def recv(self, frame: av.AudioFrame) -> av.AudioFrame:
        """Return *frame* exactly as it was received."""
        return frame
|
|
|
|
|
# Approximate length, in seconds, of the clip captured for a spoken question.
RECORDING_SECONDS = 5


def _record_audio_frames(receiver, max_seconds):
    """Collect frames from *receiver* until about *max_seconds* of audio is buffered.

    Stops early if no frame arrives within one second (e.g. the stream ended).
    """
    frames = []
    recorded_seconds = 0.0
    while recorded_seconds < max_seconds:
        try:
            batch = receiver.get_frames(timeout=1)
        except queue.Empty:
            break
        for frame in batch:
            frames.append(frame)
            recorded_seconds += frame.samples / frame.sample_rate
    return frames


def _write_wav(frames, path):
    """Encode *frames* as 16-bit PCM and write them to a WAV file at *path*."""
    with av.open(path, mode="w") as container:
        stream = container.add_stream("pcm_s16le", rate=frames[0].sample_rate)
        for frame in frames:
            for packet in stream.encode(frame):
                container.mux(packet)
        # Passing None flushes whatever the encoder still holds.
        for packet in stream.encode(None):
            container.mux(packet)


def _answer_from_audio(chain, audio_path):
    """Transcribe the audio at *audio_path*, ask *chain*, and render both."""
    st.write("Transcribing audio...")
    transcription = transcribe_audio(audio_path)
    if not transcription or not transcription.strip():
        st.warning("No speech was detected in the audio.")
        return
    st.write(f"**You said:** {transcription}")

    with st.spinner("Generating response..."):
        response = chain.invoke({"question": transcription})["answer"]
    st.write(f"**Response:** {response}")


# NOTE(review): the trailing characters of the title look like mis-encoded
# emoji; the intended characters cannot be recovered here, so the string is
# left untouched -- restore it from the original source.
st.title("Chat with PDFs via Speech/Text ποΈππ")

uploaded_files = st.file_uploader("Upload PDF Files", accept_multiple_files=True, type=["pdf"])

if uploaded_files:
    # Streamlit reruns this script on every interaction. Rebuilding the index
    # each time would re-embed the PDFs and reset the conversation memory, so
    # the chain is cached and only rebuilt when the set of uploads changes.
    upload_signature = tuple((pdf.name, pdf.size) for pdf in uploaded_files)
    if st.session_state.get("pdf_signature") != upload_signature:
        try:
            vectorstore = process_and_store_pdfs(uploaded_files)
        except ValueError as exc:
            st.error(f"Could not index the uploaded PDFs: {exc}")
            st.stop()
        st.session_state["chain"] = chat_chain(vectorstore)
        st.session_state["pdf_signature"] = upload_signature
    chain = st.session_state["chain"]
    st.success("PDFs processed! Ready to chat.")

    input_method = st.radio("Choose Input Method", ["Text Input", "Record Audio", "Upload Audio File"])

    if input_method == "Text Input":
        query = st.text_input("Ask your question:")
        if query:
            with st.spinner("Thinking..."):
                response = chain.invoke({"question": query})["answer"]
            st.write(f"**Response:** {response}")

    elif input_method == "Record Audio":
        st.write("Record your audio query:")
        webrtc_ctx = webrtc_streamer(
            key="record",
            mode=WebRtcMode.SENDONLY,
            audio_receiver_size=1024,
            audio_processor_factory=AudioProcessor,
            media_stream_constraints={"audio": True, "video": False},
        )

        if webrtc_ctx.audio_receiver:
            st.write("Recording...")
            audio_frames = _record_audio_frames(webrtc_ctx.audio_receiver, RECORDING_SECONDS)

            if not audio_frames:
                st.warning("No audio was received from the microphone.")
            else:
                audio_file_path = "recorded_audio.wav"
                _write_wav(audio_frames, audio_file_path)
                st.success("Recording complete!")
                _answer_from_audio(chain, audio_file_path)

    elif input_method == "Upload Audio File":
        uploaded_audio = st.file_uploader("Upload an audio file (.wav, .mp3)", type=["wav", "mp3"])
        if uploaded_audio:
            # Keep the real extension and MIME type so an MP3 is not saved and
            # played back as if it were a WAV file.
            extension = os.path.splitext(uploaded_audio.name)[1].lower() or ".wav"
            audio_file_path = f"uploaded_audio{extension}"
            with open(audio_file_path, "wb") as f:
                # getvalue() returns the full content regardless of the
                # buffer's current read position.
                f.write(uploaded_audio.getvalue())

            st.audio(audio_file_path, format=uploaded_audio.type or "audio/wav")
            _answer_from_audio(chain, audio_file_path)
else:
    st.info("Please upload PDF files to start chatting.")