#ref: https://www.youtube.com/watch?v=3ZDVmzlM6Nc ; https://github.com/plaban1981/Agents/blob/main/Audio_powered_RAG_using_langchain_groq.ipynb
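"""Streamlit app for chatting with uploaded PDF documents by text or voice.

PDF text is embedded with HuggingFace sentence embeddings into a Chroma
vectorstore. Questions can be typed, recorded in the browser via WebRTC, or
uploaded as audio files; spoken input is transcribed with Groq's Whisper
endpoint, and answers come from a Groq-hosted LLM through a
ConversationalRetrievalChain with conversation memory.
"""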
import os
import chromadb
import streamlit as st
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_chroma import Chroma
from langchain_groq import ChatGroq
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
from PyPDF2 import PdfReader
from groq import Groq
from streamlit_webrtc import webrtc_streamer, AudioProcessorBase, WebRtcMode
import av
from pydub import AudioSegment
from io import BytesIO
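# Dependencies (package names inferred from the imports above; a sketch, not a
# pinned requirements list):
#   pip install streamlit langchain langchain-huggingface langchain-chroma \
#       langchain-groq chromadb sentence-transformers PyPDF2 groq \
#       streamlit-webrtc av pydub
# pydub additionally needs the ffmpeg binary on PATH for audio decoding/export.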
# Clear ChromaDB cache to fix tenant issue
chromadb.api.client.SharedSystemClient.clear_system_cache()
# Ensure required environment variables are set
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
if not GROQ_API_KEY:
    st.error("GROQ_API_KEY is not set. Please configure it in environment variables.")
    st.stop()
# Initialize Groq Client for transcription and LLM
groq_client = Groq(api_key=GROQ_API_KEY)
llm = ChatGroq(model="llama-3.1-70b-versatile", temperature=0, groq_api_key=GROQ_API_KEY)
# Function to process PDFs and set up the vectorstore
def process_and_store_pdfs(uploaded_files):
    """Extract text from the uploaded PDFs and index it in a Chroma vectorstore."""
    texts = []
    for uploaded_file in uploaded_files:
        reader = PdfReader(uploaded_file)
        for page in reader.pages:
            text = page.extract_text()
            if text:  # extract_text() can return None or "" for image-only pages
                texts.append(text)
    embeddings = HuggingFaceEmbeddings()
    vectorstore = Chroma.from_texts(texts, embedding=embeddings, persist_directory="vector_db_dir")
    return vectorstore
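# For long PDFs, retrieval usually improves if page text is split into smaller
# overlapping chunks before indexing. A minimal sketch of that refinement using
# LangChain's splitter (optional; not part of the flow above):
#
#     from langchain.text_splitter import RecursiveCharacterTextSplitter
#     splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
#     chunks = splitter.split_text("\n".join(texts))
#     vectorstore = Chroma.from_texts(chunks, embedding=embeddings,
#                                     persist_directory="vector_db_dir")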
# Function to set up the chat chain
def chat_chain(vectorstore):
    """Build a conversational retrieval (RAG) chain with chat-history memory."""
    retriever = vectorstore.as_retriever()
    memory = ConversationBufferMemory(output_key="answer", memory_key="chat_history", return_messages=True)
    chain = ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=retriever,
        chain_type="stuff",
        memory=memory,
        verbose=True,
        return_source_documents=True,
    )
    return chain
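# The chain is invoked as chain({"question": ...}) and returns a dict; because
# return_source_documents=True it carries both "answer" and "source_documents",
# which is why the memory above pins output_key="answer". For example:
#     result = chain({"question": "What is this document about?"})
#     result["answer"]            # the LLM's reply
#     result["source_documents"]  # the retrieved chunks behind it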
# Transcribe audio using Groq Whisper
def transcribe_audio(file_path):
    """Transcribe audio using Groq's Whisper model."""
    with open(file_path, "rb") as file:
        transcription = groq_client.audio.transcriptions.create(
            file=(file_path, file.read()),
            model="distil-whisper-large-v3-en",
            response_format="json",
            language="en",
        )
    return transcription.text
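# Usage: transcribe_audio("recorded_audio.wav") returns the plain transcript
# string (with response_format="json", the Groq SDK exposes it as
# transcription.text).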
# Audio Processor Class for Recording
class AudioProcessor(AudioProcessorBase):
    """Accumulates raw PCM from incoming WebRTC audio frames in a byte buffer."""

    def __init__(self):
        self.audio_buffer = BytesIO()

    def recv(self, frame: av.AudioFrame) -> av.AudioFrame:
        # Wrap the frame's samples (assumed 16-bit mono) and append them to the buffer
        audio_segment = AudioSegment(
            data=frame.to_ndarray().tobytes(),
            sample_width=2,
            frame_rate=frame.sample_rate,
            channels=1,
        )
        self.audio_buffer.write(audio_segment.raw_data)
        return frame

    def get_audio_data(self):
        return self.audio_buffer
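# Note: streamlit-webrtc invokes recv() on a worker thread separate from the
# Streamlit script run, so the buffer fills in the background between reruns.
# This sketch does no locking around the append-only BytesIO, which is assumed
# to be acceptable here.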
# Streamlit UI
st.title("Chat with Docs via Speech/Text 🗣️")
uploaded_files = st.file_uploader("Upload PDF Files", accept_multiple_files=True, type=["pdf"])
if uploaded_files:
    vectorstore = process_and_store_pdfs(uploaded_files)
    chain = chat_chain(vectorstore)
    st.success("PDFs processed! Ready to chat.")
    input_method = st.radio("Choose Input Method", ["Text Input", "Record Audio", "Upload Audio File"])

    # Text Input Mode
    if input_method == "Text Input":
        query = st.text_input("Ask your question:")
        if query:
            with st.spinner("Thinking..."):
                response = chain({"question": query})["answer"]
            st.write(f"**Response:** {response}")

    # Record Audio Mode
    elif input_method == "Record Audio":
        st.write("Record your audio query:")
        audio_processor = AudioProcessor()
        # NOTE: Streamlit reruns this script on every interaction, so this local
        # AudioProcessor may not be the one the live session is feeding; if the
        # buffer comes back empty, webrtc_ctx.audio_processor holds the instance
        # the session actually uses.
        webrtc_ctx = webrtc_streamer(
            key="record",
            mode=WebRtcMode.SENDONLY,
            audio_processor_factory=lambda: audio_processor,
            media_stream_constraints={"audio": True, "video": False},
        )
        # While the session is live we are recording; once it ends, process the buffer
        if webrtc_ctx.state.playing:
            st.write("Recording... Speak now.")
        else:  # the state object has no "stopped" flag; not playing means ended or not started
            st.write("Recording stopped. Processing...")
            audio_data = audio_processor.get_audio_data()
            if audio_data.getbuffer().nbytes:  # a bare BytesIO is always truthy; check actual content
                # Save the buffered raw PCM to a WAV file (48 kHz mono 16-bit assumed,
                # which is what browsers typically send over WebRTC)
                audio_file_path = "recorded_audio.wav"
                audio_segment = AudioSegment.from_file(
                    BytesIO(audio_data.getvalue()),
                    format="raw", frame_rate=48000, channels=1, sample_width=2,
                )
                audio_segment.export(audio_file_path, format="wav")
                st.success("Recording saved successfully!")

                # Transcribe and generate a response
                st.write("Transcribing audio...")
                transcription = transcribe_audio(audio_file_path)
                st.write(f"**You said:** {transcription}")
                with st.spinner("Generating response..."):
                    response = chain({"question": transcription})["answer"]
                st.write(f"**Response:** {response}")

    # Upload Audio File Mode
    elif input_method == "Upload Audio File":
        uploaded_audio = st.file_uploader("Upload an audio file (.wav, .mp3)", type=["wav", "mp3"])
        if uploaded_audio:
            audio_file_path = "uploaded_audio.wav"
            with open(audio_file_path, "wb") as f:
                f.write(uploaded_audio.read())
            st.audio(audio_file_path, format="audio/wav")

            st.write("Transcribing audio...")
            transcription = transcribe_audio(audio_file_path)
            st.write(f"**You said:** {transcription}")
            with st.spinner("Generating response..."):
                response = chain({"question": transcription})["answer"]
            st.write(f"**Response:** {response}")
else:
    st.info("Please upload PDF files to start chatting.")