#ref: https://www.youtube.com/watch?v=3ZDVmzlM6Nc
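# Streamlit app: chat with uploaded PDFs by text or speech. PDF text is
# embedded into a Chroma vector store, Groq's Whisper endpoint transcribes
# audio queries, and a Groq-hosted LLM answers over the retrieved context.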
import os
import chromadb
import streamlit as st
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_chroma import Chroma
from langchain_groq import ChatGroq
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
from PyPDF2 import PdfReader
from groq import Groq
from streamlit_webrtc import webrtc_streamer, AudioProcessorBase, WebRtcMode
import av
# Clear ChromaDB cache to fix tenant issue
chromadb.api.client.SharedSystemClient.clear_system_cache()
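# (Streamlit reruns the whole script on every interaction, which can leave
# stale Chroma client state behind and trigger "tenant" errors; clearing
# the shared system cache works around that.)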
# Ensure required environment variables are set
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
if not GROQ_API_KEY:
    st.error("GROQ_API_KEY is not set. Please configure it in environment variables.")
    st.stop()
# Initialize Groq Client for transcription and LLM
groq_client = Groq(api_key=GROQ_API_KEY)
llm = ChatGroq(model="llama-3.1-70b-versatile", temperature=0, groq_api_key=GROQ_API_KEY)
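# temperature=0 keeps answers deterministic. Groq's hosted model lineup
# changes over time, so swap in a currently supported model name if this
# one has been retired.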
# Function to process PDFs and set up the vectorstore
def process_and_store_pdfs(uploaded_files):
    texts = []
    for uploaded_file in uploaded_files:
        reader = PdfReader(uploaded_file)
        for page in reader.pages:
            text = page.extract_text()
            if text:  # extract_text() can return None for image-only pages
                texts.append(text)
    embeddings = HuggingFaceEmbeddings()
    vectorstore = Chroma.from_texts(texts, embedding=embeddings, persist_directory="vector_db_dir")
    return vectorstore
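# For long PDFs, retrieval usually works better on smaller chunks than on
# whole pages. A hedged sketch of one option, placed inside
# process_and_store_pdfs before building the vector store (chunk sizes are
# illustrative, not tuned):
#
# from langchain.text_splitter import RecursiveCharacterTextSplitter
# splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
# texts = splitter.split_text("\n\n".join(texts))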
# Function to set up the chat chain
def chat_chain(vectorstore):
    retriever = vectorstore.as_retriever()
    memory = ConversationBufferMemory(output_key="answer", memory_key="chat_history", return_messages=True)
    chain = ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=retriever,
        chain_type="stuff",
        memory=memory,
        verbose=True,
        return_source_documents=True
    )
    return chain
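# output_key="answer" matters here: with return_source_documents=True the
# chain returns several keys, and the memory needs to know which one to
# store as the assistant's reply.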
# Transcribe audio using Groq Whisper
def transcribe_audio(file_path):
    """Transcribe audio using Groq's Whisper model."""
    with open(file_path, "rb") as file:
        transcription = groq_client.audio.transcriptions.create(
            file=(file_path, file.read()),
            model="distil-whisper-large-v3-en",
            response_format="json",
            language="en"
        )
    return transcription.text
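# The file argument is a (filename, bytes) tuple; the Groq client follows
# the OpenAI SDK's transcription interface here.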
# Audio Processor Class for Recording
class AudioProcessor(AudioProcessorBase):
    def recv(self, frame: av.AudioFrame) -> av.AudioFrame:
        return frame
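# A pass-through processor: each frame is returned unchanged. In SENDONLY
# mode the incoming frames are also queued on webrtc_ctx.audio_receiver,
# which the recording loop below reads from.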
# Streamlit UI
st.title("Chat with PDFs via Speech/Text 🎙️📄💬")
uploaded_files = st.file_uploader("Upload PDF Files", accept_multiple_files=True, type=["pdf"])
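# Note: Streamlit reruns this script top to bottom on every interaction, so
# the PDFs below are re-embedded on each rerun. A hedged sketch of one way
# to cache the vector store across reruns via st.session_state:
#
# if uploaded_files and "vectorstore" not in st.session_state:
#     st.session_state.vectorstore = process_and_store_pdfs(uploaded_files)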
if uploaded_files:
    vectorstore = process_and_store_pdfs(uploaded_files)
    chain = chat_chain(vectorstore)
    st.success("PDFs processed! Ready to chat.")
    input_method = st.radio("Choose Input Method", ["Text Input", "Record Audio", "Upload Audio File"])
    # Text Input Mode
    if input_method == "Text Input":
        query = st.text_input("Ask your question:")
        if query:
            with st.spinner("Thinking..."):
                response = chain.invoke({"question": query})["answer"]
            st.write(f"**Response:** {response}")
    # Record Audio
    elif input_method == "Record Audio":
        st.write("Record your audio query:")
        webrtc_ctx = webrtc_streamer(
            key="record",
            mode=WebRtcMode.SENDONLY,
            audio_receiver_size=1024,
            audio_processor_factory=AudioProcessor,
            media_stream_constraints={"audio": True, "video": False},
        )
        if webrtc_ctx.audio_receiver:
            st.write("Recording...")
            audio_frames = []
            # Naive capture: stop after a fixed handful of frames, which is
            # only a fraction of a second of audio. A real app would record
            # until the user stops the stream.
            while True:
                frame = webrtc_ctx.audio_receiver.recv()
                audio_frames.append(frame)
                if len(audio_frames) > 5:
                    break
            # Save the recorded audio. PyAV containers are written by encoding
            # frames into a stream and muxing the resulting packets; raw frames
            # cannot be written to the container directly. This assumes the
            # incoming frames use a sample format compatible with PCM s16.
            audio_file_path = "recorded_audio.wav"
            with av.open(audio_file_path, "w") as container:
                stream = container.add_stream("pcm_s16le", rate=audio_frames[0].sample_rate)
                for frame in audio_frames:
                    for packet in stream.encode(frame):
                        container.mux(packet)
                for packet in stream.encode():  # flush the encoder
                    container.mux(packet)
            st.success("Recording complete!")
            # Transcribe and generate a response
            st.write("Transcribing audio...")
            transcription = transcribe_audio(audio_file_path)
            st.write(f"**You said:** {transcription}")
            with st.spinner("Generating response..."):
                response = chain.invoke({"question": transcription})["answer"]
            st.write(f"**Response:** {response}")
    # Upload Audio File Mode
    elif input_method == "Upload Audio File":
        uploaded_audio = st.file_uploader("Upload an audio file (.wav, .mp3)", type=["wav", "mp3"])
        if uploaded_audio:
            # Keep the original extension so the transcription API can detect
            # the format (saving an .mp3 under a .wav name can break decoding).
            suffix = os.path.splitext(uploaded_audio.name)[1] or ".wav"
            audio_file_path = f"uploaded_audio{suffix}"
            with open(audio_file_path, "wb") as f:
                f.write(uploaded_audio.read())
            st.audio(audio_file_path)
            st.write("Transcribing audio...")
            transcription = transcribe_audio(audio_file_path)
            st.write(f"**You said:** {transcription}")
            with st.spinner("Generating response..."):
                response = chain.invoke({"question": transcription})["answer"]
            st.write(f"**Response:** {response}")
else:
    st.info("Please upload PDF files to start chatting.")