#ref: https://www.youtube.com/watch?v=3ZDVmzlM6Nc
import os
import chromadb
import streamlit as st
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_chroma import Chroma
from langchain_groq import ChatGroq
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
from PyPDF2 import PdfReader
from groq import Groq
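# Assumed setup (not pinned in the source): the imports above map to the PyPI packages
#   streamlit chromadb PyPDF2 groq langchain langchain-huggingface langchain-chroma
#   langchain-groq sentence-transformers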
# Clear ChromaDB cache to fix tenant issue
chromadb.api.client.SharedSystemClient.clear_system_cache()
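# Rationale (assumed from common ChromaDB behavior): Streamlit reruns the script on each
# interaction, and a stale cached Chroma client can raise tenant errors such as
# "Could not connect to tenant default_tenant"; clearing the shared system cache
# forces a fresh client on the next Chroma call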
# Ensure required environment variables are set
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
if not GROQ_API_KEY:
    st.error("GROQ_API_KEY is not set. Please configure it in environment variables.")
    st.stop()
# Initialize Groq Client for transcription and LLM
groq_client = Groq(api_key=GROQ_API_KEY)
llm = ChatGroq(model="llama-3.1-70b-versatile", temperature=0, groq_api_key=GROQ_API_KEY)
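# temperature=0 keeps answers deterministic, which suits retrieval-grounded QA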
# Function to process PDFs and set up the vectorstore
def process_and_store_pdfs(uploaded_files):
    """Extract text from the uploaded PDFs and index it in a Chroma vectorstore."""
    texts = []
    for uploaded_file in uploaded_files:
        reader = PdfReader(uploaded_file)
        for page in reader.pages:
            # extract_text() can return None for image-only pages; skip those
            text = page.extract_text()
            if text:
                texts.append(text)
    embeddings = HuggingFaceEmbeddings()  # defaults to sentence-transformers/all-mpnet-base-v2
    vectorstore = Chroma.from_texts(texts, embedding=embeddings, persist_directory="vector_db_dir")
    return vectorstore
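# Note: each PDF page is embedded as a single document, matching the source. For long
# pages, splitting into smaller chunks first (e.g. with
# langchain_text_splitters.RecursiveCharacterTextSplitter) usually improves retrieval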
# Function to set up the chat chain
def chat_chain(vectorstore):
    retriever = vectorstore.as_retriever()
    memory = ConversationBufferMemory(output_key="answer", memory_key="chat_history", return_messages=True)
    chain = ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=retriever,
        chain_type="stuff",
        memory=memory,
        verbose=True,
        return_source_documents=True,
    )
    return chain
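# With return_source_documents=True the chain returns a dict holding both "answer" and
# "source_documents"; output_key="answer" tells the memory which field to store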
# Function to transcribe audio using Groq's Whisper
def transcribe_audio(file_path):
    """Transcribe audio using Groq's Whisper model."""
    with open(file_path, "rb") as file:
        transcription = groq_client.audio.transcriptions.create(
            file=(file_path, file.read()),
            model="distil-whisper-large-v3-en",
            response_format="json",
            language="en",
        )
    # The response object exposes the transcript as an attribute
    return transcription.text
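# Groq's transcription endpoint is OpenAI-compatible; with response_format="json"
# the returned object carries the transcript in its .text attribute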
# Streamlit UI
st.title("Chat with PDFs via Audio 🎙️📄")
uploaded_files = st.file_uploader("Upload PDF Files", accept_multiple_files=True, type=["pdf"])
if uploaded_files:
    # Streamlit reruns the whole script on every interaction; keep the chain in
    # session_state so the vectorstore is not rebuilt and chat memory is not lost
    if "chain" not in st.session_state:
        vectorstore = process_and_store_pdfs(uploaded_files)
        st.session_state.chain = chat_chain(vectorstore)
    chain = st.session_state.chain
    st.success("PDFs processed! Ready to chat.")

    input_method = st.radio("Choose Input Method", ["Text Input", "Audio File Upload"])

    # Text Input Mode
    if input_method == "Text Input":
        query = st.text_input("Ask your question:")
        if query:
            with st.spinner("Thinking..."):
                response = chain.invoke({"question": query})["answer"]
            st.write(f"**Response:** {response}")

    # Audio Input Mode (File Upload)
    elif input_method == "Audio File Upload":
        uploaded_audio = st.file_uploader("Upload an audio file (.wav, .mp3)", type=["wav", "mp3"])
        if uploaded_audio:
            # Preserve the original extension so the transcription API sees the right format
            suffix = os.path.splitext(uploaded_audio.name)[1] or ".wav"
            audio_file_path = f"uploaded_audio{suffix}"
            with open(audio_file_path, "wb") as f:
                f.write(uploaded_audio.read())
            st.audio(audio_file_path)
            st.write("Transcribing audio...")
            transcription = transcribe_audio(audio_file_path)
            st.write(f"**You said:** {transcription}")
            with st.spinner("Generating response..."):
                response = chain.invoke({"question": transcription})["answer"]
            st.write(f"**Response:** {response}")
else:
    st.info("Please upload PDF files to start chatting.")