DrishtiSharma committed · verified
Commit ef7ea32 · 1 Parent(s): efc7b63

Create speech_input_interim.py

Files changed (1):
  speech_input_interim.py (+146, -0)

speech_input_interim.py ADDED
@@ -0,0 +1,146 @@
+ #ref: https://www.youtube.com/watch?v=3ZDVmzlM6Nc
+
+ import os
+ import queue  # needed for streamlit_webrtc's frame-queue timeout below
+
+ import chromadb
+ import streamlit as st
+ from langchain_huggingface import HuggingFaceEmbeddings
+ from langchain_chroma import Chroma
+ from langchain_groq import ChatGroq
+ from langchain.memory import ConversationBufferMemory
+ from langchain.chains import ConversationalRetrievalChain
+ from PyPDF2 import PdfReader
+ from groq import Groq
+ from streamlit_webrtc import webrtc_streamer, AudioProcessorBase, WebRtcMode
+ import av
+
+ # Clear ChromaDB cache to fix tenant issue
+ chromadb.api.client.SharedSystemClient.clear_system_cache()
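+ # (Chroma caches one client per process; across Streamlit reruns a stale
+ # client can raise "Could not connect to tenant default_tenant", which
+ # clearing the cache works around.)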
+
+ # Ensure required environment variables are set
+ GROQ_API_KEY = os.getenv("GROQ_API_KEY")
+ if not GROQ_API_KEY:
+     st.error("GROQ_API_KEY is not set. Please configure it in environment variables.")
+     st.stop()
+
+ # Initialize Groq client for transcription and LLM
+ groq_client = Groq(api_key=GROQ_API_KEY)
+ llm = ChatGroq(model="llama-3.1-70b-versatile", temperature=0, groq_api_key=GROQ_API_KEY)
+
+ # Function to process PDFs and set up the vectorstore
+ def process_and_store_pdfs(uploaded_files):
+     texts = []
+     for uploaded_file in uploaded_files:
+         reader = PdfReader(uploaded_file)
+         for page in reader.pages:
+             text = page.extract_text()
+             if text:  # extract_text() returns None/"" for image-only pages
+                 texts.append(text)
+
+     embeddings = HuggingFaceEmbeddings()
+     vectorstore = Chroma.from_texts(texts, embedding=embeddings, persist_directory="vector_db_dir")
+     return vectorstore
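+ # Optional refinement (a sketch, not required for the app to run): whole pages
+ # are coarse retrieval units, and splitting them into overlapping chunks
+ # usually improves answer grounding. Assuming langchain's text splitter,
+ # inside process_and_store_pdfs one could do:
+ #
+ #     from langchain.text_splitter import RecursiveCharacterTextSplitter
+ #     splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
+ #     chunks = splitter.split_text("\n".join(texts))
+ #     vectorstore = Chroma.from_texts(chunks, embedding=embeddings,
+ #                                     persist_directory="vector_db_dir")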
+
+ # Function to set up the chat chain
+ def chat_chain(vectorstore):
+     retriever = vectorstore.as_retriever()
+     memory = ConversationBufferMemory(output_key="answer", memory_key="chat_history", return_messages=True)
+
+     chain = ConversationalRetrievalChain.from_llm(
+         llm=llm,
+         retriever=retriever,
+         chain_type="stuff",
+         memory=memory,
+         verbose=True,
+         return_source_documents=True
+     )
+     return chain
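+ # The chain is invoked as chain({"question": ...}); because
+ # return_source_documents=True, the result is a dict whose "answer" key holds
+ # the reply and whose "source_documents" key lists the retrieved passages.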
+
+ # Transcribe audio using Groq Whisper
+ def transcribe_audio(file_path):
+     """Transcribe audio using Groq's Whisper model."""
+     with open(file_path, "rb") as file:
+         transcription = groq_client.audio.transcriptions.create(
+             file=(file_path, file.read()),
+             model="distil-whisper-large-v3-en",
+             response_format="json",
+             language="en"
+         )
+     return transcription.text
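+ # Groq's audio endpoint follows the OpenAI transcription API shape: it takes a
+ # (filename, bytes) tuple and returns an object exposing the transcript via
+ # .text, e.g. transcribe_audio("recorded_audio.wav") -> "what is this paper about"
+ # (example output is illustrative).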
+
+ # Audio Processor Class for Recording
+ class AudioProcessor(AudioProcessorBase):
+     def recv(self, frame: av.AudioFrame) -> av.AudioFrame:
+         return frame
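+ # The processor passes frames through untouched; the actual capture happens
+ # via webrtc_ctx.audio_receiver below, which buffers the incoming frames.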
+
+ # Streamlit UI
+ st.title("Chat with PDFs via Speech/Text 🎙️📝📚")
+
+ uploaded_files = st.file_uploader("Upload PDF Files", accept_multiple_files=True, type=["pdf"])
+
+ if uploaded_files:
+     vectorstore = process_and_store_pdfs(uploaded_files)
+     chain = chat_chain(vectorstore)
+     st.success("PDFs processed! Ready to chat.")
+
+     input_method = st.radio("Choose Input Method", ["Text Input", "Record Audio", "Upload Audio File"])
+
+     # Text Input Mode
+     if input_method == "Text Input":
+         query = st.text_input("Ask your question:")
+         if query:
+             with st.spinner("Thinking..."):
+                 response = chain({"question": query})["answer"]
+                 st.write(f"**Response:** {response}")
+
+     # Record Audio Mode
+     elif input_method == "Record Audio":
+         st.write("Record your audio query:")
+         webrtc_ctx = webrtc_streamer(
+             key="record",
+             mode=WebRtcMode.SENDONLY,
+             audio_receiver_size=1024,
+             audio_processor_factory=AudioProcessor,
+             media_stream_constraints={"audio": True, "video": False},
+         )
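+         # SENDONLY streams the microphone from the browser to the server
+         # without echoing audio back; frames accumulate in audio_receiver
+         # (audio_receiver_size is the frame-queue capacity).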
+
+         if webrtc_ctx.audio_receiver:
+             st.write("Recording...")
+             audio_frames = []
+             # Drain buffered frames; get_frames() blocks up to the timeout and
+             # raises queue.Empty once the stream stops delivering audio
+             while True:
+                 try:
+                     frames = webrtc_ctx.audio_receiver.get_frames(timeout=1)
+                 except queue.Empty:
+                     break
+                 audio_frames.extend(frames)
+                 if len(audio_frames) > 100:  # WebRTC frames are ~20 ms, so roughly 2 s of audio
+                     break
+
+             # Encode the recorded frames into a WAV file
+             audio_file_path = "recorded_audio.wav"
+             container = av.open(audio_file_path, mode="w", format="wav")
+             # WebRTC audio is typically 48 kHz stereo s16; adjust if your input differs
+             stream = container.add_stream("pcm_s16le", rate=48000, layout="stereo")
+             for frame in audio_frames:
+                 frame.pts = None  # let the encoder assign timestamps
+                 for packet in stream.encode(frame):
+                     container.mux(packet)
+             for packet in stream.encode(None):  # flush the encoder
+                 container.mux(packet)
+             container.close()
+             st.success("Recording complete!")
+
+             # Transcribe and Generate Response
+             st.write("Transcribing audio...")
+             transcription = transcribe_audio(audio_file_path)
+             st.write(f"**You said:** {transcription}")
+
+             with st.spinner("Generating response..."):
+                 response = chain({"question": transcription})["answer"]
+                 st.write(f"**Response:** {response}")
128
+
129
+ # Upload Audio File Mode
130
+ elif input_method == "Upload Audio File":
131
+ uploaded_audio = st.file_uploader("Upload an audio file (.wav, .mp3)", type=["wav", "mp3"])
132
+ if uploaded_audio:
133
+ audio_file_path = "uploaded_audio.wav"
134
+ with open(audio_file_path, "wb") as f:
135
+ f.write(uploaded_audio.read())
136
+
137
+ st.audio(audio_file_path, format="audio/wav")
138
+ st.write("Transcribing audio...")
139
+ transcription = transcribe_audio(audio_file_path)
140
+ st.write(f"**You said:** {transcription}")
141
+
142
+ with st.spinner("Generating response..."):
143
+ response = chain({"question": transcription})["answer"]
144
+ st.write(f"**Response:** {response}")
145
+ else:
146
+ st.info("Please upload PDF files to start chatting.")
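+ # To run locally (assuming streamlit, streamlit-webrtc, av, PyPDF2, chromadb,
+ # groq, langchain, langchain-huggingface, langchain-chroma, and langchain-groq
+ # are installed):
+ #     GROQ_API_KEY=<your-key> streamlit run speech_input_interim.py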