DrishtiSharma committed on
Commit
1e24619
·
verified ·
1 Parent(s): 230a23e

Create sppech_input_interim.py

Files changed (1)
  1. sppech_input_interim.py +194 -0
sppech_input_interim.py ADDED
@@ -0,0 +1,194 @@
+ #ref: https://www.youtube.com/watch?v=3ZDVmzlM6Nc
+
+ import os
+ import queue  # used below to detect an empty audio-frame buffer
+ import chromadb
+ import streamlit as st
+ from langchain_huggingface import HuggingFaceEmbeddings
+ from langchain_chroma import Chroma
+ from langchain_groq import ChatGroq
+ from langchain.memory import ConversationBufferMemory
+ from langchain.chains import ConversationalRetrievalChain
+ from PyPDF2 import PdfReader
+ from groq import Groq
+ from streamlit_webrtc import webrtc_streamer, AudioProcessorBase, WebRtcMode
+ import av
+
+ # Clear ChromaDB cache to fix tenant issue
+ chromadb.api.client.SharedSystemClient.clear_system_cache()
+
+ # Ensure required environment variables are set
+ GROQ_API_KEY = os.getenv("GROQ_API_KEY")
+ if not GROQ_API_KEY:
+     st.error("GROQ_API_KEY is not set. Please configure it in environment variables.")
+     st.stop()
+
+ # Initialize Groq Client for transcription and LLM
+ groq_client = Groq(api_key=GROQ_API_KEY)
+ llm = ChatGroq(model="llama-3.1-70b-versatile", temperature=0, groq_api_key=GROQ_API_KEY)
+
+ # Function to process PDFs and set up the vectorstore
+ def process_and_store_pdfs(uploaded_files):
+     texts = []
+     for uploaded_file in uploaded_files:
+         reader = PdfReader(uploaded_file)
+         for page in reader.pages:
+             text = page.extract_text()
+             if text:  # skip pages with no extractable text
+                 texts.append(text)
+
+     embeddings = HuggingFaceEmbeddings()
+     vectorstore = Chroma.from_texts(texts, embedding=embeddings, persist_directory="vector_db_dir")
+     return vectorstore
+
+ # Function to set up the chat chain
+ def chat_chain(vectorstore):
+     retriever = vectorstore.as_retriever()
+     memory = ConversationBufferMemory(output_key="answer", memory_key="chat_history", return_messages=True)
+
+     chain = ConversationalRetrievalChain.from_llm(
+         llm=llm,
+         retriever=retriever,
+         chain_type="stuff",
+         memory=memory,
+         verbose=True,
+         return_source_documents=True
+     )
+     return chain
+
+ # Transcribe audio using Groq Whisper
+ def transcribe_audio(file_path):
+     """Transcribe audio using Groq's Whisper model."""
+     with open(file_path, "rb") as file:
+         transcription = groq_client.audio.transcriptions.create(
+             file=(file_path, file.read()),
+             model="distil-whisper-large-v3-en",
+             response_format="json",
+             language="en"
+         )
+     return transcription.text
+
+ # Audio Processor Class for Recording
+ class AudioProcessor(AudioProcessorBase):
+     def recv(self, frame: av.AudioFrame) -> av.AudioFrame:
+         # Pass frames through unchanged; the audio receiver buffers them
+         return frame
+
+ # Streamlit UI
+ st.title("Chat with PDFs via Speech/Text 🎙️📝📚")
+
+ uploaded_files = st.file_uploader("Upload PDF Files", accept_multiple_files=True, type=["pdf"])
+
+ if uploaded_files:
+     vectorstore = process_and_store_pdfs(uploaded_files)
+     chain = chat_chain(vectorstore)
+     st.success("PDFs processed! Ready to chat.")
+
+     input_method = st.radio("Choose Input Method", ["Text Input", "Record Audio", "Upload Audio File"])
+
+     # Text Input Mode
+     if input_method == "Text Input":
+         query = st.text_input("Ask your question:")
+         if query:
+             with st.spinner("Thinking..."):
+                 response = chain({"question": query})["answer"]
+             st.write(f"**Response:** {response}")
+
+     # Record Audio
+     elif input_method == "Record Audio":
+         st.write("Record your audio query:")
+         webrtc_ctx = webrtc_streamer(
+             key="record",
+             mode=WebRtcMode.SENDONLY,
+             audio_receiver_size=1024,
+             audio_processor_factory=AudioProcessor,
+             media_stream_constraints={"audio": True, "video": False},
+         )
+
+         if webrtc_ctx.audio_receiver:
+             st.write("Recording...")
+             audio_frames = []
+             while True:
+                 try:
+                     # get_frames() drains the frames buffered by the receiver
+                     audio_frames.extend(webrtc_ctx.audio_receiver.get_frames(timeout=1))
+                 except queue.Empty:
+                     break
+                 if len(audio_frames) > 5:  # Stop recording after a few frames
+                     break
+
+             # Save the recorded audio; PyAV needs an explicit output stream,
+             # so frames are encoded to 16-bit PCM and muxed into the WAV file
+             audio_file_path = "recorded_audio.wav"
+             with av.open(audio_file_path, "w") as container:
+                 stream = container.add_stream("pcm_s16le", rate=48000)  # WebRTC's usual sample rate
+                 for frame in audio_frames:
+                     for packet in stream.encode(frame):
+                         container.mux(packet)
+                 for packet in stream.encode(None):  # flush the encoder
+                     container.mux(packet)
+             st.success("Recording complete!")
+
+             # Transcribe and Generate Response
+             st.write("Transcribing audio...")
+             transcription = transcribe_audio(audio_file_path)
+             st.write(f"**You said:** {transcription}")
+
+             with st.spinner("Generating response..."):
+                 response = chain({"question": transcription})["answer"]
+             st.write(f"**Response:** {response}")
+
+     # Upload Audio File Mode
+     elif input_method == "Upload Audio File":
+         uploaded_audio = st.file_uploader("Upload an audio file (.wav, .mp3)", type=["wav", "mp3"])
+         if uploaded_audio:
+             audio_file_path = "uploaded_audio.wav"
+             with open(audio_file_path, "wb") as f:
+                 f.write(uploaded_audio.read())
+
+             st.audio(audio_file_path, format="audio/wav")
+             st.write("Transcribing audio...")
+             transcription = transcribe_audio(audio_file_path)
+             st.write(f"**You said:** {transcription}")
+
+             with st.spinner("Generating response..."):
+                 response = chain({"question": transcription})["answer"]
+             st.write(f"**Response:** {response}")
+ else:
+     st.info("Please upload PDF files to start chatting.")
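
A quick way to sanity-check the Groq transcription call outside Streamlit (a minimal standalone sketch, not part of the commit; "sample.wav" is a hypothetical local file, and GROQ_API_KEY is assumed to be exported):

    import os
    from groq import Groq

    client = Groq(api_key=os.getenv("GROQ_API_KEY"))
    with open("sample.wav", "rb") as f:
        # Same model and parameters as transcribe_audio() in the diff above
        result = client.audio.transcriptions.create(
            file=("sample.wav", f.read()),
            model="distil-whisper-large-v3-en",
            response_format="json",
            language="en",
        )
    print(result.text)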