Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -140,118 +140,6 @@
|
|
140 |
# if __name__ == "__main__":
|
141 |
# demo.launch()
|
142 |
|
143 |
-
# import gradio as gr
|
144 |
-
# from huggingface_hub import InferenceClient
|
145 |
-
# import tempfile
|
146 |
-
|
147 |
-
# # Initialize clients
|
148 |
-
# chat_client = InferenceClient("Qwen/Qwen2.5-Coder-32B-Instruct")
|
149 |
-
# stt_client = InferenceClient("openai/whisper-large-v3")
|
150 |
-
|
151 |
-
# def transcribe_audio(audio_file):
|
152 |
-
# """Convert audio to text using Whisper model"""
|
153 |
-
# with open(audio_file, "rb") as f:
|
154 |
-
# result = stt_client.automatic_speech_recognition(f.read())
|
155 |
-
# return result.text # Extract only the text from the response
|
156 |
-
|
157 |
-
# def respond(history, query):
|
158 |
-
# system_message = """You are a friendly Product Assistant. Follow these rules:
|
159 |
-
# 1. If the query is product-related, provide structured recommendations
|
160 |
-
# 2. Consider both voice and text inputs equally
|
161 |
-
# 3. Format responses with bullet points and emojis
|
162 |
-
# 4. Always acknowledge voice queries specifically"""
|
163 |
-
|
164 |
-
# product_prompt = f"""Analyze this {'voice' if history[-1][0] == query else 'text'} query:
|
165 |
-
# "{query}"
|
166 |
-
# Recommend products considering:
|
167 |
-
# - User intent
|
168 |
-
# - Semantic meaning
|
169 |
-
# - Potential use cases
|
170 |
-
# - Price ranges
|
171 |
-
# Provide ranked list with brief explanations"""
|
172 |
-
|
173 |
-
# messages = [
|
174 |
-
# {"role": "system", "content": system_message},
|
175 |
-
# {"role": "user", "content": product_prompt}
|
176 |
-
# ]
|
177 |
-
|
178 |
-
# # Build conversation history
|
179 |
-
# for entry in history[:-1]: # Exclude current query
|
180 |
-
# messages.extend([
|
181 |
-
# {"role": "user", "content": entry[0]},
|
182 |
-
# {"role": "assistant", "content": entry[1]}
|
183 |
-
# ])
|
184 |
-
|
185 |
-
# # Generate streamed response
|
186 |
-
# response = ""
|
187 |
-
# for chunk in chat_client.chat_completion(
|
188 |
-
# messages,
|
189 |
-
# max_tokens=2048,
|
190 |
-
# stream=True,
|
191 |
-
# temperature=0.7,
|
192 |
-
# top_p=0.95,
|
193 |
-
# ):
|
194 |
-
# token = chunk.choices[0].delta.content
|
195 |
-
# response += token
|
196 |
-
# history[-1] = (history[-1][0], response) # Update last entry
|
197 |
-
# yield history
|
198 |
-
|
199 |
-
# # Custom styling
|
200 |
-
# css = """
|
201 |
-
# .gradio-container { background: #f5f7fa !important; }
|
202 |
-
# .audio-input { background: white !important; border-radius: 10px; }
|
203 |
-
# """
|
204 |
-
|
205 |
-
# with gr.Blocks(css=css) as demo:
|
206 |
-
# gr.Markdown("# π€ Voice-Activated Product Advisor ποΈ")
|
207 |
-
|
208 |
-
# with gr.Row():
|
209 |
-
# chatbot = gr.Chatbot(height=600, bubble_full_width=False)
|
210 |
-
|
211 |
-
# with gr.Column():
|
212 |
-
# with gr.Tab("ποΈ Voice Input"):
|
213 |
-
# audio_input = gr.Audio(
|
214 |
-
# sources="microphone",
|
215 |
-
# type="filepath",
|
216 |
-
# label="Speak your product request",
|
217 |
-
# elem_classes="audio-input"
|
218 |
-
# )
|
219 |
-
# with gr.Tab("π Text Input"):
|
220 |
-
# text_input = gr.Textbox(label="Type your request")
|
221 |
-
# submit_btn = gr.Button("π Get Recommendations", variant="primary")
|
222 |
-
|
223 |
-
# def process_inputs(text, audio, history):
|
224 |
-
# """Handle both input types"""
|
225 |
-
# query = text.strip()
|
226 |
-
# if audio:
|
227 |
-
# query = transcribe_audio(audio)
|
228 |
-
# # Add voice-specific marker
|
229 |
-
# query = f"π€ Voice Query: {query}"
|
230 |
-
|
231 |
-
# if query:
|
232 |
-
# return history + [(query, None)] # Proper tuple format
|
233 |
-
# return history
|
234 |
-
|
235 |
-
# submit_btn.click(
|
236 |
-
# process_inputs,
|
237 |
-
# [text_input, audio_input, chatbot],
|
238 |
-
# chatbot,
|
239 |
-
# queue=False
|
240 |
-
# ).then(
|
241 |
-
# respond,
|
242 |
-
# [chatbot, text_input],
|
243 |
-
# chatbot
|
244 |
-
# )
|
245 |
-
|
246 |
-
# # Clear inputs after submission
|
247 |
-
# submit_btn.click(
|
248 |
-
# lambda: [None, None], # Clear audio and text inputs
|
249 |
-
# outputs=[text_input, audio_input]
|
250 |
-
# )
|
251 |
-
|
252 |
-
# if __name__ == "__main__":
|
253 |
-
# demo.launch()
|
254 |
-
|
255 |
import gradio as gr
|
256 |
from huggingface_hub import InferenceClient
|
257 |
import tempfile
|
@@ -262,33 +150,38 @@ stt_client = InferenceClient("openai/whisper-large-v3")
|
|
262 |
|
263 |
def transcribe_audio(audio_file):
|
264 |
"""Convert audio to text using Whisper model"""
|
265 |
-
|
266 |
-
|
267 |
-
|
268 |
-
return result.text
|
269 |
-
except Exception as e:
|
270 |
-
print(f"Transcription error: {e}")
|
271 |
-
return "Could not process audio. Please try again."
|
272 |
|
273 |
def respond(history, query):
|
274 |
-
system_message = """You are a
|
275 |
-
1.
|
276 |
-
2.
|
277 |
-
3.
|
|
|
278 |
|
279 |
-
product_prompt = f"""Analyze this {'
|
280 |
-
"{query
|
281 |
Recommend products considering:
|
282 |
-
- Voice tone analysis (if audio)
|
283 |
-
- Semantic meaning
|
284 |
- User intent
|
285 |
-
-
|
|
|
|
|
|
|
286 |
|
287 |
messages = [
|
288 |
{"role": "system", "content": system_message},
|
289 |
{"role": "user", "content": product_prompt}
|
290 |
]
|
291 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
292 |
# Generate streamed response
|
293 |
response = ""
|
294 |
for chunk in chat_client.chat_completion(
|
@@ -300,65 +193,49 @@ def respond(history, query):
|
|
300 |
):
|
301 |
token = chunk.choices[0].delta.content
|
302 |
response += token
|
303 |
-
history[-1] = (history[-1][0], response)
|
304 |
yield history
|
305 |
|
|
|
306 |
css = """
|
307 |
-
.gradio-container { background: #
|
308 |
.audio-input { background: white !important; border-radius: 10px; }
|
309 |
-
.mic-status { color: #4a90e2; font-weight: bold; }
|
310 |
"""
|
311 |
|
312 |
-
with gr.Blocks(css=css
|
313 |
-
gr.Markdown("#
|
314 |
|
315 |
with gr.Row():
|
316 |
chatbot = gr.Chatbot(height=600, bubble_full_width=False)
|
317 |
|
318 |
with gr.Column():
|
319 |
-
|
320 |
-
with gr.Group():
|
321 |
audio_input = gr.Audio(
|
322 |
sources="microphone",
|
323 |
type="filepath",
|
324 |
-
label="
|
325 |
-
elem_classes="audio-input"
|
326 |
-
interactive=True
|
327 |
)
|
328 |
-
|
329 |
-
|
330 |
-
# Text input
|
331 |
-
text_input = gr.Textbox(label="Or type your request")
|
332 |
-
|
333 |
-
# Unified submit button
|
334 |
submit_btn = gr.Button("π Get Recommendations", variant="primary")
|
335 |
|
336 |
-
# Handle audio permissions
|
337 |
-
def request_mic_access():
|
338 |
-
return gr.update(text="π’ Mic ready") if audio_input.is_enabled else gr.update(text="π΄ Mic blocked")
|
339 |
-
|
340 |
-
# Process inputs
|
341 |
def process_inputs(text, audio, history):
|
|
|
342 |
query = text.strip()
|
343 |
if audio:
|
344 |
-
|
345 |
-
|
|
|
346 |
|
347 |
if query:
|
348 |
-
return history + [(query, None)]
|
349 |
-
return history
|
350 |
|
351 |
-
# Connect all components
|
352 |
-
audio_input.change(
|
353 |
-
request_mic_access,
|
354 |
-
outputs=mic_status,
|
355 |
-
queue=False
|
356 |
-
)
|
357 |
-
|
358 |
submit_btn.click(
|
359 |
process_inputs,
|
360 |
[text_input, audio_input, chatbot],
|
361 |
-
|
362 |
queue=False
|
363 |
).then(
|
364 |
respond,
|
@@ -366,5 +243,128 @@ with gr.Blocks(css=css, title="Voice Product Assistant") as demo:
|
|
366 |
chatbot
|
367 |
)
|
368 |
|
|
|
|
|
|
|
|
|
|
|
|
|
369 |
if __name__ == "__main__":
|
370 |
-
demo.launch(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
140 |
# if __name__ == "__main__":
|
141 |
# demo.launch()
|
142 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
143 |
import gradio as gr
|
144 |
from huggingface_hub import InferenceClient
|
145 |
import tempfile
|
|
|
150 |
|
151 |
def transcribe_audio(audio_file):
    """Return the transcript text for the recording stored at ``audio_file``.

    The file is read in binary mode and its bytes are sent to the
    module-level ``stt_client`` speech-recognition endpoint; only the
    ``text`` attribute of the response is returned.
    """
    with open(audio_file, "rb") as audio_stream:
        audio_bytes = audio_stream.read()
    transcription = stt_client.automatic_speech_recognition(audio_bytes)
    return transcription.text
|
|
|
|
|
|
|
|
|
156 |
|
157 |
def respond(history, query):
    """Stream the assistant's reply for the pending (last) chat entry.

    Args:
        history: Chat history as a list of ``(user_text, assistant_text)``
            pairs. The last pair is the pending turn queued by
            ``process_inputs``; its reply slot is ``None`` until filled here.
        query: Current textbox value. Used only as a fallback: it is empty
            for voice submissions and a separate click handler clears it,
            so the pending history entry is the reliable source of the query.

    Yields:
        ``history`` with the last pair's reply extended as tokens arrive.
    """
    # No pending turn (nothing was queued, or the last turn already has a
    # reply): leave the chat untouched instead of indexing an empty list or
    # regenerating and overwriting the previous answer.
    if not history or history[-1][1] is not None:
        yield history
        return

    user_text = history[-1][0] or query or ""

    # process_inputs prefixes transcripts with "<emoji> Voice Query: ".
    # Anything in front of the marker should be just that short emoji prefix.
    head, marker, transcript = user_text.partition("Voice Query: ")
    is_voice = bool(marker) and len(head.strip()) <= 4
    prompt_query = transcript if is_voice else user_text

    system_message = """You are a friendly Product Assistant. Follow these rules:
1. If the query is product-related, provide structured recommendations
2. Consider both voice and text inputs equally
3. Format responses with bullet points and emojis
4. Always acknowledge voice queries specifically"""

    product_prompt = f"""Analyze this {'voice' if is_voice else 'text'} query:
"{prompt_query}"
Recommend products considering:
- User intent
- Semantic meaning
- Potential use cases
- Price ranges
Provide ranked list with brief explanations"""

    # Earlier turns go between the system message and the current prompt so
    # the model sees the conversation in chronological order.
    messages = [{"role": "system", "content": system_message}]
    for past_user, past_reply in history[:-1]:
        if past_user:
            messages.append({"role": "user", "content": past_user})
        if past_reply:
            messages.append({"role": "assistant", "content": past_reply})
    messages.append({"role": "user", "content": product_prompt})

    # Generate streamed response
    response = ""
    for chunk in chat_client.chat_completion(
        messages,
        max_tokens=2048,
        stream=True,
        temperature=0.7,
        top_p=0.95,
    ):
        if not chunk.choices:
            continue
        token = chunk.choices[0].delta.content
        if not token:
            # Role-only and final chunks carry no text (content may be None).
            continue
        response += token
        history[-1] = (history[-1][0], response)  # Update last entry
        yield history
|
198 |
|
199 |
+
# Custom styling
|
200 |
css = """
|
201 |
+
.gradio-container { background: #f5f7fa !important; }
|
202 |
.audio-input { background: white !important; border-radius: 10px; }
|
|
|
203 |
"""
|
204 |
|
205 |
+
with gr.Blocks(css=css) as demo:
|
206 |
+
gr.Markdown("# π€ Voice-Activated Product Advisor ποΈ")
|
207 |
|
208 |
with gr.Row():
|
209 |
chatbot = gr.Chatbot(height=600, bubble_full_width=False)
|
210 |
|
211 |
with gr.Column():
|
212 |
+
with gr.Tab("ποΈ Voice Input"):
|
|
|
213 |
audio_input = gr.Audio(
|
214 |
sources="microphone",
|
215 |
type="filepath",
|
216 |
+
label="Speak your product request",
|
217 |
+
elem_classes="audio-input"
|
|
|
218 |
)
|
219 |
+
with gr.Tab("π Text Input"):
|
220 |
+
text_input = gr.Textbox(label="Type your request")
|
|
|
|
|
|
|
|
|
221 |
submit_btn = gr.Button("π Get Recommendations", variant="primary")
|
222 |
|
|
|
|
|
|
|
|
|
|
|
223 |
def process_inputs(text, audio, history):
    """Queue the user's request as a pending turn in the chat history.

    Args:
        text: Textbox value; may be ``None`` once the inputs have been cleared.
        audio: Path of the recorded audio file, or a falsy value when nothing
            was recorded.
        history: Existing list of ``(user_text, assistant_text)`` pairs, or
            ``None`` for a fresh chat.

    Returns:
        A new list with ``(query, None)`` appended when there is a non-empty
        query, otherwise the history unchanged (``[]`` when it was ``None``).
        A non-empty voice transcript takes precedence over typed text.
    """
    # The clear-inputs handler resets the textbox to None, so guard before
    # calling .strip() on it.
    query = (text or "").strip()
    history = history or []

    if audio:
        transcript = (transcribe_audio(audio) or "").strip()
        if transcript:
            # Add voice-specific marker
            query = f"π€ Voice Query: {transcript}"

    if not query:
        return history
    return history + [(query, None)]  # Pending turn: reply filled in later
|
234 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
235 |
submit_btn.click(
|
236 |
process_inputs,
|
237 |
[text_input, audio_input, chatbot],
|
238 |
+
chatbot,
|
239 |
queue=False
|
240 |
).then(
|
241 |
respond,
|
|
|
243 |
chatbot
|
244 |
)
|
245 |
|
246 |
+
# Clear inputs after submission
|
247 |
+
submit_btn.click(
|
248 |
+
lambda: [None, None], # Clear audio and text inputs
|
249 |
+
outputs=[text_input, audio_input]
|
250 |
+
)
|
251 |
+
|
252 |
if __name__ == "__main__":
|
253 |
+
demo.launch()
|
254 |
+
|
255 |
+
# import gradio as gr
|
256 |
+
# from huggingface_hub import InferenceClient
|
257 |
+
# import tempfile
|
258 |
+
|
259 |
+
# # Initialize clients
|
260 |
+
# chat_client = InferenceClient("Qwen/Qwen2.5-Coder-32B-Instruct")
|
261 |
+
# stt_client = InferenceClient("openai/whisper-large-v3")
|
262 |
+
|
263 |
+
# def transcribe_audio(audio_file):
|
264 |
+
# """Convert audio to text using Whisper model"""
|
265 |
+
# try:
|
266 |
+
# with open(audio_file, "rb") as f:
|
267 |
+
# result = stt_client.automatic_speech_recognition(f.read())
|
268 |
+
# return result.text
|
269 |
+
# except Exception as e:
|
270 |
+
# print(f"Transcription error: {e}")
|
271 |
+
# return "Could not process audio. Please try again."
|
272 |
+
|
273 |
+
# def respond(history, query):
|
274 |
+
# system_message = """You are a Voice-Aware Product Assistant. Rules:
|
275 |
+
# 1. Always acknowledge voice queries with π§ icon
|
276 |
+
# 2. Provide structured recommendations with emojis
|
277 |
+
# 3. Consider both voice and text inputs equally"""
|
278 |
+
|
279 |
+
# product_prompt = f"""Analyze this {'π§ VOICE' if 'π§' in query else 'π TEXT'} query:
|
280 |
+
# "{query.replace('π§ VOICE: ', '')}"
|
281 |
+
# Recommend products considering:
|
282 |
+
# - Voice tone analysis (if audio)
|
283 |
+
# - Semantic meaning
|
284 |
+
# - User intent
|
285 |
+
# - Price context"""
|
286 |
+
|
287 |
+
# messages = [
|
288 |
+
# {"role": "system", "content": system_message},
|
289 |
+
# {"role": "user", "content": product_prompt}
|
290 |
+
# ]
|
291 |
+
|
292 |
+
# # Generate streamed response
|
293 |
+
# response = ""
|
294 |
+
# for chunk in chat_client.chat_completion(
|
295 |
+
# messages,
|
296 |
+
# max_tokens=2048,
|
297 |
+
# stream=True,
|
298 |
+
# temperature=0.7,
|
299 |
+
# top_p=0.95,
|
300 |
+
# ):
|
301 |
+
# token = chunk.choices[0].delta.content
|
302 |
+
# response += token
|
303 |
+
# history[-1] = (history[-1][0], response)
|
304 |
+
# yield history
|
305 |
+
|
306 |
+
# css = """
|
307 |
+
# .gradio-container { background: #f8f9fa !important; }
|
308 |
+
# .audio-input { background: white !important; border-radius: 10px; }
|
309 |
+
# .mic-status { color: #4a90e2; font-weight: bold; }
|
310 |
+
# """
|
311 |
+
|
312 |
+
# with gr.Blocks(css=css, title="Voice Product Assistant") as demo:
|
313 |
+
# gr.Markdown("# π§ Voice-Activated Product Advisor π")
|
314 |
+
|
315 |
+
# with gr.Row():
|
316 |
+
# chatbot = gr.Chatbot(height=600, bubble_full_width=False)
|
317 |
+
|
318 |
+
# with gr.Column():
|
319 |
+
# # Audio input with status indicator
|
320 |
+
# with gr.Group():
|
321 |
+
# audio_input = gr.Audio(
|
322 |
+
# sources="microphone",
|
323 |
+
# type="filepath",
|
324 |
+
# label="Click mic & speak",
|
325 |
+
# elem_classes="audio-input",
|
326 |
+
# interactive=True
|
327 |
+
# )
|
328 |
+
# mic_status = gr.Markdown("π΄ Mic offline", elem_classes="mic-status")
|
329 |
+
|
330 |
+
# # Text input
|
331 |
+
# text_input = gr.Textbox(label="Or type your request")
|
332 |
+
|
333 |
+
# # Unified submit button
|
334 |
+
# submit_btn = gr.Button("π Get Recommendations", variant="primary")
|
335 |
+
|
336 |
+
# # Handle audio permissions
|
337 |
+
# def request_mic_access():
|
338 |
+
# return gr.update(text="π’ Mic ready") if audio_input.is_enabled else gr.update(text="π΄ Mic blocked")
|
339 |
+
|
340 |
+
# # Process inputs
|
341 |
+
# def process_inputs(text, audio, history):
|
342 |
+
# query = text.strip()
|
343 |
+
# if audio:
|
344 |
+
# transcript = transcribe_audio(audio)
|
345 |
+
# query = f"π§ VOICE: {transcript}"
|
346 |
+
|
347 |
+
# if query:
|
348 |
+
# return history + [(query, None)], ""
|
349 |
+
# return history, ""
|
350 |
+
|
351 |
+
# # Connect all components
|
352 |
+
# audio_input.change(
|
353 |
+
# request_mic_access,
|
354 |
+
# outputs=mic_status,
|
355 |
+
# queue=False
|
356 |
+
# )
|
357 |
+
|
358 |
+
# submit_btn.click(
|
359 |
+
# process_inputs,
|
360 |
+
# [text_input, audio_input, chatbot],
|
361 |
+
# [chatbot, text_input],
|
362 |
+
# queue=False
|
363 |
+
# ).then(
|
364 |
+
# respond,
|
365 |
+
# [chatbot, text_input],
|
366 |
+
# chatbot
|
367 |
+
# )
|
368 |
+
|
369 |
+
# if __name__ == "__main__":
|
370 |
+
# demo.launch(server_port=7860, share=False)
|