Update app.py
app.py CHANGED
@@ -243,6 +243,47 @@ def insert_prompt(current_text, prompt):
     return prompt[0] if prompt else current_text
 
 
+# Define the ASR model with Whisper
+model_id = 'openai/whisper-small'
+device = "cuda:0" if torch.cuda.is_available() else "cpu"
+torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
+model = AutoModelForSpeechSeq2Seq.from_pretrained(model_id, torch_dtype=torch_dtype).to(device)
+processor = AutoProcessor.from_pretrained(model_id)
+
+pipe_asr = pipeline(
+    "automatic-speech-recognition",
+    model=model,
+    tokenizer=processor.tokenizer,
+    feature_extractor=processor.feature_extractor,
+    max_new_tokens=128,
+    chunk_length_s=15,
+    batch_size=16,
+    torch_dtype=torch_dtype,
+    device=device,
+    return_timestamps=True
+)
+
+# Define the transcription function for streaming audio
+def transcribe_function(stream, new_chunk):
+    try:
+        sr, y = new_chunk[0], new_chunk[1]
+    except TypeError:
+        print(f"Error chunk structure: {type(new_chunk)}, content: {new_chunk}")
+        return stream, "", None
+
+    y = y.astype(np.float32) / np.max(np.abs(y))
+
+    if stream is not None:
+        stream = np.concatenate([stream, y])
+    else:
+        stream = y
+
+    result = pipe_asr({"array": stream, "sampling_rate": sr}, return_timestamps=False)
+    full_text = result.get("text", "")
+
+    return stream, full_text, full_text
+
+
 
 # Create the Gradio Blocks interface
 with gr.Blocks(theme="rawrsor1/Everforest") as demo:
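Not part of the commit: a minimal standalone sketch of the per-chunk logic inside transcribe_function, using synthetic sine-wave chunks in place of microphone input so it runs with numpy alone. The peak > 0 guard is an assumption added here for illustration; the commit divides by the chunk's peak unconditionally.

import numpy as np

def accumulate(stream, new_chunk):
    # Each streamed chunk arrives as a (sample_rate, np.ndarray) tuple.
    sr, y = new_chunk
    y = y.astype(np.float32)
    peak = np.max(np.abs(y))
    if peak > 0:              # guard added in this sketch only
        y = y / peak
    stream = y if stream is None else np.concatenate([stream, y])
    return stream, sr

stream, sr = None, 16000
for _ in range(3):            # three ~0.1 s chunks, as gr.Audio(streaming=True, every=0.1) would deliver
    t = np.arange(int(0.1 * sr)) / sr
    chunk = (0.3 * np.sin(2 * np.pi * 440 * t)).astype(np.float32)
    stream, sr = accumulate(stream, (sr, chunk))

print(stream.shape)           # (4800,): the full accumulated stream re-sent to the ASR pipeline each time
# result = pipe_asr({"array": stream, "sampling_rate": sr}, return_timestamps=False)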
@@ -250,6 +291,7 @@ with gr.Blocks(theme="rawrsor1/Everforest") as demo:
     with gr.Row():
         with gr.Column():
             question_input = gr.Textbox(label="Ask a Question", placeholder="Type your question here...")
+            audio_input = gr.Audio(sources=["microphone"], streaming=True, type='numpy', every=0.1, label="Speak to Ask")
 
 
 
@@ -276,6 +318,10 @@ with gr.Blocks(theme="rawrsor1/Everforest") as demo:
     # Define interaction for hitting the Enter key
     question_input.submit(fn=add_message, inputs=[chatbot, question_input], outputs=[chatbot, question_input])\
         .then(fn=chat_with_bot, inputs=[chatbot], outputs=chatbot)
+
+    # Speech-to-Text functionality
+    state = gr.State()
+    audio_input.stream(transcribe_function, inputs=[state, audio_input], outputs=[state, question_input])
 
 
     generate_audio_btn.click(fn=generate_audio_from_last_response, inputs=chatbot, outputs=audio_output)
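For reference, a self-contained sketch of the streaming wiring this commit adds: a microphone gr.Audio component feeding a callback through gr.State, with a dummy transcriber standing in for the Whisper pipeline so it runs without downloading the model. It assumes Gradio 4.x, and it returns exactly two values to match the two wired outputs, whereas the commit's transcribe_function returns three.

import numpy as np
import gradio as gr

def fake_transcribe(stream, new_chunk):
    # Stand-in for transcribe_function; with type="numpy" each chunk is (sample_rate, np.ndarray).
    if new_chunk is None:
        return stream, ""
    sr, y = new_chunk
    stream = y if stream is None else np.concatenate([stream, y])
    return stream, f"received {len(stream) / sr:.1f}s of audio"

with gr.Blocks() as demo:
    question_box = gr.Textbox(label="Ask a Question")
    mic = gr.Audio(sources=["microphone"], streaming=True, type="numpy", label="Speak to Ask")
    state = gr.State()  # keeps the accumulated audio between chunks
    mic.stream(fake_transcribe, inputs=[state, mic], outputs=[state, question_box])

if __name__ == "__main__":
    demo.launch()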