Pijush2023 committed on
Commit
3db5a03
·
verified ·
1 Parent(s): eb0fa43

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -7
app.py CHANGED
@@ -479,17 +479,24 @@ base_audio_drive = "/data/audio"
479
  # return stream, full_text, result
480
 
481
 
 
 
 
 
 
 
 
 
 
 
 
482
  def transcribe_function(stream, new_chunk):
483
  sr, y = new_chunk[0], new_chunk[1]
484
  y = y.astype(np.float32) / np.max(np.abs(y))
485
- if stream is not None:
486
- stream = np.concatenate([stream, y])
487
- else:
488
- stream = y
489
  result = pipe_asr({"array": stream, "sampling_rate": sr}, return_timestamps=False)
490
  full_text = result.get("text", "")
491
- return stream, full_text # Return the transcribed text
492
-
493
 
494
 
495
  def update_map_with_response(history):
@@ -703,7 +710,8 @@ with gr.Blocks(theme='Pijush2023/scikit-learn-pijush') as demo:
703
  clear_button.click(fn=clear_textbox, inputs=None, outputs=chat_input)
704
 
705
  audio_input = gr.Audio(sources=["microphone"], streaming=True, type='numpy')
706
- audio_input.stream(transcribe_function, inputs=[state, audio_input], outputs=[state, chat_input], api_name="voice_query_to_text")
 
707
 
708
 
709
 
 
479
  # return stream, full_text, result
480
 
481
 
482
+ # def transcribe_function(stream, new_chunk):
483
+ # sr, y = new_chunk[0], new_chunk[1]
484
+ # y = y.astype(np.float32) / np.max(np.abs(y))
485
+ # if stream is not None:
486
+ # stream = np.concatenate([stream, y])
487
+ # else:
488
+ # stream = y
489
+ # result = pipe_asr({"array": stream, "sampling_rate": sr}, return_timestamps=False)
490
+ # full_text = result.get("text", "")
491
+ # return stream, full_text # Return the transcribed text
492
+
493
  def transcribe_function(stream, new_chunk):
494
  sr, y = new_chunk[0], new_chunk[1]
495
  y = y.astype(np.float32) / np.max(np.abs(y))
496
+ stream = y # Always start with fresh stream
 
 
 
497
  result = pipe_asr({"array": stream, "sampling_rate": sr}, return_timestamps=False)
498
  full_text = result.get("text", "")
499
+ return stream, full_text
 
500
 
501
 
502
  def update_map_with_response(history):
 
710
  clear_button.click(fn=clear_textbox, inputs=None, outputs=chat_input)
711
 
712
  audio_input = gr.Audio(sources=["microphone"], streaming=True, type='numpy')
713
# transcribe_function takes (stream, new_chunk) and returns (stream, text),
# so both lists need two real components; None is not a valid component.
audio_input.stream(transcribe_function, inputs=[state, audio_input], outputs=[state, chat_input], api_name="voice_query_to_text")
715
 
716
 
717