Pijush2023 committed on
Commit
45f988c
·
verified ·
1 Parent(s): 7da87e1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -28
app.py CHANGED
@@ -90,32 +90,23 @@ def auto_reset_state():
90
  time.sleep(2)
91
  return AppState() # Reset the state
92
 
93
- # Function to process audio input and transcribe it
94
- def transcribe_function(state: AppState, new_chunk):
95
- try:
96
- sr, y = new_chunk[0], new_chunk[1]
97
- except TypeError:
98
- print(f"Error chunk structure: {type(new_chunk)}, content: {new_chunk}")
99
- return state, ""
100
-
101
- if y is None or len(y) == 0:
102
- return state, ""
103
-
104
- y = y.astype(np.float32)
105
- max_abs_y = np.max(np.abs(y))
106
- if max_abs_y > 0:
107
- y = y / max_abs_y
108
-
109
- if state.stream is not None and len(state.stream) > 0:
110
- state.stream = np.concatenate([state.stream, y])
111
  else:
112
- state.stream = y
113
 
114
- result = pipe_asr({"array": state.stream, "sampling_rate": sr}, return_timestamps=False)
115
- full_text = result.get("text", "")
 
 
 
 
 
 
116
 
117
- threading.Thread(target=auto_reset_state).start()
118
- return state, full_text
119
 
120
 
121
  # Function to generate a full-text search query for Neo4j
@@ -204,10 +195,14 @@ def retriever(question: str):
204
 
205
  # Function to handle the entire audio query and response process
206
  def process_audio_query(state: AppState, audio_input):
207
- state, transcription = transcribe_function(state, audio_input)
208
- response_text = retriever(transcription)
209
- audio_path = generate_audio_elevenlabs(response_text)
210
- return audio_path, state
 
 
 
 
211
 
212
  # Create Gradio interface for audio input and output
213
  with gr.Blocks() as interface:
@@ -219,4 +214,4 @@ with gr.Blocks() as interface:
219
  submit_button.click(fn=process_audio_query, inputs=[state, audio_input], outputs=[audio_output, state])
220
 
221
  # Launch the Gradio app
222
- interface.launch()
 
90
  time.sleep(2)
91
  return AppState() # Reset the state
92
 
93
+ # Function to process audio input and handle pause detection
94
def process_audio(audio: tuple, state: AppState):
    """Append one recorded chunk to the session buffer and check for a pause.

    Args:
        audio: Pair whose element 0 is stored as the sampling rate and whose
            element 1 (the samples) is appended to ``state.stream``. May be
            ``None`` or carry no samples when nothing was recorded.
        state: Per-session ``AppState``. ``stream``, ``sampling_rate`` and
            ``pause_detected`` are updated in place.

    Returns:
        ``(component_update, state)``. ``component_update`` is
        ``gr.Audio(recording=False)`` when a pause was detected after the
        user started talking, otherwise ``None``. Note the order: the state
        is the SECOND element.
    """
    # Nothing usable was recorded: leave the buffer and flags untouched
    # instead of failing on ``audio[1]``.
    if audio is None or audio[1] is None or len(audio[1]) == 0:
        return None, state

    sampling_rate, samples = audio[0], audio[1]
    if state.stream is None:
        # First chunk of the session fixes the sampling rate for the buffer.
        state.stream = samples
        state.sampling_rate = sampling_rate
    else:
        state.stream = np.concatenate((state.stream, samples))

    # Detect pauses in the audio accumulated so far.
    state.pause_detected = determine_pause(state.stream, state.sampling_rate, state)

    # If a pause is detected and the user has started talking, stop recording.
    if state.pause_detected and state.started_talking:
        return gr.Audio(recording=False), state
    return None, state
109
 
 
 
110
 
111
 
112
  # Function to generate a full-text search query for Neo4j
 
195
 
196
  # Function to handle the entire audio query and response process
197
  def process_audio_query(state: AppState, audio_input):
198
+ state, _ = process_audio(audio_input, state)
199
+ if state.pause_detected:
200
+ # Perform transcription once pause is detected
201
+ transcription = pipe_asr({"array": state.stream, "sampling_rate": state.sampling_rate}, return_timestamps=False)["text"]
202
+ response_text = retriever(transcription)
203
+ audio_path = generate_audio_elevenlabs(response_text)
204
+ return audio_path, state
205
+ return None, state
206
 
207
  # Create Gradio interface for audio input and output
208
  with gr.Blocks() as interface:
 
214
  submit_button.click(fn=process_audio_query, inputs=[state, audio_input], outputs=[audio_output, state])
215
 
216
  # Launch the Gradio app
217
+ interface.launch(show_error=True)