Pijush2023 committed on
Commit 35593b5 · verified · 1 Parent(s): 680dd01

Update app.py

Files changed (1)
  1. app.py +14 -30
app.py CHANGED
@@ -133,25 +133,15 @@ pipe_asr = pipeline(
 )
 
 # Function to handle audio input, transcribe, fetch from Neo4j, and generate audio response
-def transcribe_and_respond(stream, new_chunk):
-    try:
-        sr, y = new_chunk[0], new_chunk[1]
-    except TypeError:
-        print(f"Error chunk structure: {type(new_chunk)}, content: {new_chunk}")
-        return stream, "", None
-
-    y = y.astype(np.float32)
-    max_abs_y = np.max(np.abs(y))
-    if max_abs_y > 0:
-        y = y / max_abs_y
+def transcribe_and_respond(audio):
+    if audio is None:
+        return None, "No audio provided."
 
-    if stream is not None and len(stream) > 0:
-        stream = np.concatenate([stream, y])
-    else:
-        stream = y
+    sr, y = audio
+    y = np.array(y).astype(np.float32)
 
     # Transcribe the audio using Whisper
-    result = pipe_asr({"array": stream, "sampling_rate": sr}, return_timestamps=False)
+    result = pipe_asr({"array": y, "sampling_rate": sr}, return_timestamps=False)
     question = result.get("text", "")
 
     # Retrieve information from Neo4j
@@ -160,20 +150,15 @@ def transcribe_and_respond(stream, new_chunk):
     # Convert the response to audio using Eleven Labs TTS
     audio_path = generate_audio_elevenlabs(response_text) if response_text else None
 
-    return stream, question, audio_path
+    return audio_path, response_text
 
-# Function to clear the transcription state
-def clear_transcription_state():
-    return None, "", None
 
 # Define the Gradio interface with only audio input and output
 with gr.Blocks(theme="rawrsor1/Everforest") as demo:
     with gr.Row():
         audio_input = gr.Audio(
             sources=["microphone"],
-            streaming=True,
             type='numpy',
-            every=0.1,
             label="Speak to Ask"
         )
         audio_output = gr.Audio(
@@ -183,19 +168,18 @@ with gr.Blocks(theme="rawrsor1/Everforest") as demo:
             interactive=False
         )
 
-    # Speech-to-Text to TTS functionality with Neo4j retrieval
-    state = gr.State()
-    audio_input.stream(
-        transcribe_and_respond,
-        inputs=[state, audio_input],
-        outputs=[state, audio_output],
-        api_name="api_voice_to_neo4j_response"
+    # Submit button to process the audio input
+    submit_btn = gr.Button("Submit")
+    submit_btn.click(
+        fn=transcribe_and_respond,
+        inputs=audio_input,
+        outputs=[audio_output, gr.Textbox(label="Transcription")]
     )
 
     # Clear state interaction
     gr.Button("Clear State").click(
         fn=clear_transcription_state,
-        outputs=[state, audio_output],
+        outputs=[audio_output],
         api_name="api_clean_state"
     )
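With streaming removed, gr.Audio(type='numpy') now hands transcribe_and_respond the whole recording at once as a (sample_rate, samples) tuple when Submit is clicked. One thing the rewrite drops is the old handler's peak normalization; Gradio's numpy audio typically arrives as 16-bit integers, while the Whisper ASR pipeline works on float32 waveforms, so rescaling may still be worth keeping. A minimal sketch of preparing the input for pipe_asr, with the normalization carried over from the removed streaming code as an assumption (prepare_asr_input is a hypothetical helper, not part of the commit):

import numpy as np

def prepare_asr_input(audio):
    # `audio` is what gr.Audio(type='numpy') passes on submit:
    # a (sample_rate, samples) tuple, or None if nothing was recorded.
    if audio is None:
        return None
    sr, y = audio
    y = np.array(y).astype(np.float32)
    # Assumed step: peak-normalize to [-1, 1], as the removed
    # streaming handler did before calling Whisper.
    max_abs_y = np.max(np.abs(y))
    if max_abs_y > 0:
        y = y / max_abs_y
    return {"array": y, "sampling_rate": sr}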
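A design note on the new wiring: gr.Textbox(label="Transcription") is instantiated inline inside the outputs list, so the textbox is created and rendered at the point the click handler is wired up rather than declared with the other components. An equivalent sketch that declares it up front, which keeps the layout explicit (the name transcription_box and the audio output's label are assumptions, not taken from the commit):

import gradio as gr

with gr.Blocks(theme="rawrsor1/Everforest") as demo:
    with gr.Row():
        audio_input = gr.Audio(sources=["microphone"], type='numpy',
                               label="Speak to Ask")
        audio_output = gr.Audio(label="Audio Response", interactive=False)
    # Declared alongside the other components instead of inline in outputs.
    transcription_box = gr.Textbox(label="Transcription")

    gr.Button("Submit").click(
        fn=transcribe_and_respond,  # defined earlier in app.py
        inputs=audio_input,
        outputs=[audio_output, transcription_box],
    )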
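Note also that the commit deletes the three-value clear_transcription_state (it returned None, "", None for the old [state, audio_output] wiring) while the Clear State button still calls it, now against a single output. A one-output version compatible with the new outputs=[audio_output] would look like the sketch below; it is not part of this commit:

def clear_transcription_state():
    # Single return value to match outputs=[audio_output].
    return None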