Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -210,31 +210,41 @@ pipe_asr = pipeline(
|
|
210 |
return_timestamps=False
|
211 |
)
|
212 |
|
213 |
-
# Function to
|
214 |
-
def
|
215 |
try:
|
216 |
-
sr, y =
|
217 |
-
|
218 |
-
|
219 |
-
|
220 |
-
|
221 |
-
|
222 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
223 |
|
224 |
-
|
225 |
-
|
226 |
-
|
227 |
-
logging.error(f"Error during real-time transcription: {e}")
|
228 |
-
return "Error processing the audio, please try again."
|
229 |
|
|
|
|
|
230 |
|
231 |
-
|
232 |
-
def clear_state():
|
233 |
-
return ""
|
234 |
|
235 |
# Define the Gradio interface
|
236 |
with gr.Blocks(theme="rawrsor1/Everforest") as demo:
|
237 |
-
audio_input = gr.Audio(sources=["microphone"], type='numpy', streaming=
|
238 |
transcription_textbox = gr.Textbox(label="Transcription", interactive=False)
|
239 |
submit_voice_btn = gr.Button("Submit Voice")
|
240 |
clear_state_btn = gr.Button("Clear State")
|
@@ -242,12 +252,12 @@ with gr.Blocks(theme="rawrsor1/Everforest") as demo:
|
|
242 |
|
243 |
# Update the transcription text in real-time as the user speaks
|
244 |
audio_input.stream(
|
245 |
-
fn=
|
246 |
-
inputs=audio_input,
|
247 |
-
outputs=transcription_textbox
|
248 |
)
|
249 |
|
250 |
-
#
|
251 |
def handle_submit(text):
|
252 |
# Placeholder function, could trigger response generation or other actions
|
253 |
return f"You submitted: {text}"
|
|
|
210 |
return_timestamps=False
|
211 |
)
|
212 |
|
213 |
+
# Function to handle audio transcription in real-time
|
214 |
+
def transcribe_function(stream, new_chunk):
    """Append a new microphone chunk to the running stream and transcribe it.

    Args:
        stream: Samples accumulated by previous calls, or ``None``/empty
            when nothing has been accumulated yet.
        new_chunk: Indexable pair whose item 0 is the sampling rate and
            whose item 1 holds the samples of the newest chunk.

    Returns:
        ``(stream, text, text)`` after a transcription was run, where
        ``stream`` now includes the new chunk and ``text`` is the
        pipeline's ``"text"`` field (``""`` if absent). When the chunk is
        malformed or empty, ``(stream, "", None)`` with ``stream``
        returned unchanged.
    """
    try:
        sr, y = new_chunk[0], new_chunk[1]
    except (TypeError, IndexError):
        # TypeError: chunk is None or not indexable; IndexError: too short.
        print(f"Error chunk structure: {type(new_chunk)}, content: {new_chunk}")
        return stream, "", None

    if y is None:
        return stream, "", None

    # Accept lists and scalars as well as arrays, and really make the
    # samples at least 1-dimensional before measuring them.
    y = np.atleast_1d(np.asarray(y, dtype=np.float32))
    if y.size == 0:
        return stream, "", None

    # The stream is concatenated and transcribed as a single channel, so
    # fold multi-channel audio down to mono first.
    # NOTE(review): assumes a (samples, channels) layout -- confirm
    # against the audio component in use.
    if y.ndim > 1:
        y = y.mean(axis=1)

    # Peak-normalise the chunk; an all-zero chunk is left as is to avoid
    # dividing by zero.
    max_abs_y = np.max(np.abs(y))
    if max_abs_y > 0:
        y = y / max_abs_y

    if stream is not None and len(stream) > 0:
        stream = np.concatenate([stream, y])
    else:
        stream = y

    # Re-transcribe everything accumulated so far, not just the new chunk.
    result = pipe_asr({"array": stream, "sampling_rate": sr}, return_timestamps=False)
    full_text = result.get("text", "")

    # Start a thread running auto_reset_state (defined elsewhere in the file).
    # NOTE(review): this launches one thread per processed chunk -- confirm
    # that auto_reset_state tolerates overlapping runs.
    threading.Thread(target=auto_reset_state).start()

    return stream, full_text, full_text
|
|
|
|
|
244 |
|
245 |
# Define the Gradio interface
|
246 |
with gr.Blocks(theme="rawrsor1/Everforest") as demo:
|
247 |
+
audio_input = gr.Audio(sources=["microphone"], type='numpy', streaming=True, label="Speak to Ask")
|
248 |
transcription_textbox = gr.Textbox(label="Transcription", interactive=False)
|
249 |
submit_voice_btn = gr.Button("Submit Voice")
|
250 |
clear_state_btn = gr.Button("Clear State")
|
|
|
252 |
|
253 |
# Update the transcription text in real-time as the user speaks
|
254 |
audio_input.stream(
|
255 |
+
fn=transcribe_function,
|
256 |
+
inputs=[None, audio_input],
|
257 |
+
outputs=[None, transcription_textbox, transcription_textbox]
|
258 |
)
|
259 |
|
260 |
+
# Placeholder function for handling submission
|
261 |
def handle_submit(text):
|
262 |
# Placeholder function, could trigger response generation or other actions
|
263 |
return f"You submitted: {text}"
|