Update app.py
app.py CHANGED
@@ -97,7 +97,6 @@ def transcribe_function(stream, new_chunk):
         print(f"Error chunk structure: {type(new_chunk)}, content: {new_chunk}")
         return stream, "", None
 
-    # Ensure y is not empty and is at least 1-dimensional
     if y is None or len(y) == 0:
         return stream, "", None
 
@@ -106,12 +105,22 @@
     if max_abs_y > 0:
         y = y / max_abs_y
 
-    # Ensure stream is also at least 1-dimensional before concatenation
     if stream is not None and len(stream) > 0:
         stream = np.concatenate([stream, y])
     else:
         stream = y
 
+    # Only process if there is significant audio input
+    if len(stream) > sr * 0.5:  # Process if there is at least 0.5 seconds of audio
+        result = pipe_asr({"array": stream, "sampling_rate": sr}, return_timestamps=False)
+        full_text = result.get("text", "")
+        if full_text.strip():
+            print(f"Transcription: {full_text}")
+            threading.Thread(target=auto_reset_state).start()
+            return stream, full_text, full_text
+
+    return stream, "", None
+
     # Process the audio data for transcription
     result = pipe_asr({"array": stream, "sampling_rate": sr}, return_timestamps=False)
     full_text = result.get("text", "")
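The key addition in this hunk is the half-second gate: incoming chunks are peak-normalized, appended to the rolling `stream` buffer, and the ASR pipeline is only invoked once the buffer holds at least 0.5 s of audio, surfacing only non-empty text. Below is a minimal, self-contained sketch of that gating logic; `fake_pipe_asr` and `SR` are hypothetical stand-ins for the app's `pipe_asr` Whisper pipeline and sample rate, and the background `auto_reset_state` thread from the diff is omitted.

import numpy as np

SR = 16000  # assumed sample rate; in app.py, `sr` arrives with each Gradio chunk

def fake_pipe_asr(inputs, return_timestamps=False):
    # Hypothetical stand-in for the app's Whisper `pipe_asr` pipeline.
    return {"text": "hello world" if len(inputs["array"]) > SR * 0.5 else ""}

def transcribe(stream, new_chunk, sr=SR):
    y = np.asarray(new_chunk, dtype=np.float32)
    if y.size == 0:
        return stream, ""
    max_abs_y = np.max(np.abs(y))
    if max_abs_y > 0:
        y = y / max_abs_y  # peak-normalize the incoming chunk
    stream = y if stream is None or len(stream) == 0 else np.concatenate([stream, y])
    if len(stream) > sr * 0.5:  # gate: transcribe only with >= 0.5 s buffered
        text = fake_pipe_asr({"array": stream, "sampling_rate": sr}).get("text", "")
        if text.strip():
            return stream, text
    return stream, ""  # too little audio yet, or silence

stream, text = transcribe(None, np.random.randn(int(SR * 0.3)))
print(repr(text))  # '' -- only 0.3 s buffered, gate stays closed
stream, text = transcribe(stream, np.random.randn(int(SR * 0.3)))
print(repr(text))  # 'hello world' -- 0.6 s buffered, gate opens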
@@ -229,13 +238,13 @@ def retriever(question: str):
     final_response = generate_response_with_prompt(combined_context, question)
     return final_response
 
-
-# Function to handle the entire audio query and response process
 def process_audio_query(audio_input, state):
     stream = None
-    # Process the audio input and get the transcription
     _, transcription, _ = transcribe_function(stream, audio_input)
-
+
+    if not transcription.strip():
+        print("No valid transcription detected.")
+        return None, state  # Avoid generating a response for empty transcriptions
 
     # Retrieve a response based on the transcription
     response_text = retriever(transcription)
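The other substantive change is the early return in process_audio_query when the transcription comes back empty, so silence or noise never reaches the retriever. A minimal sketch of that guard under stated assumptions: `transcribe` and `retrieve` below are hypothetical stand-ins for the app's transcribe_function and retriever, and the state object is reduced to a namespace with a conversation list.

from types import SimpleNamespace

def transcribe(audio):   # hypothetical stand-in for transcribe_function
    return "" if audio is None else "what are the visiting hours?"

def retrieve(question):  # hypothetical stand-in for retriever()
    return f"(answer for: {question})"

def process_audio_query(audio_input, state):
    transcription = transcribe(audio_input)
    if not transcription.strip():
        print("No valid transcription detected.")
        return None, state  # skip retrieval entirely for empty input
    response_text = retrieve(transcription)
    state.conversation.append((transcription, response_text))
    return response_text, state

state = SimpleNamespace(conversation=[])
print(process_audio_query(None, state)[0])        # None -- guard fires
print(process_audio_query("clip.wav", state)[0])  # the retrieved answer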
@@ -247,11 +256,9 @@ def process_audio_query(audio_input, state):
     # Update the conversation history in the state
     state.conversation.append((transcription, response_text))
 
-    # Return the path of the audio and the updated state
     return audio_path, state
 
 
-
 with gr.Blocks() as demo:
     with gr.Row():
         with gr.Column():