Pijush2023 committed
Commit 0436599 · verified · 1 Parent(s): 7a0c9f7

Update app.py

Files changed (1):
  1. app.py +15 -8
app.py CHANGED

@@ -97,7 +97,6 @@ def transcribe_function(stream, new_chunk):
         print(f"Error chunk structure: {type(new_chunk)}, content: {new_chunk}")
         return stream, "", None
 
-    # Ensure y is not empty and is at least 1-dimensional
     if y is None or len(y) == 0:
         return stream, "", None
 
@@ -106,12 +105,22 @@ def transcribe_function(stream, new_chunk):
     if max_abs_y > 0:
         y = y / max_abs_y
 
-    # Ensure stream is also at least 1-dimensional before concatenation
     if stream is not None and len(stream) > 0:
         stream = np.concatenate([stream, y])
     else:
         stream = y
 
+    # Only process if there is significant audio input
+    if len(stream) > sr * 0.5:  # Process if there is at least 0.5 seconds of audio
+        result = pipe_asr({"array": stream, "sampling_rate": sr}, return_timestamps=False)
+        full_text = result.get("text", "")
+        if full_text.strip():
+            print(f"Transcription: {full_text}")
+            threading.Thread(target=auto_reset_state).start()
+            return stream, full_text, full_text
+
+    return stream, "", None
+
     # Process the audio data for transcription
     result = pipe_asr({"array": stream, "sampling_rate": sr}, return_timestamps=False)
     full_text = result.get("text", "")
@@ -229,13 +238,13 @@ def retriever(question: str):
     final_response = generate_response_with_prompt(combined_context, question)
     return final_response
 
-
-# Function to handle the entire audio query and response process
 def process_audio_query(audio_input, state):
     stream = None
-    # Process the audio input and get the transcription
     _, transcription, _ = transcribe_function(stream, audio_input)
-    print(f"Transcription: {transcription}")
+
+    if not transcription.strip():
+        print("No valid transcription detected.")
+        return None, state  # Avoid generating a response for empty transcriptions
 
     # Retrieve a response based on the transcription
     response_text = retriever(transcription)
@@ -247,11 +256,9 @@ def process_audio_query(audio_input, state):
     # Update the conversation history in the state
     state.conversation.append((transcription, response_text))
 
-    # Return the path of the audio and the updated state
     return audio_path, state
 
 
-
 with gr.Blocks() as demo:
     with gr.Row():
         with gr.Column():
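
The net effect of this commit: transcribe_function now buffers incoming chunks and only calls the ASR pipeline once more than half a second of audio has accumulated, and process_audio_query returns early when the transcription is blank. The sketch below reproduces both patterns in isolation so they can be exercised outside the app; fake_asr, answer_query, the 16 kHz sample rate, and the chunk sizes are hypothetical stand-ins for app.py's actual pipe_asr pipeline, retriever, and Gradio microphone stream, none of which appear in this diff.

import numpy as np

SR = 16_000  # assumed sample rate; app.py reads sr from each incoming Gradio chunk

def fake_asr(audio):
    # Hypothetical stand-in for the app's pipe_asr ASR pipeline.
    return "hello world" if len(audio) > 0 else ""

def accumulate_and_transcribe(stream, chunk, sr=SR):
    # Normalize the chunk to [-1, 1], mirroring the max_abs_y step in the diff.
    chunk = chunk.astype(np.float32)
    peak = np.max(np.abs(chunk))
    if peak > 0:
        chunk = chunk / peak

    # Append the chunk to the running buffer, as the diff does with np.concatenate.
    if stream is not None and len(stream) > 0:
        stream = np.concatenate([stream, chunk])
    else:
        stream = chunk

    # The commit's gate: only transcribe once more than 0.5 s has accumulated.
    if len(stream) > sr * 0.5:
        text = fake_asr(stream)
        if text.strip():
            return stream, text
    return stream, ""

def answer_query(stream, chunk):
    # Mirrors the commit's second change: bail out on an empty transcription
    # instead of sending blank input to the retriever.
    stream, transcription = accumulate_and_transcribe(stream, chunk)
    if not transcription.strip():
        return stream, None
    return stream, f"response to: {transcription!r}"  # placeholder for retriever()

# Feed two 0.3 s chunks: the first call only buffers, the second transcribes.
stream = None
for _ in range(2):
    stream, answer = answer_query(stream, np.random.randn(int(0.3 * SR)))
    print(f"buffered {len(stream)} samples -> {answer}")

The two early returns keep the streaming callback cheap on silence: the buffer still grows with each chunk, but neither the ASR model nor the retriever runs until there is enough audio worth transcribing.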