Pijush2023 committed on
Commit
639b327
·
verified ·
1 Parent(s): 61ae7dd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -4
app.py CHANGED
@@ -126,6 +126,7 @@ pipe_asr = pipeline(
126
  return_timestamps=True
127
  )
128
 
 
129
  # Function to handle audio input, transcribe, fetch from Neo4j, and generate audio response
130
  def transcribe_and_respond(audio):
131
  if audio is None:
@@ -135,10 +136,12 @@ def transcribe_and_respond(audio):
135
  sr, y = audio
136
  y = np.array(y).astype(np.float32)
137
 
138
- # Normalize the audio array
139
- max_abs_y = np.max(np.abs(y))
140
- if max_abs_y > 0:
141
- y = y / max_abs_y
 
 
142
 
143
  # Prepare input_features for Whisper model
144
  input_features = processor(y, sampling_rate=sr, return_tensors="pt").input_features
 
126
  return_timestamps=True
127
  )
128
 
129
+ # Function to handle audio input, transcribe, fetch from Neo4j, and generate audio response
130
  # Function to handle audio input, transcribe, fetch from Neo4j, and generate audio response
131
  def transcribe_and_respond(audio):
132
  if audio is None:
 
136
  sr, y = audio
137
  y = np.array(y).astype(np.float32)
138
 
139
+ # Resample to 16kHz if needed
140
+ target_sr = 16000
141
+ if sr != target_sr:
142
+ logging.debug(f"Resampling audio from {sr} Hz to {target_sr} Hz.")
143
+ y = torchaudio.functional.resample(torch.tensor(y), orig_freq=sr, new_freq=target_sr).numpy()
144
+ sr = target_sr
145
 
146
  # Prepare input_features for Whisper model
147
  input_features = processor(y, sampling_rate=sr, return_tensors="pt").input_features