Norphel committed on
Commit
bb5cc1d
·
verified ·
1 Parent(s): 6e77289

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -9
app.py CHANGED
@@ -19,17 +19,21 @@ def generate_text(audio):
19
  if audio is None:
20
  return "No audio received"
21
 
22
- sr, data = audio # Unpack the tuple
23
  print(f"Original sample rate: {sr}, dtype: {data.dtype}")
24
 
25
- # Convert to float32 and ensure 16kHz
26
  data = data.astype(np.float32)
27
- if sr != 16000:
28
- data = librosa.resample(data, orig_sr=sr, target_sr=16000)
29
- sr = 16000
 
 
 
30
 
31
  print(f"Processed sample rate: {sr}, dtype: {data.dtype}")
32
 
 
33
  inputs = asr_processor(data, sampling_rate=sr, return_tensors="pt", padding=True)
34
 
35
  with torch.no_grad():
@@ -39,12 +43,10 @@ def generate_text(audio):
39
  # Decode the prediction
40
  return asr_processor.decode(pred_ids)
41
 
42
-
43
- # Ensure Gradio records at 16kHz and float32
44
  input_audio = gr.Audio(
45
  sources=["microphone"],
46
- type="numpy", # Ensures we receive (sr, np.ndarray)
47
- sample_rate=16000, # Force 16kHz recording
48
  )
49
 
50
  demo = gr.Interface(
 
19
  if audio is None:
20
  return "No audio received"
21
 
22
+ sr, data = audio # Unpack the tuple (sample rate, numpy array)
23
  print(f"Original sample rate: {sr}, dtype: {data.dtype}")
24
 
25
+ # Convert to float32
26
  data = data.astype(np.float32)
27
+
28
+ # Resample to 16kHz if necessary
29
+ target_sr = 16000
30
+ if sr != target_sr:
31
+ data = librosa.resample(data, orig_sr=sr, target_sr=target_sr)
32
+ sr = target_sr
33
 
34
  print(f"Processed sample rate: {sr}, dtype: {data.dtype}")
35
 
36
+ # Tokenize and run inference
37
  inputs = asr_processor(data, sampling_rate=sr, return_tensors="pt", padding=True)
38
 
39
  with torch.no_grad():
 
43
  # Decode the prediction
44
  return asr_processor.decode(pred_ids)
45
 
46
+ # Ensure we get a NumPy array from Gradio
 
47
  input_audio = gr.Audio(
48
  sources=["microphone"],
49
+ type="numpy", # Ensures function gets (sr, np.ndarray)
 
50
  )
51
 
52
  demo = gr.Interface(