Norphel committed on
Commit
6e77289
·
verified ·
1 Parent(s): 0243cab

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -13
app.py CHANGED
@@ -20,13 +20,16 @@ def generate_text(audio):
20
  return "No audio received"
21
 
22
  sr, data = audio # Unpack the tuple
23
- print(sr, data.dtype)
24
-
25
- data = data.astype(np.float32) / 32768.0
26
- data = librosa.resample(data, orig_sr=sr, target_sr=16000)
27
- sr = 16000
28
-
29
- print(sr, data.dtype)
 
 
 
30
  inputs = asr_processor(data, sampling_rate=sr, return_tensors="pt", padding=True)
31
 
32
  with torch.no_grad():
@@ -37,15 +40,13 @@ def generate_text(audio):
37
  return asr_processor.decode(pred_ids)
38
 
39
 
 
40
  input_audio = gr.Audio(
41
  sources=["microphone"],
42
- waveform_options=gr.WaveformOptions(
43
- waveform_color="#01C6FF",
44
- waveform_progress_color="#0066B4",
45
- skip_length=2,
46
- show_controls=False,
47
- ),
48
  )
 
49
  demo = gr.Interface(
50
  fn=generate_text,
51
  inputs=input_audio,
 
20
  return "No audio received"
21
 
22
  sr, data = audio # Unpack the tuple
23
+ print(f"Original sample rate: {sr}, dtype: {data.dtype}")
24
+
25
+ # Convert to float32 and ensure 16kHz
26
+ data = data.astype(np.float32)
27
+ if sr != 16000:
28
+ data = librosa.resample(data, orig_sr=sr, target_sr=16000)
29
+ sr = 16000
30
+
31
+ print(f"Processed sample rate: {sr}, dtype: {data.dtype}")
32
+
33
  inputs = asr_processor(data, sampling_rate=sr, return_tensors="pt", padding=True)
34
 
35
  with torch.no_grad():
 
40
  return asr_processor.decode(pred_ids)
41
 
42
 
43
+ # Ensure Gradio records at 16kHz and float32
44
  input_audio = gr.Audio(
45
  sources=["microphone"],
46
+ type="numpy", # Ensures we receive (sr, np.ndarray)
47
+ sample_rate=16000, # Force 16kHz recording
 
 
 
 
48
  )
49
+
50
  demo = gr.Interface(
51
  fn=generate_text,
52
  inputs=input_audio,