demavior committed on
Commit
d66d2e5
·
verified ·
1 Parent(s): 4639258

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -0
app.py CHANGED
@@ -1,6 +1,7 @@
1
  import gradio as gr
2
  from transformers import pipeline
3
  import torch
 
4
  import numpy as np
5
 
6
  device = "cuda:0" if torch.cuda.is_available() else "cpu"
@@ -13,6 +14,10 @@ def transcribe(audio):
13
  if not isinstance(audio_data, np.ndarray):
14
  audio_data = np.array(audio_data)
15
 
 
 
 
 
16
  # Reshape the audio data to match the expected input format (1, num_samples)
17
  if audio_data.ndim == 1:
18
  audio_data = np.expand_dims(audio_data, axis=0)
 
1
  import gradio as gr
2
  from transformers import pipeline
3
  import torch
4
+ import torchaudio
5
  import numpy as np
6
 
7
  device = "cuda:0" if torch.cuda.is_available() else "cpu"
 
14
  if not isinstance(audio_data, np.ndarray):
15
  audio_data = np.array(audio_data)
16
 
17
+ # Convert to mono if the audio is stereo
18
+ if audio_data.ndim > 1 and audio_data.shape[0] > 1:
19
+ audio_data = np.mean(audio_data, axis=0)
20
+
21
  # Reshape the audio data to match the expected input format (1, num_samples)
22
  if audio_data.ndim == 1:
23
  audio_data = np.expand_dims(audio_data, axis=0)