Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,6 +1,7 @@
|
|
1 |
import gradio as gr
|
2 |
from transformers import pipeline
|
3 |
import torch
|
|
|
4 |
import numpy as np
|
5 |
|
6 |
device = "cuda:0" if torch.cuda.is_available() else "cpu"
|
@@ -13,6 +14,10 @@ def transcribe(audio):
|
|
13 |
if not isinstance(audio_data, np.ndarray):
|
14 |
audio_data = np.array(audio_data)
|
15 |
|
|
|
|
|
|
|
|
|
16 |
# Reshape the audio data to match the expected input format (1, num_samples)
|
17 |
if audio_data.ndim == 1:
|
18 |
audio_data = np.expand_dims(audio_data, axis=0)
|
|
|
1 |
import gradio as gr
|
2 |
from transformers import pipeline
|
3 |
import torch
|
4 |
+
import torchaudio
|
5 |
import numpy as np
|
6 |
|
7 |
device = "cuda:0" if torch.cuda.is_available() else "cpu"
|
|
|
14 |
if not isinstance(audio_data, np.ndarray):
|
15 |
audio_data = np.array(audio_data)
|
16 |
|
17 |
+
# Convert to mono if the audio is stereo (averages over axis 0, i.e. assumes channels-first data shaped (channels, num_samples))
|
18 |
+
if audio_data.ndim > 1 and audio_data.shape[0] > 1:
|
19 |
+
audio_data = np.mean(audio_data, axis=0)
|
20 |
+
|
21 |
# Reshape the audio data to match the expected input format (1, num_samples)
|
22 |
if audio_data.ndim == 1:
|
23 |
audio_data = np.expand_dims(audio_data, axis=0)
|