Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,41 +1,36 @@
|
|
1 |
-
import os
|
2 |
-
os.system('pip install soundfile')
|
3 |
-
|
4 |
import gradio as gr
|
5 |
from transformers import pipeline
|
6 |
import torch
|
7 |
-
import
|
8 |
-
import io
|
9 |
|
10 |
device = "cuda:0" if torch.cuda.is_available() else "cpu"
|
11 |
|
12 |
def transcribe(audio):
|
13 |
-
|
14 |
# Extract the audio data from the tuple
|
15 |
audio_data = audio[0] if isinstance(audio, tuple) else audio
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
|
|
|
|
20 |
|
21 |
pipe = pipeline(
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
)
|
27 |
|
28 |
-
prediction = pipe(
|
29 |
-
|
30 |
return prediction
|
31 |
|
32 |
gradio_app = gr.Interface(
|
33 |
-
transcribe,
|
34 |
-
inputs=gr.Audio(label="Input")
|
35 |
-
outputs=gr.Textbox(label="Result")
|
36 |
title="Transcribed",
|
37 |
)
|
38 |
|
39 |
-
|
40 |
if __name__ == "__main__":
|
41 |
-
gradio_app.launch()
|
|
|
|
|
|
|
|
|
1 |
import gradio as gr
|
2 |
from transformers import pipeline
|
3 |
import torch
|
4 |
+
import torchaudio
|
|
|
5 |
|
6 |
# Pick the inference device once at import time: prefer the first CUDA
# GPU, fall back to the CPU when none is available.
device = "cuda:0"
if not torch.cuda.is_available():
    device = "cpu"
|
7 |
|
8 |
def transcribe(audio):
    """Transcribe speech in *audio* to text with Whisper.

    Parameters:
        audio: either a file path (str) or a ``(sample_rate, samples)``
            tuple — the latter is what ``gr.Audio(type="numpy")`` yields.

    Returns:
        str: the recognized text.
    """
    if isinstance(audio, tuple):
        # gr.Audio's numpy mode yields (sample_rate, samples): the FIRST
        # element is the rate, not the waveform, so it must not be fed to
        # the model (the previous code passed audio[0] as the audio data).
        sample_rate, samples = audio
        waveform = torch.as_tensor(samples)
        if not waveform.is_floating_point():
            # Integer PCM (gradio emits int16) -> scale into [-1.0, 1.0].
            peak = torch.iinfo(waveform.dtype).max
            waveform = waveform.to(torch.float32) / peak
        else:
            waveform = waveform.to(torch.float32)
        if waveform.ndim > 1:
            # (frames, channels) -> mono; the ASR pipeline wants 1-D audio.
            waveform = waveform.mean(dim=1)
        waveform_np = waveform.numpy()
    else:
        # Anything else is treated as a file path and decoded by torchaudio.
        # torchaudio.load returns (channels, frames); downmix to mono.
        loaded, sample_rate = torchaudio.load(audio)
        waveform_np = loaded.mean(dim=0).numpy()

    # Building the pipeline downloads/loads the Whisper weights, which is
    # expensive — cache it on the function so repeated requests reuse it.
    pipe = getattr(transcribe, "_pipe", None)
    if pipe is None:
        pipe = pipeline(
            "automatic-speech-recognition",
            model="openai/whisper-small",
            chunk_length_s=30,
            device=device,
        )
        transcribe._pipe = pipe

    # Pass the sampling rate explicitly so the pipeline can resample to the
    # 16 kHz the Whisper feature extractor expects.
    prediction = pipe({"raw": waveform_np, "sampling_rate": sample_rate})["text"]

    return prediction
|
27 |
|
28 |
# Wire the transcription function into a minimal web UI:
# one audio input, one text output.
_audio_input = gr.Audio(label="Input")
_text_output = gr.Textbox(label="Result")
gradio_app = gr.Interface(
    fn=transcribe,
    inputs=_audio_input,
    outputs=_text_output,
    title="Transcribed",
)
|
34 |
|
|
|
35 |
# Launch the Gradio server only when executed as a script, so importing
# this module (e.g. by a hosting runtime) does not start a server.
if __name__ == "__main__":
    gradio_app.launch()
|