Update app.py
app.py CHANGED
@@ -1,11 +1,8 @@
 import torch
 import gradio as gr
 from transformers import pipeline
-
-
-import pkg_resources
-gradio_version = pkg_resources.get_distribution("gradio").version
-print(f"Gradio version: {gradio_version}")
+import numpy as np
+import librosa
 
 MODEL_NAME = "openai/whisper-large-v3"
 BATCH_SIZE = 8
@@ -23,7 +20,17 @@ def transcribe(audio_file, task):
     if audio_file is None:
         raise gr.Error("No audio file submitted! Please upload an audio file before submitting your request.")
 
-    result = pipe(audio_file, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=True)
+    # Load audio file
+    try:
+        # Use librosa to load the audio file
+        audio, sr = librosa.load(audio_file, sr=16000)  # Whisper expects 16kHz sampling rate
+    except Exception as e:
+        raise gr.Error(f"Error loading audio file: {str(e)}")
+
+    # Convert to format expected by Whisper
+    inputs = {"array": audio, "sampling_rate": sr}
+
+    result = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=True)
 
     output = ""
     for chunk in result["chunks"]:
@@ -48,7 +55,7 @@ demo = gr.Interface(
     fn=transcribe,
     inputs=[audio_input, task_input],
     outputs=output,
-    title=f"Whisper Large V3: Transcribe Audio with Timestamps
+    title=f"Whisper Large V3: Transcribe Audio with Timestamps",
     description=(
         f"Transcribe audio files with Whisper Large V3 [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}). "
         "Upload an audio file and choose whether to transcribe or translate. "
@@ -57,4 +64,4 @@ demo = gr.Interface(
 )
 
 if __name__ == "__main__":
-demo.launch()
+    demo.launch()
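
For context, the snippet below is a minimal, self-contained sketch (not part of the commit) of the input path this change introduces: decode the audio with librosa at 16 kHz and pass the pipeline a dict with "array" and "sampling_rate", as the updated transcribe() now does. The construction of pipe is not visible in this diff, so the pipeline setup shown here is an assumption based on the model name used in app.py, and "sample.wav" is a placeholder path for any local audio file.

# Sketch only: exercises the new dict-based input format outside Gradio.
# Assumes transformers, librosa, and torch are installed; the pipeline
# configuration below is an assumption, not taken from the diff.
import torch
import librosa
from transformers import pipeline

MODEL_NAME = "openai/whisper-large-v3"

pipe = pipeline(
    task="automatic-speech-recognition",
    model=MODEL_NAME,
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    device="cuda:0" if torch.cuda.is_available() else "cpu",
)

# Load and resample to the 16 kHz rate Whisper expects.
audio, sr = librosa.load("sample.wav", sr=16000)  # placeholder path

# Same call shape as the updated app.py: a dict with the raw samples
# and their sampling rate, with timestamped chunks in the output.
result = pipe(
    {"array": audio, "sampling_rate": sr},
    batch_size=8,
    generate_kwargs={"task": "transcribe"},
    return_timestamps=True,
)

# Each chunk carries (start, end) timestamps and the transcribed text.
for chunk in result["chunks"]:
    start, end = chunk["timestamp"]
    print(f"[{start} -> {end}] {chunk['text']}")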