KIFF committed on
Commit
6bb833b
·
verified ·
1 Parent(s): 5a76806

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -8
app.py CHANGED
@@ -1,11 +1,8 @@
1
  import torch
2
  import gradio as gr
3
  from transformers import pipeline
4
-
5
- # Add version control
6
- import pkg_resources
7
- gradio_version = pkg_resources.get_distribution("gradio").version
8
- print(f"Gradio version: {gradio_version}")
9
 
10
  MODEL_NAME = "openai/whisper-large-v3"
11
  BATCH_SIZE = 8
@@ -23,7 +20,17 @@ def transcribe(audio_file, task):
23
  if audio_file is None:
24
  raise gr.Error("No audio file submitted! Please upload an audio file before submitting your request.")
25
 
26
- result = pipe(audio_file, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=True)
 
 
 
 
 
 
 
 
 
 
27
 
28
  output = ""
29
  for chunk in result["chunks"]:
@@ -48,7 +55,7 @@ demo = gr.Interface(
48
  fn=transcribe,
49
  inputs=[audio_input, task_input],
50
  outputs=output,
51
- title=f"Whisper Large V3: Transcribe Audio with Timestamps (Gradio v{gradio_version})",
52
  description=(
53
  f"Transcribe audio files with Whisper Large V3 [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}). "
54
  "Upload an audio file and choose whether to transcribe or translate. "
@@ -57,4 +64,4 @@ demo = gr.Interface(
57
  )
58
 
59
  if __name__ == "__main__":
60
- demo.launch()
 
1
  import torch
2
  import gradio as gr
3
  from transformers import pipeline
4
+ import numpy as np
5
+ import librosa
 
 
 
6
 
7
  MODEL_NAME = "openai/whisper-large-v3"
8
  BATCH_SIZE = 8
 
20
  if audio_file is None:
21
  raise gr.Error("No audio file submitted! Please upload an audio file before submitting your request.")
22
 
23
+ # Load audio file
24
+ try:
25
+ # Use librosa to load the audio file
26
+ audio, sr = librosa.load(audio_file, sr=16000) # Whisper expects 16kHz sampling rate
27
+ except Exception as e:
28
+ raise gr.Error(f"Error loading audio file: {str(e)}")
29
+
30
+ # Convert to format expected by Whisper
31
+ inputs = {"array": audio, "sampling_rate": sr}
32
+
33
+ result = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=True)
34
 
35
  output = ""
36
  for chunk in result["chunks"]:
 
55
  fn=transcribe,
56
  inputs=[audio_input, task_input],
57
  outputs=output,
58
+ title=f"Whisper Large V3: Transcribe Audio with Timestamps",
59
  description=(
60
  f"Transcribe audio files with Whisper Large V3 [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}). "
61
  "Upload an audio file and choose whether to transcribe or translate. "
 
64
  )
65
 
66
  if __name__ == "__main__":
67
+ demo.launch()