avfranco committed on
Commit
03ddc3f
·
verified ·
1 Parent(s): c939bfd

Update app.py

Browse files

Updated based on https://huggingface.co/spaces/hf-audio/whisper-large-v3

Files changed (1) hide show
  1. app.py +24 -59
app.py CHANGED
@@ -1,75 +1,40 @@
 
 
1
  import gradio as gr
2
- import os
3
- from pydub import AudioSegment
4
 
5
- def audio_converter(audio_file:str):
6
- audio_input = AudioSegment.from_file(audio_file,'m4a')
7
- audio_input_name = os.path.splitext(audio_file)[0]
8
- audio_wav_filename = f"{audio_input_name}.wav"
9
- audio_input.export(audio_wav_filename, 'wav')
10
-
11
- return audio_wav_filename
12
 
13
- def asr_transcriber(audio_file):
14
- from transformers import pipeline
15
- import torch
16
 
17
- audio_file_wav = audio_converter(audio_file)
 
 
 
 
 
18
 
19
- # Check for CUDA availability (GPU)
20
- if torch.cuda.is_available():
21
- device_id = torch.device('cuda')
22
- else:
23
- device_id = torch.device('cpu')
24
 
25
- torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
26
-
27
- #Mac runtime
28
- #device_id = "mps"
29
- #torch_dtype = torch.float16
30
- flash = False
31
- ts = False
32
-
33
- #Try to optimize when CPU and float32
34
- model_id = "openai/whisper-tiny"
35
-
36
-
37
- # Initialize the ASR pipeline
38
- pipe = pipeline(
39
- "automatic-speech-recognition",
40
- model=model_id,
41
- torch_dtype=torch_dtype,
42
- device=device_id
43
- )
44
-
45
- if device_id == "mps":
46
- torch.mps.empty_cache()
47
- elif not flash:
48
- pipe.model = pipe.model.to_bettertransformer()
49
-
50
- language = None
51
- task = "transcribe"
52
-
53
- json_output = pipe(
54
- audio_file_wav,
55
- chunk_length_s=30,
56
- batch_size=8,
57
- generate_kwargs={"task": task, "language": language},
58
- return_timestamps=ts
59
- )
60
-
61
- return json_output["text"]
62
 
63
  with gr.Blocks() as transcriberUI:
64
  gr.Markdown(
65
  """
66
- # Ola Xara & Solange!
67
  Clicar no botao abaixo para selecionar o Audio a ser transcrito!
68
- Ambiente Demo disponivel 24x7. Running on CPU Upgrade with openai/whisper-tiny
69
  """)
70
- inp = gr.File(label="Arquivo de Audio", show_label=True, file_count="single", file_types=["m4a"])
71
  transcribe = gr.Textbox(label="Transcricao", show_label=True, show_copy_button=True)
72
- inp.upload(asr_transcriber, inp, transcribe)
73
 
74
  if __name__ == "__main__":
75
  transcriberUI.launch()
 
1
+ import spaces
2
+ import torch
3
  import gradio as gr
4
+ from transformers import pipeline
5
+ from transformers.pipelines.audio_utils import ffmpeg_read
6
 
7
+ MODEL_NAME = "openai/whisper-large-v3"
8
+ BATCH_SIZE = 8
9
+ FILE_LIMIT_MB = 1000
 
 
 
 
10
 
11
+ device = 0 if torch.cuda.is_available() else "cpu"
 
 
12
 
13
+ pipe = pipeline(
14
+ task="automatic-speech-recognition",
15
+ model=MODEL_NAME,
16
+ chunk_length_s=30,
17
+ device=device,
18
+ )
19
 
20
+ @spaces.GPU
21
+ def audio_transcribe(inputs, task):
22
+ if inputs is None:
23
+ raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")
 
24
 
25
+ text = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=True)["text"]
26
+ return text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
 
28
  with gr.Blocks() as transcriberUI:
29
  gr.Markdown(
30
  """
31
+ # Ola!
32
  Clicar no botao abaixo para selecionar o Audio a ser transcrito!
33
+ Ambiente Demo disponivel 24x7. Running on ZeroGPU with openai/whisper-large-v3
34
  """)
35
+ inp = gr.File(label="Arquivo de Audio", show_label=True, type="file_path", file_count="single", file_types=["mp3"])
36
  transcribe = gr.Textbox(label="Transcricao", show_label=True, show_copy_button=True)
37
+ inp.upload(audio_transcribe, inp, transcribe)
38
 
39
  if __name__ == "__main__":
40
  transcriberUI.launch()