axxam committed on
Commit
286eb75
·
verified ·
1 Parent(s): 7decf89

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +38 -38
app.py CHANGED
@@ -1,11 +1,10 @@
1
  import torch
2
-
3
  import gradio as gr
4
  import pytube as pt
5
  from transformers import pipeline
6
  from huggingface_hub import model_info
7
 
8
- MODEL_NAME = "BlueRaccoon/whisper-small-kab" #this always needs to stay in line 8 :D sorry for the hackiness
9
  lang = "uz"
10
 
11
  device = 0 if torch.cuda.is_available() else "cpu"
@@ -18,6 +17,7 @@ pipe = pipeline(
18
 
19
  pipe.model.config.forced_decoder_ids = pipe.tokenizer.get_decoder_prompt_ids(language=lang, task="transcribe")
20
 
 
21
  def transcribe(microphone, file_upload):
22
  warn_output = ""
23
  if (microphone is not None) and (file_upload is not None):
@@ -56,42 +56,42 @@ def yt_transcribe(yt_url):
56
  return html_embed_str, text
57
 
58
 
59
- demo = gr.Blocks()
60
-
61
- mf_transcribe = gr.Interface(
62
- fn=transcribe,
63
- inputs=[
64
- gr.inputs.Audio(source="microphone", type="filepath", optional=True),
65
- gr.inputs.Audio(source="upload", type="filepath", optional=True),
66
- ],
67
- outputs="text",
68
- layout="horizontal",
69
- theme="huggingface",
70
- title="Whisper Demo: Transcribe Audio",
71
- description=(
72
- "Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the the fine-tuned"
73
- f" checkpoint [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and 🤗 Transformers to transcribe audio files"
74
- " of arbitrary length."
75
- ),
76
- allow_flagging="never",
77
- )
78
-
79
- yt_transcribe = gr.Interface(
80
- fn=yt_transcribe,
81
- inputs=[gr.inputs.Textbox(lines=1, placeholder="Paste the URL to a YouTube video here", label="YouTube URL")],
82
- outputs=["html", "text"],
83
- layout="horizontal",
84
- theme="huggingface",
85
- title="Whisper Demo: Transcribe YouTube",
86
- description=(
87
- "Transcribe long-form YouTube videos with the click of a button! Demo uses the the fine-tuned checkpoint:"
88
- f" [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and 🤗 Transformers to transcribe audio files of"
89
- " arbitrary length."
90
- ),
91
- allow_flagging="never",
92
- )
93
 
94
- with demo:
95
- gr.TabbedInterface([mf_transcribe, yt_transcribe], ["Transcribe Audio", "Transcribe YouTube"])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
96
 
97
  demo.launch(enable_queue=True)
 
1
  import torch
 
2
  import gradio as gr
3
  import pytube as pt
4
  from transformers import pipeline
5
  from huggingface_hub import model_info
6
 
7
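+ MODEL_NAME = "BlueRaccoon/whisper-small-kab" # NOTE(review): this was meant to always stay on line 8 (sorry for the hackiness), but after this commit it sits on line 7 - verify whatever depends on the line number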
8
  lang = "uz"
9
 
10
  device = 0 if torch.cuda.is_available() else "cpu"
 
17
 
18
  pipe.model.config.forced_decoder_ids = pipe.tokenizer.get_decoder_prompt_ids(language=lang, task="transcribe")
19
 
20
+
21
  def transcribe(microphone, file_upload):
22
  warn_output = ""
23
  if (microphone is not None) and (file_upload is not None):
 
56
  return html_embed_str, text
57
 
58
 
59
+ with gr.Blocks() as demo:
60
+ with gr.Tab("Transcribe Audio"):
61
+ gr.Markdown(
62
+ f"""
63
+ # Whisper Demo: Transcribe Audio
64
+ Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the fine-tuned
65
+ checkpoint [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and 🤗 Transformers to transcribe audio files
66
+ of arbitrary length.
67
+ """
68
+ )
69
+ audio_inputs = [
70
+ gr.Audio(source="microphone", type="filepath", label="Record from Microphone"),
71
+ gr.Audio(source="upload", type="filepath", label="Upload Audio File"),
72
+ ]
73
+ gr.Interface(
74
+ fn=transcribe,
75
+ inputs=audio_inputs,
76
+ outputs=gr.Textbox(label="Transcription"),
77
+ live=False,
78
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79
 
80
+ with gr.Tab("Transcribe YouTube"):
81
+ gr.Markdown(
82
+ f"""
83
+ # Whisper Demo: Transcribe YouTube
84
+ Transcribe long-form YouTube videos with the click of a button! Demo uses the fine-tuned checkpoint
85
+ [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and 🤗 Transformers to transcribe audio files of
86
+ arbitrary length.
87
+ """
88
+ )
89
+ yt_url_input = gr.Textbox(lines=1, placeholder="Paste the URL to a YouTube video here", label="YouTube URL")
90
+ gr.Interface(
91
+ fn=yt_transcribe,
92
+ inputs=[yt_url_input],
93
+ outputs=[gr.HTML(label="YouTube Video"), gr.Textbox(label="Transcription")],
94
+ live=False,
95
+ )
96
 
97
  demo.launch(enable_queue=True)