sanjeevbora committed on
Commit
aed644d
·
verified ·
1 Parent(s): b1cbef8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +50 -39
app.py CHANGED
@@ -21,11 +21,11 @@ pipe = pipeline(
21
  device=device,
22
  )
23
 
24
- def transcribe(audio, task):
25
- if audio is None:
26
  raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")
27
 
28
- text = pipe(audio, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=True)["text"]
29
  return text
30
 
31
  def _return_yt_html_embed(yt_url):
@@ -83,42 +83,53 @@ def yt_transcribe(yt_url, task, max_filesize=75.0):
83
 
84
  return html_embed_str, text
85
 
86
- with gr.Blocks(theme="huggingface") as demo:
87
- gr.Markdown("# Whisper Large V3: Transcribe Audio")
88
- gr.Markdown(
89
- "Transcribe long-form audio inputs with the click of a button! Demo uses the OpenAI Whisper"
90
- f" checkpoint [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and 🤗 Transformers to transcribe audio files"
91
- " of arbitrary length."
92
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93
 
94
- with gr.Tabs():
95
- with gr.TabItem("Microphone"):
96
- with gr.Row():
97
- mic_input = gr.Audio(type="filepath", label="Microphone Input")
98
- # mic_input = gr.Audio(source="microphone", type="filepath", label="Microphone Input")
99
- mic_task = gr.Radio(["transcribe", "translate"], label="Task", value="transcribe")
100
- mic_output = gr.Textbox(label="Transcription")
101
- mic_button = gr.Button("Transcribe")
102
-
103
- with gr.TabItem("Audio file"):
104
- with gr.Row():
105
- file_input = gr.Audio(type="filepath", label="Audio file")
106
- # file_input = gr.Audio(source="upload", type="filepath", label="Audio file")
107
- file_task = gr.Radio(["transcribe", "translate"], label="Task", value="transcribe")
108
- file_output = gr.Textbox(label="Transcription")
109
- file_button = gr.Button("Transcribe")
110
-
111
- with gr.TabItem("YouTube"):
112
- with gr.Row():
113
- yt_input = gr.Textbox(lines=1, placeholder="Paste the URL to a YouTube video here", label="YouTube URL")
114
- yt_task = gr.Radio(["transcribe", "translate"], label="Task", value="transcribe")
115
- yt_embed = gr.HTML(label="Video")
116
- yt_output = gr.Textbox(label="Transcription")
117
- yt_button = gr.Button("Transcribe")
118
-
119
- mic_button.click(transcribe, inputs=[mic_input, mic_task], outputs=mic_output)
120
- file_button.click(transcribe, inputs=[file_input, file_task], outputs=file_output)
121
- yt_button.click(yt_transcribe, inputs=[yt_input, yt_task], outputs=[yt_embed, yt_output])
122
 
123
  if __name__ == "__main__":
124
- demo.launch(enable_queue=True)
 
21
  device=device,
22
  )
23
 
24
+ def transcribe(inputs, task):
25
+ if inputs is None:
26
  raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")
27
 
28
+ text = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=True)["text"]
29
  return text
30
 
31
  def _return_yt_html_embed(yt_url):
 
83
 
84
  return html_embed_str, text
85
 
86
+ description = (
87
+ "Transcribe long-form audio inputs with the click of a button! Demo uses the OpenAI Whisper"
88
+ f" checkpoint [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and 🤗 Transformers to transcribe audio files"
89
+ " of arbitrary length."
90
+ )
91
+
92
+ mf_transcribe = gr.Interface(
93
+ fn=transcribe,
94
+ inputs=[
95
+ gr.Audio(type="filepath"),
96
+ gr.Radio(["transcribe", "translate"], label="Task", value="transcribe"),
97
+ ],
98
+ outputs="text",
99
+ title="Whisper Large V3: Transcribe Audio (Microphone)",
100
+ description=description,
101
+ allow_flagging="never",
102
+ )
103
+
104
+ file_transcribe = gr.Interface(
105
+ fn=transcribe,
106
+ inputs=[
107
+ gr.Audio(type="filepath", label="Audio file"),
108
+ gr.Radio(["transcribe", "translate"], label="Task", value="transcribe"),
109
+ ],
110
+ outputs="text",
111
+ title="Whisper Large V3: Transcribe Audio (File Upload)",
112
+ description=description,
113
+ allow_flagging="never",
114
+ )
115
+
116
+ yt_transcribe = gr.Interface(
117
+ fn=yt_transcribe,
118
+ inputs=[
119
+ gr.Textbox(lines=1, placeholder="Paste the URL to a YouTube video here", label="YouTube URL"),
120
+ gr.Radio(["transcribe", "translate"], label="Task", value="transcribe")
121
+ ],
122
+ outputs=["html", "text"],
123
+ title="Whisper Large V3: Transcribe YouTube",
124
+ description=(
125
+ "Transcribe long-form YouTube videos with the click of a button! Demo uses the OpenAI Whisper checkpoint"
126
+ f" [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and 🤗 Transformers to transcribe video files of"
127
+ " arbitrary length."
128
+ ),
129
+ allow_flagging="never",
130
+ )
131
 
132
+ demo = gr.TabbedInterface([mf_transcribe, file_transcribe, yt_transcribe], ["Microphone", "Audio file", "YouTube"])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
133
 
134
  if __name__ == "__main__":
135
+ demo.launch()