Whisper-zh-HK / app.py
tilos's picture
Update app.py
2b148f7
raw
history blame
3.74 kB
from transformers import pipeline
import gradio as gr
import os
import subprocess
from pytube import YouTube
import subprocess
pipe = pipeline(model="tilos/whisper-small-zh-HK") # change to "your-username/the-name-you-picked"
def video2mp3(video_file, output_ext="mp3"):
filename, ext = os.path.splitext(video_file)
subprocess.call(["ffmpeg", "-y", "-i", video_file, f"{filename}.{output_ext}"],
stdout=subprocess.DEVNULL,
stderr=subprocess.STDOUT)
return f"{filename}.{output_ext}"
def transcribe(audio):
text = pipe(audio)["text"]
return text
def gen_sub(video):
result = subprocess.check_output(['auto_subtitle','/example.mp4','--model', 'medium'])
return result
def get_text(url):
result = pipe(get_audio(url))
return result['text'].strip()
def get_audio(url):
website = YouTube(url)
video = website.streams.filter(only_audio=True).first()
out_file = video.download(output_path=".")
base, ext = os.path.splitext(out_file)
new_file = base + '.mp3'
os.rename(out_file, new_file)
audio = new_file
return audio
def offline_video(video):
audio_file = video2mp3(video)
text = transcribe(audio_file)
return text
with gr.Blocks() as demo:
# video file input
gr.Interface(
title="Whisper: Real Time Cantonese Recognition",
description="Realtime demo for Cantonese speech recognition using a fine-tuned Whisper small model. "
"Generate zh-HK subtitle from video file, audio file, your microphone, and Youtube URL",
fn=offline_video,
inputs="video",
outputs="text",
allow_flagging="never",
)
# audio file input, generate subtitled video
gr.Interface(
description= "Generate Cantonese subtitled video from video file",
fn=gen_sub,
inputs="video",
outputs="video",
allow_flagging="never",
)
# audio file input
with gr.Row():
with gr.Column():
input_audio = gr.Audio(source="upload", type="filepath")
micro_btn = gr.Button('Generate Voice Subtitles')
with gr.Column():
output_audio = gr.Textbox(placeholder='Transcript from audio', label='Subtitles')
micro_btn.click(transcribe, inputs=input_audio, outputs=output_audio)
"""
gr.Interface(
fn=transcribe,
title="Whisper: zh-HK Subtitle Generator",
description="Generate zh-HK subtitle from audio file, your microphone and Youtube",
inputs = gr.Audio(source="upload", type="filepath", optional=True),
outputs = "text",
allow_flagging= "never",
)
"""
# microphone input
with gr.Row():
with gr.Column():
input_mircro = gr.Audio(source="microphone", type="filepath")
micro_btn = gr.Button('Generate Voice Subtitles')
with gr.Column():
output_micro = gr.Textbox(placeholder='Transcript from mic', label='Subtitles')
micro_btn.click(transcribe, inputs=input_mircro, outputs=output_micro)
# Youtube url input
with gr.Row():
with gr.Column():
inputs_url = gr.Textbox(placeholder='Youtube URL', label='URL')
url_btn = gr.Button('Generate Youtube Video Subtitles')
examples = gr.Examples(examples=["https://www.youtube.com/watch?v=Yw4EoGWe0vw"],inputs=[inputs_url])
with gr.Column():
output_url = gr.Textbox(placeholder='Transcript from video.', label='Transcript')
url_btn.click(get_text, inputs=inputs_url, outputs=output_url )
demo.launch(debug=True)