Spaces:
Runtime error
Runtime error
File size: 3,304 Bytes
22b4650 d1baf01 22b4650 d66d670 22b4650 67ee310 d1baf01 22b4650 d1baf01 67ee310 22b4650 8a53bbd 22b4650 001993c 22b4650 001993c 22b4650 9e5c5ec e08c550 9e5c5ec 8a53bbd 9e5c5ec 7355ad5 001993c 0d8f3bf 001993c 0d8f3bf 001993c 7355ad5 001993c 8a53bbd 22b4650 001993c 22b4650 001993c 22b4650 8a53bbd 22b4650 001993c 22b4650 0d7fd71 001993c 22b4650 0d8f3bf |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 |
from transformers import pipeline
import gradio as gr
import os
import subprocess
from pytube import YouTube
# Pipeline loaded from the "tilos/whisper-small-zh-HK" checkpoint. It is built
# once, when the module is imported, and reused by transcribe() and get_text().
pipe = pipeline(model="tilos/whisper-small-zh-HK") # change to "your-username/the-name-you-picked"
def video2mp3(video_file, output_ext="mp3"):
    """Convert a media file to another format by invoking ffmpeg.

    The output is written next to the input, using the same base name and
    the ``output_ext`` extension. An existing file at that path is
    overwritten (``-y``).

    Args:
        video_file: Path of the media file to convert.
        output_ext: Extension, without the leading dot, of the file to
            produce.

    Returns:
        Path of the converted file. When ``video_file`` already has the
        requested extension it is returned unchanged and ffmpeg is not run.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
        FileNotFoundError: If the ``ffmpeg`` executable cannot be found.
    """
    filename, _ = os.path.splitext(video_file)
    output_file = f"{filename}.{output_ext}"

    # Input and output would be the same path: there is nothing to convert,
    # and ffmpeg cannot write onto the file it is reading.
    if output_file == video_file:
        return video_file

    # check=True surfaces a failed conversion here instead of handing a
    # path that was never written to the caller.
    subprocess.run(
        ["ffmpeg", "-y", "-i", video_file, output_file],
        stdout=subprocess.DEVNULL,
        stderr=subprocess.STDOUT,
        check=True,
    )
    return output_file
def transcribe(audio):
    """Transcribe an audio input with the module-level ``pipe`` pipeline.

    Args:
        audio: Input handed to ``pipe`` (the UI supplies a file path), or
            ``None`` when the component was submitted without any audio.

    Returns:
        The ``"text"`` field of the pipeline result, or an empty string when
        ``audio`` is ``None``.
    """
    # An empty Gradio audio component yields None; return an empty transcript
    # instead of letting the pipeline raise on it.
    if audio is None:
        return ""
    return pipe(audio)["text"]
def get_text(url):
    """Return the transcript of the audio fetched from ``url``.

    The audio is downloaded with ``get_audio`` and passed to the
    module-level ``pipe`` pipeline.

    Args:
        url: Video URL entered by the user. May be empty or ``None`` when
            the textbox was left blank.

    Returns:
        The transcript with surrounding whitespace removed, or an empty
        string when no URL was provided.
    """
    # Nothing to download for a blank textbox; skip the network call.
    if url is None or not url.strip():
        return ""
    result = pipe(get_audio(url))
    return result["text"].strip()
def get_audio(url):
    """Download the audio-only stream of a YouTube video.

    Args:
        url: URL of the YouTube video.

    Returns:
        Path of the downloaded file, saved under the current working
        directory and renamed to carry a ``.mp3`` extension.

    Raises:
        ValueError: If the video offers no audio-only stream.
    """
    stream = YouTube(url).streams.filter(only_audio=True).first()
    # first() yields None when the filter matches nothing; fail with a clear
    # message instead of an AttributeError on None.
    if stream is None:
        raise ValueError(f"No audio-only stream found for {url!r}")

    downloaded = stream.download(output_path=".")
    base, _ = os.path.splitext(downloaded)
    mp3_path = base + ".mp3"
    # NOTE: only the file name changes here; the data is not transcoded.
    # os.replace overwrites a leftover file from an earlier run on every
    # platform, whereas os.rename fails on Windows if the target exists.
    os.replace(downloaded, mp3_path)
    return mp3_path
def offline_video(video):
    """Transcribe the audio track of an uploaded video file.

    The video is first converted with ``video2mp3`` and the resulting file
    is passed to ``transcribe``.

    Args:
        video: Path of the uploaded video, or ``None``/empty when the form
            was submitted without a file.

    Returns:
        The transcript, or an empty string when no video was provided.
    """
    # Without a file there is nothing to convert; avoid a TypeError from the
    # path handling in the conversion step.
    if not video:
        return ""
    audio_file = video2mp3(video)
    return transcribe(audio_file)
# Build the demo page: one section per input source (video file, audio file,
# microphone, YouTube URL), each wired to its transcription function.
with gr.Blocks() as demo:
    # Video file input: handled by offline_video.
    gr.Interface(
        title="Whisper: Real Time Cantonese Recognition",
        description="Realtime demo for Cantonese speech recognition using a fine-tuned Whisper small model. "
        "Generate zh-HK subtitle from video file, audio file, your microphone, and Youtube URL",
        fn=offline_video,
        inputs="video",
        outputs="text",
        allow_flagging="never",
    )

    # Audio file input.
    with gr.Row():
        with gr.Column():
            input_audio = gr.Audio(source="upload", type="filepath")
            # Own name for this button so it is not shadowed by the
            # microphone button defined further down.
            audio_btn = gr.Button('Generate Voice Subtitles')
        with gr.Column():
            output_audio = gr.Textbox(placeholder='Transcript from audio', label='Subtitles')
    audio_btn.click(transcribe, inputs=input_audio, outputs=output_audio)

    # Microphone input.
    with gr.Row():
        with gr.Column():
            input_micro = gr.Audio(source="microphone", type="filepath")
            micro_btn = gr.Button('Generate Voice Subtitles')
        with gr.Column():
            output_micro = gr.Textbox(placeholder='Transcript from mic', label='Subtitles')
    micro_btn.click(transcribe, inputs=input_micro, outputs=output_micro)

    # Youtube url input.
    with gr.Row():
        with gr.Column():
            inputs_url = gr.Textbox(placeholder='Youtube URL', label='URL')
            url_btn = gr.Button('Generate Youtube Video Subtitles')
            gr.Examples(examples=["https://www.youtube.com/watch?v=Yw4EoGWe0vw"], inputs=[inputs_url])
        with gr.Column():
            output_url = gr.Textbox(placeholder='Transcript from video.', label='Transcript')
    url_btn.click(get_text, inputs=inputs_url, outputs=output_url)

demo.launch(debug=True)