Spaces:
Running
on
Zero
Running
on
Zero
File size: 3,859 Bytes
9e156fa 4f0841f 9e156fa dbc58d4 9e156fa 4f0841f 9e156fa 4f0841f 8c56203 4f0841f c18dcee 4f0841f c18dcee 4f0841f 1aee38e 4f0841f c18dcee 4f0841f c18dcee 4f0841f c18dcee 4f0841f c18dcee 4f0841f 9e156fa 4f0841f c98ea09 4f0841f c18dcee 4f0841f c18dcee 4f0841f 9e156fa 4f0841f c18dcee |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 |
import gradio as gr
from faster_whisper import WhisperModel
import logging
from transformers import MarianMTModel, MarianTokenizer
import pandas as pd
import requests
import ffmpeg
# Turn on debug-level output from faster_whisper to help diagnose transcription
logging.basicConfig()
logging.getLogger("faster_whisper").setLevel(logging.DEBUG)

# Download the pipe-delimited language table and keep only the populated columns
url = "https://huggingface.co/Lenylvt/LanguageISO/resolve/main/iso.md"
df = pd.read_csv(url, delimiter="|", skiprows=2, header=None)
df = df.dropna(axis=1, how='all')
df.columns = ['ISO 639-1', 'ISO 639-2', 'Language Name', 'Native Name']
df['ISO 639-1'] = df['ISO 639-1'].str.strip()

# Dropdown entries: each one is a pair holding the ISO 639-1 code twice
language_options = [(code, f"{code}") for code in df['ISO 639-1']]
def transcribe_and_optionally_translate(audio_file, source_language, target_language, model_size, change_transcript):
    """Transcribe a media file and, unless editing was requested, translate it.

    Args:
        audio_file: Path of the media file handed to faster-whisper.
        source_language: Language code used as the source half of the
            ``Helsinki-NLP/opus-mt-{source}-{target}`` model name.
        target_language: Language code used as the target half of that name.
        model_size: Model size name passed to ``WhisperModel``.
        change_transcript: When truthy, skip translation and return the raw
            transcription so the caller can substitute an edited version.

    Returns:
        A ``(text, can_modify)`` tuple. ``can_modify`` is ``True`` only when
        ``change_transcript`` was set; ``text`` is then the untranslated
        transcription. Otherwise ``text`` is the translation, or the
        transcription itself when both languages are equal or nothing was
        transcribed.
    """
    # Transcription
    device = "cpu"  # Use "cuda" for GPU
    compute_type = "int8"  # Use "float16" or "int8" for GPU, "int8" for CPU
    whisper_model = WhisperModel(model_size, device=device, compute_type=compute_type)
    segments, _ = whisper_model.transcribe(audio_file)
    segment_texts = [segment.text for segment in segments]
    transcription = " ".join(segment_texts)

    if change_transcript:
        # Assume user will modify the transcript manually before translation
        return transcription, True

    if source_language == target_language:
        return transcription, False

    # Drop blank segments; with nothing to translate, avoid loading a model.
    pieces = [text.strip() for text in segment_texts if text.strip()]
    if not pieces:
        return transcription, False

    # Translation
    model_name = f"Helsinki-NLP/opus-mt-{source_language}-{target_language}"
    tokenizer = MarianTokenizer.from_pretrained(model_name)
    translation_model = MarianMTModel.from_pretrained(model_name)

    # Translate segment by segment in small batches. Feeding the whole
    # transcript as one sequence would cut it off at max_length tokens and
    # silently lose the rest of a long recording.
    batch_size = 8
    translated_pieces = []
    for start in range(0, len(pieces), batch_size):
        batch = pieces[start:start + batch_size]
        encoded = tokenizer(batch, return_tensors="pt", padding=True, truncation=True, max_length=512)
        generated = translation_model.generate(**encoded)
        translated_pieces.extend(
            tokenizer.decode(tokens, skip_special_tokens=True) for tokens in generated
        )
    return " ".join(translated_pieces), False
def add_hard_subtitle_to_video(input_video, transcript):
    """Burn subtitles into a video and return the path of the new file.

    Args:
        input_video: Path of the video passed to ffmpeg as input.
        transcript: Subtitle text written verbatim to a ``.srt`` file.
            NOTE(review): it is written as-is, so it is assumed to already be
            in SRT format -- confirm that callers provide timed cues.

    Returns:
        Path of the rendered ``output_video.mp4``, located in a temporary
        directory created for this call.
    """
    import os
    import tempfile

    # A fresh directory per call keeps simultaneous requests from overwriting
    # each other's files and guarantees the output path does not exist yet.
    work_dir = tempfile.mkdtemp(prefix="hard_subtitle_")
    temp_subtitle_path = os.path.join(work_dir, "subtitle.srt")
    output_video_path = os.path.join(work_dir, "output_video.mp4")

    with open(temp_subtitle_path, 'w', encoding='utf-8') as file:
        file.write(transcript)  # Assuming transcript is in SRT format

    try:
        ffmpeg.input(input_video).output(
            output_video_path, vf=f"subtitles={temp_subtitle_path}"
        ).run(quiet=True)
    finally:
        # The subtitle file is only an intermediate; the video is the result.
        os.remove(temp_subtitle_path)
    return output_video_path
def process_video(video, source_language, target_language, model_size='base', change_transcript=False, modified_transcript=None):
    """Transcribe (and possibly translate) a video, then burn in the subtitles.

    The video file itself is passed to the transcription step as its audio
    input. If the transcription step reports that the transcript may be
    edited and a non-empty ``modified_transcript`` was supplied, that text
    replaces the generated one. No translation is applied to it here.

    Returns:
        Whatever ``add_hard_subtitle_to_video`` returns for the chosen text.
    """
    generated, editable = transcribe_and_optionally_translate(
        video, source_language, target_language, model_size, change_transcript
    )

    # Prefer the user's edited text only when editing was enabled and provided
    if editable and modified_transcript:
        subtitle_text = modified_transcript
    else:
        subtitle_text = generated

    return add_hard_subtitle_to_video(video, subtitle_text)
# Setup the Gradio app
app = gr.Interface(
    fn=process_video,
    # Inputs are passed positionally to process_video, in this order
    inputs=[
        gr.Video(label="Upload Video"),
        gr.Dropdown(choices=language_options, label="Source Language"),
        gr.Dropdown(choices=language_options, label="Target Language"),
        # Default to "base" so an untouched dropdown does not pass None as the
        # model size
        gr.Dropdown(
            choices=["base", "small", "medium", "large", "large-v2", "large-v3"],
            value="base",
            label="Model Size",
        ),
        gr.Checkbox(label="Change Transcript before Translation?", value=False),
        gr.TextArea(label="Modified Transcript (if allowed)"),
    ],
    # process_video returns the path of the subtitled video file, so show it
    # as a video rather than printing the path as text
    outputs=gr.Video(label="Subtitled Video"),
    title="Video Transcription and Translation Tool",
    description="Transcribe or translate your video content. Optionally, edit the transcription before adding hard subtitles."
)

if __name__ == "__main__":
    app.launch()
|