# Spaces: Running on Zero
import gradio as gr | |
from faster_whisper import WhisperModel | |
import logging | |
import os | |
from moviepy.editor import VideoFileClip | |
import ffmpeg # Make sure to install ffmpeg-python | |
from transformers import MarianMTModel, MarianTokenizer | |
import pandas as pd | |
import pysrt | |
import requests | |
# Turn on debug-level output for faster_whisper so transcription problems are easier to trace
logging.basicConfig()
_whisper_logger = logging.getLogger("faster_whisper")
_whisper_logger.setLevel(logging.DEBUG)
# Download the language table from the URL below and parse it as a
# pipe-delimited file (presumably a Markdown table: the first two rows are
# skipped and columns that are entirely empty are dropped).
url = "https://huggingface.co/Lenylvt/LanguageISO/resolve/main/iso.md"
df = pd.read_csv(
    url,
    delimiter="|",
    skiprows=2,
    header=None,
).dropna(axis=1, how='all')
df.columns = ['ISO 639-1', 'ISO 639-2', 'Language Name', 'Native Name']
df['ISO 639-1'] = df['ISO 639-1'].str.strip()

# Dropdown entries: each ISO 639-1 code paired with its string form
language_options = [(code, f"{code}") for code in df['ISO 639-1']]
def format_timestamp(seconds):
    """Convert seconds to HH:MM:SS.mmm format.

    The value is rounded to whole milliseconds *before* it is split into
    fields, so a remainder such as 59.9996 s carries into the minutes field
    instead of being rendered as an invalid ``60.000`` seconds.

    Args:
        seconds: Offset in seconds (int or float); expected to be non-negative.

    Returns:
        The offset formatted as ``HH:MM:SS.mmm``.
    """
    total_ms = int(round(seconds * 1000))
    hours, rest = divmod(total_ms, 3_600_000)
    minutes, rest = divmod(rest, 60_000)
    secs, millis = divmod(rest, 1000)
    return f"{hours:02d}:{minutes:02d}:{secs:02d}.{millis:03d}"
def extract_audio(video_path):
    """Extract audio from video to a temporary audio file.

    The audio is written as mono, 16 kHz, signed 16-bit PCM WAV.

    Args:
        video_path: Path of the input video file.

    Returns:
        Path of the generated WAV file (always ``/tmp/audio.wav``).

    Raises:
        ffmpeg.Error: If the ffmpeg process exits with an error.
    """
    output_audio_path = '/tmp/audio.wav'
    stream = ffmpeg.input(video_path).output(
        output_audio_path, acodec='pcm_s16le', ac=1, ar='16k'
    )
    # The output path is fixed, so the file already exists after the first
    # call; without overwrite_output ffmpeg refuses to replace it and every
    # later call fails.
    stream.run(quiet=True, overwrite_output=True)
    return output_audio_path
def _srt_timestamp(seconds):
    """Format an offset in seconds as an SRT cue time (HH:MM:SS,mmm)."""
    # Round to milliseconds first so the seconds field can never show 60.
    total_ms = max(0, int(round(seconds * 1000)))
    hours, rest = divmod(total_ms, 3_600_000)
    minutes, rest = divmod(rest, 60_000)
    secs, millis = divmod(rest, 1000)
    return f"{hours:02d}:{minutes:02d}:{secs:02d},{millis:03d}"


def transcribe_and_optionally_translate(video_file, source_language, target_language, model_size, allow_modification):
    """Transcribe a video and, if requested, translate the result.

    The transcript is returned as SRT text (numbered cues with timestamps),
    because the caller writes it straight to a ``.srt`` file that ffmpeg
    burns into the video. Translation is done cue by cue so timing is kept
    and long transcripts are not cut off by the 512-token input limit.

    Args:
        video_file: Path of the video to transcribe.
        source_language: Language code of the spoken audio; when empty, the
            language detected during transcription is used instead.
        target_language: Language code to translate into; when empty or equal
            to the source language, no translation is performed.
        model_size: Whisper model size/name passed to ``WhisperModel``.
        allow_modification: Passed through unchanged as the second return value.

    Returns:
        A ``(transcription, allow_modification)`` tuple where ``transcription``
        is the SRT-formatted text (empty string if nothing was transcribed).
    """
    audio_file = extract_audio(video_file)

    # Transcription
    device = "cpu"  # GPU : cuda CPU : cpu
    compute_type = "int8"  # GPU : float16 or int8 - CPU : int8
    whisper_model = WhisperModel(model_size, device=device, compute_type=compute_type)
    segments, info = whisper_model.transcribe(audio_file)
    # `segments` is consumed once; keep the timing alongside the text.
    cues = [
        (segment.start, segment.end, segment.text.strip())
        for segment in segments
        if segment.text.strip()
    ]
    texts = [text for _, _, text in cues]

    # Translation
    source = source_language or info.language
    if texts and target_language and source != target_language:
        model_name = f"Helsinki-NLP/opus-mt-{source}-{target_language}"
        tokenizer = MarianTokenizer.from_pretrained(model_name)
        translator = MarianMTModel.from_pretrained(model_name)
        translated_texts = []
        batch_size = 8
        for offset in range(0, len(texts), batch_size):
            encoded = tokenizer(
                texts[offset:offset + batch_size],
                return_tensors="pt",
                padding=True,
                truncation=True,
                max_length=512,
            )
            generated = translator.generate(**encoded)
            translated_texts.extend(
                tokenizer.decode(ids, skip_special_tokens=True) for ids in generated
            )
        texts = translated_texts

    # Assemble SRT: index line, time range line, text, blank separator.
    srt_blocks = [
        f"{number}\n{_srt_timestamp(start)} --> {_srt_timestamp(end)}\n{text}\n"
        for number, ((start, end, _), text) in enumerate(zip(cues, texts), start=1)
    ]
    transcription = "\n".join(srt_blocks)
    return transcription, allow_modification
def add_hard_subtitle_to_video(input_video, transcript):
    """Add hard subtitles to video.

    The transcript is written to a temporary ``.srt`` file which ffmpeg's
    ``subtitles`` filter renders onto the video frames.

    Args:
        input_video: Path of the source video file.
        transcript: Subtitle text; assumed to be in SRT format.

    Returns:
        Path of the subtitled video (always ``/tmp/output_video.mp4``).

    Raises:
        ffmpeg.Error: If the ffmpeg process exits with an error.
    """
    temp_subtitle_path = '/tmp/subtitle.srt'
    with open(temp_subtitle_path, 'w', encoding='utf-8') as file:
        file.write(transcript)  # Assuming transcript is in SRT format

    output_video_path = "/tmp/output_video.mp4"
    stream = ffmpeg.input(input_video).output(
        output_video_path, vf=f"subtitles={temp_subtitle_path}"
    )
    # The output path is fixed, so it exists after the first run; ffmpeg
    # must be allowed to overwrite it or every later call fails.
    stream.run(quiet=True, overwrite_output=True)
    return output_video_path
# Gradio Interface
def process_video(video, source_language, target_language, model_size='base', allow_modification=False, modified_transcript=None):
    """Produce a copy of the video with hard subtitles burned in.

    When modification is allowed and a non-blank edited transcript is
    supplied, that text is used as the subtitles and the transcription step
    is skipped, since its result would be discarded. Otherwise the video is
    transcribed (and translated if the languages differ).

    Args:
        video: Path of the uploaded video.
        source_language: Language code of the spoken audio.
        target_language: Language code for the subtitles.
        model_size: Whisper model size; falls back to ``'base'`` when empty.
        allow_modification: Whether ``modified_transcript`` may replace the
            generated transcript.
        modified_transcript: Optional user-supplied subtitle text.

    Returns:
        Path of the video file with subtitles rendered into the frames.
    """
    # The UI may pass None when no model is selected, which bypasses the
    # signature default; fall back explicitly.
    model_size = model_size or 'base'

    use_edited = bool(
        allow_modification and modified_transcript and modified_transcript.strip()
    )
    if use_edited:
        transcript = modified_transcript  # Use the modified transcript if provided
    else:
        transcript, _ = transcribe_and_optionally_translate(
            video, source_language, target_language, model_size, allow_modification
        )

    # Add hard subtitles to the video
    return add_hard_subtitle_to_video(video, transcript)
# Setup the Gradio app: one form whose inputs map positionally onto
# process_video's parameters and whose output is the subtitled video.
app = gr.Interface(
    fn=process_video,
    inputs=[
        gr.Video(label="Upload Video"),
        gr.Dropdown(choices=language_options, label="Source Language"),
        gr.Dropdown(choices=language_options, label="Target Language"),
        gr.Dropdown(
            choices=["base", "small", "medium", "large", "large-v2", "large-v3"],
            # Preselect a model so the callback never receives an empty selection.
            value="base",
            label="Model Size",
        ),
        gr.Checkbox(label="Allow Transcript Modification?", value=False),
        gr.TextArea(label="Modified Transcript (if allowed)"),
    ],
    outputs=gr.Video(label="Processed Video with Hard Subtitles"),
    title="Video Transcription and Translation Tool",
    description="Transcribe or translate your video content. Optionally, edit the transcription before adding hard subtitles.",
)

if __name__ == "__main__":
    app.launch()