Spaces:

Lenylvt
/

VideoSubtitleCreation-API

Running on Zero

File size: 5,987 Bytes

9e156fa
4f0841f
 
effdcb4
 
9e156fa
effdcb4
dbc58d4
9e156fa
effdcb4
4f0841f
 
 
 
9e156fa
8c56203
4f0841f
effdcb4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4f0841f
 
effdcb4
 
 
 
 
4f0841f
effdcb4
 
 
 
 
 
 
 
 
 
 
 
 
4f0841f
effdcb4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4f0841f
9e156fa
effdcb4
 
 
 
 
 
 
 
 
 
 
 
9e156fa
effdcb4
 
 
 
 
 
 
c18dcee
effdcb4

import gradio as gr
from faster_whisper import WhisperModel
import logging
import os
import pysrt
import pandas as pd
from transformers import MarianMTModel, MarianTokenizer
import ffmpeg

# Initial configuration: load the ISO language table used for the dropdowns
url = "https://huggingface.co/Lenylvt/LanguageISO/resolve/main/iso.md"

# The file is read as a pipe-delimited table. The first two lines are skipped
# (presumably the markdown header and separator rows) and columns that are
# entirely NaN are dropped before the four remaining ones are named.
df = pd.read_csv(url, delimiter="|", skiprows=2, header=None)
df = df.dropna(axis=1, how='all')
df.columns = ['ISO 639-1', 'ISO 639-2', 'Language Name', 'Native Name']
df['ISO 639-1'] = df['ISO 639-1'].str.strip()

# Each dropdown choice is a pair whose two members are both the ISO 639-1 code.
language_options = [(code, f"{code}") for code in df['ISO 639-1']]

logging.basicConfig(level=logging.DEBUG)

# Convert "[start -> end] text" transcript lines into an SRT file
def text_to_srt(text):
    """Write a timestamped transcript to an SRT file and return its path.

    Each non-blank line of ``text`` is expected to look like
    ``[HH:MM:SS.mmm -> HH:MM:SS.mmm] caption``; a timestamp with a single
    colon (``MM:SS.mmm``) is prefixed with ``00:``. Lines that do not match
    this shape are skipped.

    Args:
        text: Newline-separated transcript lines.

    Returns:
        The path of the written file, which is always ``/tmp/output.srt``.
    """
    cues = []
    for raw_line in text.split('\n'):
        # Strip first so leading whitespace cannot shift the "[" that is
        # removed from the front of the timestamp part below.
        line = raw_line.strip()
        if not line:
            continue
        try:
            times, content = line.split(']', 1)
            start, end = times[1:].split(' -> ')
        except ValueError:
            # Not a "[start -> end] text" line: best effort, skip it.
            continue
        if start.count(":") == 1:
            start = "00:" + start
        if end.count(":") == 1:
            end = "00:" + end
        # Number cues by how many have been emitted, not by input line
        # position, so skipped lines do not leave gaps in the SRT indices.
        cues.append(
            f"{len(cues) + 1}\n"
            f"{start.replace('.', ',')} --> {end.replace('.', ',')}\n"
            f"{content.strip()}\n\n"
        )
    temp_file_path = '/tmp/output.srt'
    with open(temp_file_path, 'w', encoding='utf-8') as file:
        file.write("".join(cues))
    return temp_file_path

# Format a duration in seconds as an "HH:MM:SS.mmm" timestamp
def format_timestamp(seconds):
    """Return ``seconds`` formatted as ``HH:MM:SS.mmm``.

    The value is rounded to whole milliseconds before it is split into
    fields, so a rounding carry propagates into minutes and hours instead of
    producing an invalid ``60.000`` seconds field.

    Args:
        seconds: Duration in seconds (int or float); expected to be
            non-negative.

    Returns:
        The timestamp string with zero-padded hours, minutes, seconds and a
        three-digit millisecond part.
    """
    total_milliseconds = int(round(seconds * 1000))
    total_seconds, milliseconds = divmod(total_milliseconds, 1000)
    total_minutes, secs = divmod(total_seconds, 60)
    hours, minutes = divmod(total_minutes, 60)
    return f"{hours:02d}:{minutes:02d}:{secs:02d}.{milliseconds:03d}"

# Loaded (tokenizer, model) pairs keyed by model name, so that each language
# pair is loaded once instead of once per translated string.
_TRANSLATION_MODELS = {}


def _load_translation_model(model_name):
    """Return the (tokenizer, model) pair for ``model_name``, loading it at most once."""
    if model_name not in _TRANSLATION_MODELS:
        tokenizer = MarianTokenizer.from_pretrained(model_name)
        model = MarianMTModel.from_pretrained(model_name)
        # Only successful loads are stored; a failure propagates to the caller.
        _TRANSLATION_MODELS[model_name] = (tokenizer, model)
    return _TRANSLATION_MODELS[model_name]


# Text translation function
def translate_text(text, source_language_code, target_language_code):
    """Translate ``text`` with the matching Helsinki-NLP opus-mt model.

    Args:
        text: The text to translate. Input is truncated to 512 tokens.
        source_language_code: Language code used as the source part of the
            model name.
        target_language_code: Language code used as the target part of the
            model name.

    Returns:
        The translated text. Failures are reported through the return value
        rather than an exception: a fixed message when both codes are equal,
        or a "Failed to load model ..." message when the model cannot be
        loaded. Empty or whitespace-only ``text`` is returned unchanged.
    """
    if source_language_code == target_language_code:
        return "Translation between the same languages is not supported."
    if not text or not text.strip():
        # Nothing to translate; avoid sending blank input to the model.
        return text
    model_name = f"Helsinki-NLP/opus-mt-{source_language_code}-{target_language_code}"
    try:
        tokenizer, model = _load_translation_model(model_name)
    except Exception as e:
        return f"Failed to load model for {source_language_code} to {target_language_code}: {str(e)}"
    encoded = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512)
    translated = model.generate(**encoded)
    # Only the first generated sequence is decoded.
    return tokenizer.decode(translated[0], skip_special_tokens=True)

# Translate every cue of an SRT file
def translate_srt(input_file_path, source_language_code, target_language_code, progress=None):
    """Translate an SRT file cue by cue and save the result next to it.

    Args:
        input_file_path: Path of the SRT file to read.
        source_language_code: Language code passed to ``translate_text`` as
            the source language.
        target_language_code: Language code passed to ``translate_text`` as
            the target language; also used in the output file name.
        progress: Optional callable invoked after each cue with the fraction
            of cues processed so far.

    Returns:
        The path of the translated file: the input path with
        ``_<target_language_code>`` inserted before a trailing ``.srt``, or
        with ``_<target_language_code>.srt`` appended when the input has no
        such extension.
    """
    subs = pysrt.open(input_file_path)
    total = len(subs)
    translated_subs = []
    for idx, sub in enumerate(subs, start=1):
        translated_text = translate_text(sub.text, source_language_code, target_language_code)
        translated_subs.append(
            pysrt.SubRipItem(index=idx, start=sub.start, end=sub.end, text=translated_text)
        )
        if progress:
            progress(idx / total)

    # Only touch the trailing extension. A plain str.replace would rewrite any
    # ".srt" inside directory names and, when the extension is absent, would
    # return the input path itself so the source file got overwritten.
    suffix = ".srt"
    if input_file_path.lower().endswith(suffix):
        stem = input_file_path[:-len(suffix)]
        extension = input_file_path[-len(suffix):]
        translated_srt_path = f"{stem}_{target_language_code}{extension}"
    else:
        translated_srt_path = f"{input_file_path}_{target_language_code}.srt"

    pysrt.SubRipFile(translated_subs).save(translated_srt_path)
    return translated_srt_path

# Transcribe the audio of a video into timestamped text
def transcribe(audio_file_path, model_size="base"):
    """Transcribe a media file with faster-whisper on the CPU.

    Args:
        audio_file_path: Path of the file handed to the Whisper model.
        model_size: Whisper model size name to load (default ``"base"``).

    Returns:
        One line per segment, joined with newlines, each of the form
        ``[<start> -> <end>] <text>`` with the times rendered by
        ``format_timestamp``.
    """
    # WhisperModel names its first parameter ``model_size_or_path``; passing
    # ``model_size=...`` as a keyword does not bind to it, so the size is
    # given positionally.
    model = WhisperModel(model_size, device="cpu", compute_type="int8")
    segments, _ = model.transcribe(audio_file_path)
    return "\n".join(
        f"[{format_timestamp(segment.start)} -> {format_timestamp(segment.end)}] {segment.text}"
        for segment in segments
    )

# Add subtitles to a video
def add_subtitle_to_video(input_video, subtitle_file, subtitle_language, soft_subtitle):
    """Mux or burn a subtitle file into a video using ffmpeg.

    Args:
        input_video: Path of the source video.
        subtitle_file: Path of the subtitle file to add.
        subtitle_language: Value written to the subtitle track's ``language``
            metadata (soft subtitles only).
        soft_subtitle: When true, streams are copied and the subtitles are
            added as a ``mov_text`` track; otherwise the subtitles are
            rendered onto the picture with the ``subtitles`` video filter.

    Returns:
        The path of the written file, ``/tmp/output-<input name>.mp4``. An
        existing file at that path is overwritten.
    """
    video_input_stream = ffmpeg.input(input_video)
    input_video_name = os.path.splitext(os.path.basename(input_video))[0]
    output_video = f"/tmp/output-{input_video_name}.mp4"

    if soft_subtitle:
        subtitle_input_stream = ffmpeg.input(subtitle_file)
        subtitle_track_title = os.path.splitext(os.path.basename(subtitle_file))[0]
        stream = ffmpeg.output(
            video_input_stream, subtitle_input_stream, output_video,
            **{
                "c": "copy",
                "c:s": "mov_text",
                # A dict cannot hold the same option key twice: with two
                # "metadata:s:s:0" entries the title silently replaced the
                # language. The title therefore uses the "s:s" specifier,
                # which addresses every subtitle stream of the output - the
                # single added track unless the source video already carries
                # subtitle streams of its own.
                "metadata:s:s:0": f"language={subtitle_language}",
                "metadata:s:s": f"title={subtitle_track_title}",
            }
        )
    else:
        # NOTE(review): the path is interpolated into the filter string
        # without escaping, so a subtitle path containing filter
        # metacharacters (":", "'", ",") would break the filter graph.
        stream = ffmpeg.output(
            video_input_stream, output_video,
            vf=f"subtitles={subtitle_file}"
        )

    ffmpeg.run(stream, overwrite_output=True)
    return output_video

# Gradio callback: transcribe a video, translate the subtitles and burn them in
def transcribe_and_translate_video(video_file, source_language_code, target_language_code):
    """Run the full transcribe -> SRT -> translate -> subtitle pipeline.

    Args:
        video_file: Either a path string or an object exposing the path as
            ``.name``.
        source_language_code: Language code of the spoken audio, used as the
            translation source.
        target_language_code: Language code to translate the subtitles into.

    Returns:
        A ``(output_video, translated_srt_path)`` tuple: the path of the video
        with the translated subtitles rendered onto it, and the path of the
        translated SRT file.
    """
    # A Gradio Video input hands the callback a plain path string, which has
    # no ``.name``; file-like inputs expose the path as ``.name``. Accept both.
    video_path = getattr(video_file, "name", video_file)
    transcription = transcribe(video_path, "tiny")
    srt_path = text_to_srt(transcription)
    translated_srt_path = translate_srt(srt_path, source_language_code, target_language_code)
    # soft_subtitle=False: the subtitles are burned into the picture.
    output_video = add_subtitle_to_video(video_path, translated_srt_path, target_language_code, False)
    return output_video, translated_srt_path

# Gradio inputs and outputs.
# The top-level component classes are used instead of the ``gr.inputs`` /
# ``gr.outputs`` namespaces, which are deprecated in Gradio 3 and no longer
# exist in Gradio 4.
video_input = gr.Video(label="Video File")
source_language_dropdown = gr.Dropdown(choices=language_options, label="Source Language")
target_language_dropdown = gr.Dropdown(choices=language_options, label="Target Language")

transcribe_translate_interface = gr.Interface(
    fn=transcribe_and_translate_video,
    inputs=[video_input, source_language_dropdown, target_language_dropdown],
    outputs=[
        gr.Video(label="Video with Translated Subtitles"),
        gr.File(label="Translated Subtitles (.srt)"),
    ],
    title="Video Transcribe & Translate",
    description="Transcribe and translate the subtitles of your video into another language."
)

# Launch the Gradio application
transcribe_translate_interface.launch()