# Hugging Face Spaces app: transcribe a video's audio to subtitles
# (faster-whisper), optionally translate them (MarianMT), and attach
# them to the video (ffmpeg) via a Gradio interface.
import functools
import logging
import os
import tempfile

import ffmpeg
import gradio as gr
import pandas as pd
import pysrt
from faster_whisper import WhisperModel
from transformers import MarianMTModel, MarianTokenizer
# Initial configuration: fetch the ISO-639 language table used to populate
# the Gradio dropdowns, and enable debug logging.
url = "https://huggingface.co/Lenylvt/LanguageISO/resolve/main/iso.md"
# The markdown table is pipe-delimited; skip the two header/separator rows
# and drop the all-NaN columns produced by the leading/trailing pipes.
df = pd.read_csv(url, delimiter="|", skiprows=2, header=None).dropna(axis=1, how='all')
df.columns = ['ISO 639-1', 'ISO 639-2', 'Language Name', 'Native Name']
df['ISO 639-1'] = df['ISO 639-1'].str.strip()
# (value, label) pairs for gr.Dropdown; both entries are the ISO 639-1 code.
language_options = [(row['ISO 639-1'], f"{row['ISO 639-1']}") for index, row in df.iterrows()]
logging.basicConfig(level=logging.DEBUG)
def text_to_srt(text):
    """Convert "[start -> end] caption" transcription lines to an SRT file.

    Args:
        text: Transcription with one segment per line, formatted as
            ``[MM:SS.mmm -> MM:SS.mmm] caption`` (hour field optional).

    Returns:
        Path of a newly created temporary ``.srt`` file.
    """
    srt_content = ""
    cue_number = 0  # SRT cue numbers must be consecutive, so we keep our
                    # own counter instead of reusing the line index (which
                    # would leave gaps at blank or malformed lines).
    for line in text.split('\n'):
        if line.strip() == "":
            continue
        try:
            times, content = line.split(']', 1)
            start, end = times[1:].split(' -> ')
        except ValueError:
            # Skip lines that do not match the "[a -> b] text" shape.
            continue
        # Whisper omits the hour field for short media; SRT requires it.
        if start.count(":") == 1:
            start = "00:" + start
        if end.count(":") == 1:
            end = "00:" + end
        cue_number += 1
        srt_content += f"{cue_number}\n{start.replace('.', ',')} --> {end.replace('.', ',')}\n{content.strip()}\n\n"
    # A unique temp file per call avoids clobbering when several requests run
    # concurrently (the previous fixed '/tmp/output.srt' was shared by all
    # callers) and is portable beyond POSIX systems.
    fd, temp_file_path = tempfile.mkstemp(suffix=".srt")
    with os.fdopen(fd, 'w', encoding='utf-8') as file:
        file.write(srt_content)
    return temp_file_path
def format_timestamp(seconds):
    """Render a duration in seconds as an ``HH:MM:SS.mmm`` timestamp."""
    total_minutes, secs = divmod(seconds, 60)
    hrs, mins = divmod(int(total_minutes), 60)
    return f"{hrs:02d}:{mins:02d}:{secs:06.3f}"
@functools.lru_cache(maxsize=4)
def _load_translation_model(model_name):
    """Load and cache the MarianMT tokenizer/model pair for one direction."""
    tokenizer = MarianTokenizer.from_pretrained(model_name)
    model = MarianMTModel.from_pretrained(model_name)
    return tokenizer, model

def translate_text(text, source_language_code, target_language_code):
    """Translate *text* with a Helsinki-NLP opus-mt model.

    Args:
        text: Source-language text (truncated to 512 tokens).
        source_language_code: ISO 639-1 code of the input language.
        target_language_code: ISO 639-1 code of the output language.

    Returns:
        The translated string, or a human-readable error message when the
        language pair is unsupported or the model cannot be loaded (errors
        are reported as strings, not exceptions, to match the original
        contract that callers display the result directly).
    """
    if source_language_code == target_language_code:
        return "Translation between the same languages is not supported."
    model_name = f"Helsinki-NLP/opus-mt-{source_language_code}-{target_language_code}"
    try:
        # Cached: this function is called once per subtitle line, and the
        # original re-downloaded/re-read the weights on every single call.
        tokenizer, model = _load_translation_model(model_name)
    except Exception as e:
        return f"Failed to load model for {source_language_code} to {target_language_code}: {str(e)}"
    encoded = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512)
    translated = model.generate(**encoded)
    return tokenizer.decode(translated[0], skip_special_tokens=True)
def translate_srt(input_file_path, source_language_code, target_language_code, progress=None):
    """Translate every cue of an SRT file into the target language.

    Args:
        input_file_path: Path of the source ``.srt`` file.
        source_language_code: ISO 639-1 code of the cues' language.
        target_language_code: ISO 639-1 code to translate into.
        progress: Optional callable receiving a 0..1 completion fraction
            after each cue.

    Returns:
        Path of the translated file, named ``<input>_<target>.srt``.
    """
    source_subs = pysrt.open(input_file_path)
    total = len(source_subs)
    translated = pysrt.SubRipFile()
    for position, cue in enumerate(source_subs, start=1):
        new_text = translate_text(cue.text, source_language_code, target_language_code)
        translated.append(
            pysrt.SubRipItem(index=position, start=cue.start, end=cue.end, text=new_text)
        )
        if progress:
            progress(position / total)
    output_path = input_file_path.replace(".srt", f"_{target_language_code}.srt")
    translated.save(output_path)
    return output_path
def transcribe(audio_file_path, model_size="base"):
    """Transcribe a media file with faster-whisper.

    Args:
        audio_file_path: Path of the audio/video file to transcribe.
        model_size: Whisper checkpoint name (e.g. "tiny", "base").

    Returns:
        One line per segment: ``[HH:MM:SS.mmm -> HH:MM:SS.mmm] text``.
    """
    # CPU + int8 keeps the memory footprint small enough for shared hardware.
    # NOTE: WhisperModel's first argument is `model_size_or_path`; the
    # previous `model_size=` keyword raises TypeError on faster-whisper,
    # so the size is passed positionally.
    model = WhisperModel(model_size, device="cpu", compute_type="int8")
    segments, _ = model.transcribe(audio_file_path)
    return "\n".join(
        f"[{format_timestamp(segment.start)} -> {format_timestamp(segment.end)}] {segment.text}"
        for segment in segments
    )
def add_subtitle_to_video(input_video, subtitle_file, subtitle_language, soft_subtitle=False):
    """Attach an SRT file to a video with ffmpeg.

    Args:
        input_video: Path of the source video.
        subtitle_file: Path of the ``.srt`` file to attach.
        subtitle_language: Language tag stored in the subtitle stream's
            metadata (only meaningful for soft subtitles).
        soft_subtitle: True to mux the SRT as a selectable mov_text track
            (streams copied, no re-encode); False to burn the subtitles
            into the video frames.

    Returns:
        Path of the subtitled output video under /tmp.
    """
    video_input_stream = ffmpeg.input(input_video)
    input_video_name = os.path.splitext(os.path.basename(input_video))[0]
    output_video = f"/tmp/{input_video_name}_subtitled.mp4"
    if soft_subtitle:
        subtitle_input_stream = ffmpeg.input(subtitle_file)
        stream = ffmpeg.output(
            video_input_stream,
            subtitle_input_stream,
            output_video,
            **{
                "c": "copy",
                "c:s": "mov_text",
                # Tag the subtitle track so players can label its language
                # (the parameter was previously accepted but never used).
                "metadata:s:s:0": f"language={subtitle_language}",
            },
        )
    else:
        # Hard subtitles: re-encode the video with the filter burned in.
        stream = ffmpeg.output(video_input_stream, output_video, vf=f"subtitles={subtitle_file}")
    ffmpeg.run(stream, overwrite_output=True)
    return output_video
def transcribe_video(video_file):
    """Gradio handler: transcribe a video and burn the subtitles into it.

    Returns:
        (path of the subtitled video, path of the ``.srt`` transcript).
    """
    raw_transcription = transcribe(video_file.name, "tiny")
    subtitle_path = text_to_srt(raw_transcription)
    # Burn the generated subtitles directly into the frames.
    subtitled_video = add_subtitle_to_video(
        video_file.name, subtitle_path, subtitle_language="eng", soft_subtitle=False
    )
    return subtitled_video, subtitle_path
def translate_video(video_file, source_language_code, target_language_code):
    """Gradio handler: transcribe, translate the subtitles, and burn them in.

    Returns:
        (path of the subtitled video, path of the translated ``.srt``).
    """
    raw_transcription = transcribe(video_file.name, "tiny")
    original_srt = text_to_srt(raw_transcription)
    translated_srt = translate_srt(original_srt, source_language_code, target_language_code)
    # Burn the translated subtitles directly into the frames.
    subtitled_video = add_subtitle_to_video(
        video_file.name, translated_srt, target_language_code, soft_subtitle=False
    )
    return subtitled_video, translated_srt
# Gradio UI: one tab per workflow. gr.TabbedInterface replaces the previous
# gr.Tabs([gr.Tab(interface, ...)]) construction, which is not a valid API
# (gr.Tab is a layout context manager that takes no Interface argument, and
# gr.Tabs has no launch method). Each Interface also gets its own input
# components: a single component instance cannot be shared between two
# Interfaces.
file_input = gr.Video(label="Video File")
source_language_dropdown = gr.Dropdown(choices=language_options, label="Source Language")
target_language_dropdown = gr.Dropdown(choices=language_options, label="Target Language")
transcription_interface = gr.Interface(
    fn=transcribe_video,
    inputs=file_input,
    outputs=[gr.Video(label="Video with Subtitles"), gr.File(label="Transcription File (.srt)")],
    title="Video Transcription",
    description="Automatically transcribe the audio from your video into subtitles and add them to the video."
)
translation_interface = gr.Interface(
    fn=translate_video,
    inputs=[gr.Video(label="Video File"), source_language_dropdown, target_language_dropdown],
    outputs=[gr.Video(label="Video with Translated Subtitles"), gr.File(label="Translated Subtitles (.srt)")],
    title="Video Translation",
    description="Translate the subtitles of your video into another language and add them to the video."
)
tabs = gr.TabbedInterface(
    [transcription_interface, translation_interface],
    tab_names=["Transcribe", "Translate"],
)

# Launch the application
tabs.launch()