File size: 5,987 Bytes
9e156fa
4f0841f
 
effdcb4
 
9e156fa
effdcb4
dbc58d4
9e156fa
effdcb4
4f0841f
 
 
 
9e156fa
8c56203
4f0841f
effdcb4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4f0841f
 
effdcb4
 
 
 
 
4f0841f
effdcb4
 
 
 
 
 
 
 
 
 
 
 
 
4f0841f
effdcb4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4f0841f
9e156fa
effdcb4
 
 
 
 
 
 
 
 
 
 
 
9e156fa
effdcb4
 
 
 
 
 
 
c18dcee
effdcb4
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
import functools
import logging
import os

import ffmpeg
import gradio as gr
import pandas as pd
import pysrt
from faster_whisper import WhisperModel
from transformers import MarianMTModel, MarianTokenizer

# Initial configuration and data loading.
# The language table is fetched over the network at import time.
url = "https://huggingface.co/Lenylvt/LanguageISO/resolve/main/iso.md"

# Read the pipe-delimited table (first two rows skipped, no header row),
# then discard columns that contain no values at all.
df = pd.read_csv(url, delimiter="|", skiprows=2, header=None)
df = df.dropna(axis=1, how='all')
df.columns = ['ISO 639-1', 'ISO 639-2', 'Language Name', 'Native Name']
df['ISO 639-1'] = df['ISO 639-1'].str.strip()

# Dropdown choices: one (code, code-as-text) pair per table row.
language_options = [(code, f"{code}") for code in df['ISO 639-1']]

logging.basicConfig(level=logging.DEBUG)

# Convert a timestamped transcript into an SRT file
def text_to_srt(text):
    """Convert a timestamped transcript into an SRT file and return its path.

    Each non-blank line of *text* is expected to look like
    ``[START -> END] caption``. Timestamps with a single ``:`` (``MM:SS.mmm``)
    are padded with a leading ``00:`` hour field, and ``.`` is replaced by
    ``,`` as the SRT millisecond separator. Blank lines and lines that do not
    match the expected shape are skipped.

    Cues are numbered consecutively from 1 in the order they are emitted, so
    skipped lines never leave gaps in the numbering.

    Args:
        text: Transcript with one ``[START -> END] caption`` entry per line.

    Returns:
        The path of the written file (always ``/tmp/output.srt``; the file is
        overwritten on every call).
    """
    cues = []
    for line in text.split('\n'):
        if not line.strip():
            continue
        try:
            times, content = line.split(']', 1)
            # times[1:] drops the leading '[' of the timestamp pair.
            start, end = times[1:].split(' -> ')
        except ValueError:
            # Line lacks a ']' or a single ' -> ' separator: not a cue.
            continue
        if start.count(":") == 1:
            start = "00:" + start
        if end.count(":") == 1:
            end = "00:" + end
        # Number by cues actually written, not by raw input line position.
        index = len(cues) + 1
        cues.append(
            f"{index}\n{start.replace('.', ',')} --> {end.replace('.', ',')}\n{content.strip()}\n\n"
        )
    temp_file_path = '/tmp/output.srt'
    with open(temp_file_path, 'w', encoding='utf-8') as file:
        file.write("".join(cues))
    return temp_file_path

# Format a number of seconds as an HH:MM:SS.mmm timestamp
def format_timestamp(seconds):
    """Return *seconds* formatted as ``HH:MM:SS.mmm``.

    The value is rounded to whole milliseconds before it is split into
    fields, so a carry propagates into the minutes and hours (59.9996 becomes
    ``00:01:00.000`` instead of the invalid ``00:00:60.000``).

    Args:
        seconds: Non-negative duration in seconds (int or float).

    Returns:
        The zero-padded timestamp string.
    """
    total_ms = int(round(seconds * 1000))
    hours, remainder_ms = divmod(total_ms, 3_600_000)
    minutes, remainder_ms = divmod(remainder_ms, 60_000)
    whole_seconds, millis = divmod(remainder_ms, 1000)
    return f"{hours:02d}:{minutes:02d}:{whole_seconds:02d}.{millis:03d}"

# Translate a piece of text between two languages
@functools.lru_cache(maxsize=4)
def _load_translation_model(model_name):
    """Load the MarianMT tokenizer and model for *model_name*, memoized.

    The cache is bounded because every entry keeps a full model in memory.
    Failed loads raise and are therefore not cached.
    """
    tokenizer = MarianTokenizer.from_pretrained(model_name)
    model = MarianMTModel.from_pretrained(model_name)
    return tokenizer, model


def translate_text(text, source_language_code, target_language_code):
    """Translate *text* using the ``Helsinki-NLP/opus-mt-<src>-<tgt>`` model.

    The tokenizer/model pair is loaded once per language pair and reused on
    later calls, instead of being reloaded for every piece of text.

    Args:
        text: Text to translate; input is truncated to 512 tokens.
        source_language_code: Language code of *text*.
        target_language_code: Language code to translate into.

    Returns:
        The translated text. When both codes are equal, or the model cannot
        be loaded, an explanatory message string is returned instead of
        raising.
    """
    if source_language_code == target_language_code:
        return "Translation between the same languages is not supported."
    model_name = f"Helsinki-NLP/opus-mt-{source_language_code}-{target_language_code}"
    try:
        tokenizer, model = _load_translation_model(model_name)
    except Exception as e:
        return f"Failed to load model for {source_language_code} to {target_language_code}: {str(e)}"
    encoded = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512)
    translated = model.generate(**encoded)
    return tokenizer.decode(translated[0], skip_special_tokens=True)

# Translate every cue of an SRT file
def translate_srt(input_file_path, source_language_code, target_language_code, progress=None):
    """Translate every cue of an SRT file and save the result next to it.

    Cue timings are kept; cues are renumbered from 1 in file order.

    Args:
        input_file_path: Path of the SRT file to translate.
        source_language_code: Language code of the existing subtitles.
        target_language_code: Language code to translate into.
        progress: Optional callable invoked after each cue with the completed
            fraction (a float in ``(0, 1]``).

    Returns:
        Path of the translated file: the input path with
        ``_<target_language_code>`` inserted before the extension (``.srt`` is
        appended when the input has no extension).
    """
    subs = pysrt.open(input_file_path)
    total = len(subs)
    translated_subs = []
    for position, sub in enumerate(subs, start=1):
        translated_text = translate_text(sub.text, source_language_code, target_language_code)
        translated_subs.append(
            pysrt.SubRipItem(index=position, start=sub.start, end=sub.end, text=translated_text)
        )
        if progress:
            progress(position / total)
    # Split off only the real extension: str.replace(".srt", ...) would touch
    # every ".srt" in the path and, for paths not ending in ".srt", would
    # return the input path unchanged and overwrite the source file.
    root, ext = os.path.splitext(input_file_path)
    translated_srt_path = f"{root}_{target_language_code}{ext or '.srt'}"
    pysrt.SubRipFile(translated_subs).save(translated_srt_path)
    return translated_srt_path

# Transcribe the audio track of a video into timestamped text
def transcribe(audio_file_path, model_size="base"):
    """Transcribe a media file into timestamped text lines.

    Runs a faster-whisper model on the CPU with int8 computation.

    Args:
        audio_file_path: Path of the audio/video file to transcribe.
        model_size: Whisper model size or path handed to ``WhisperModel``.

    Returns:
        One ``[HH:MM:SS.mmm -> HH:MM:SS.mmm] text`` line per recognized
        segment, joined with newlines.
    """
    device = "cpu"
    compute_type = "int8"
    # The constructor's first parameter is named `model_size_or_path`, so the
    # model identifier is passed positionally rather than as `model_size=`.
    model = WhisperModel(model_size, device=device, compute_type=compute_type)
    segments, _ = model.transcribe(audio_file_path)
    return "\n".join(
        f"[{format_timestamp(segment.start)} -> {format_timestamp(segment.end)}] {segment.text}"
        for segment in segments
    )

# Add subtitles to a video
def add_subtitle_to_video(input_video, subtitle_file, subtitle_language, soft_subtitle):
    """Produce a copy of *input_video* with subtitles and return its path.

    Args:
        input_video: Path of the source video.
        subtitle_file: Path of the subtitle file to add.
        subtitle_language: Language tag written to the subtitle stream
            metadata (soft subtitles only).
        soft_subtitle: If true, mux the subtitles as a separate ``mov_text``
            stream and copy the other streams; otherwise render them into the
            picture with the ``subtitles`` video filter.

    Returns:
        Path of the written video, ``/tmp/output-<input name>.mp4``. An
        existing file at that path is overwritten.
    """
    video_input_stream = ffmpeg.input(input_video)
    input_video_name = os.path.splitext(os.path.basename(input_video))[0]
    output_video = f"/tmp/output-{input_video_name}.mp4"

    if soft_subtitle:
        subtitle_input_stream = ffmpeg.input(subtitle_file)
        subtitle_track_title = os.path.splitext(os.path.basename(subtitle_file))[0]
        # Each option name can appear only once in the kwargs dict, so the two
        # metadata entries need distinct keys (a repeated key would silently
        # drop the language tag). The language uses the "all subtitle streams"
        # specifier; the title stays on subtitle stream 0.
        # NOTE(review): if the source video already carries subtitle streams,
        # the language tag is applied to those as well - confirm acceptable.
        output_options = {
            "c": "copy",
            "c:s": "mov_text",
            "metadata:s:s": f"language={subtitle_language}",
            "metadata:s:s:0": f"title={subtitle_track_title}",
        }
        stream = ffmpeg.output(
            video_input_stream, subtitle_input_stream, output_video,
            **output_options
        )
    else:
        # NOTE(review): the path is interpolated into the filter string as-is;
        # paths containing filter-special characters may need escaping.
        stream = ffmpeg.output(
            video_input_stream, output_video,
            vf=f"subtitles={subtitle_file}"
        )

    ffmpeg.run(stream, overwrite_output=True)
    return output_video

# Gradio callback: transcribe, translate, then subtitle the uploaded video
def transcribe_and_translate_video(video_file, source_language_code, target_language_code):
    """Transcribe a video, translate the subtitles and render them into it.

    Args:
        video_file: The uploaded video, either as a path string or as an
            object exposing the path through a ``name`` attribute.
        source_language_code: Language code of the spoken audio.
        target_language_code: Language code for the translated subtitles.

    Returns:
        A ``(output_video_path, translated_srt_path)`` tuple.
    """
    # Gradio video inputs deliver a plain file path; file-style inputs deliver
    # an object with `.name`. Accept both instead of assuming `.name` exists.
    video_path = getattr(video_file, "name", video_file)
    transcription = transcribe(video_path, "tiny")
    srt_path = text_to_srt(transcription)
    translated_srt_path = translate_srt(srt_path, source_language_code, target_language_code)
    output_video = add_subtitle_to_video(video_path, translated_srt_path, target_language_code, False)
    return output_video, translated_srt_path

# Gradio input and output components.
# Top-level component classes are used because the legacy `gr.inputs` /
# `gr.outputs` namespaces are deprecated and no longer exist in Gradio 4.
video_input = gr.Video(label="Video File")
source_language_dropdown = gr.Dropdown(choices=language_options, label="Source Language")
target_language_dropdown = gr.Dropdown(choices=language_options, label="Target Language")

transcribe_translate_interface = gr.Interface(
    fn=transcribe_and_translate_video,
    inputs=[video_input, source_language_dropdown, target_language_dropdown],
    outputs=[
        gr.Video(label="Video with Translated Subtitles"),
        gr.File(label="Translated Subtitles (.srt)"),
    ],
    title="Video Transcribe & Translate",
    description="Transcribe and translate the subtitles of your video into another language."
)

# Launch the Gradio app
transcribe_translate_interface.launch()