Spaces:
Running
on
Zero
Running
on
Zero
File size: 5,987 Bytes
9e156fa 4f0841f effdcb4 9e156fa effdcb4 dbc58d4 9e156fa effdcb4 4f0841f 9e156fa 8c56203 4f0841f effdcb4 4f0841f effdcb4 4f0841f effdcb4 4f0841f effdcb4 4f0841f 9e156fa effdcb4 9e156fa effdcb4 c18dcee effdcb4 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 |
import functools
import logging
import os

import ffmpeg
import gradio as gr
import pandas as pd
import pysrt
from faster_whisper import WhisperModel
from transformers import MarianMTModel, MarianTokenizer
# Initial configuration and data loading.
# The language table is a pipe-delimited markdown file; the first two rows are
# skipped and columns that are entirely empty are dropped before the four
# remaining columns are named.
url = "https://huggingface.co/Lenylvt/LanguageISO/resolve/main/iso.md"
df = pd.read_csv(url, sep="|", skiprows=2, header=None)
df = df.dropna(axis=1, how='all')
df.columns = ['ISO 639-1', 'ISO 639-2', 'Language Name', 'Native Name']
df['ISO 639-1'] = df['ISO 639-1'].str.strip()

# One (code, code-as-string) pair per table row, in table order.
language_options = [(code, f"{code}") for code in df['ISO 639-1']]

logging.basicConfig(level=logging.DEBUG)
# Format a timestamped transcript as an SRT file
def text_to_srt(text):
    """Write a timestamped transcript to an SRT file and return the file path.

    Each non-blank line of ``text`` is expected to have the form
    ``[START -> END] caption``. Timestamps containing a single colon are
    prefixed with ``00:``, and every ``.`` in a timestamp is replaced by
    ``,``. Blank lines and lines that do not split into that shape are
    skipped.

    Cues are numbered consecutively starting at 1, counting only the lines
    that were actually emitted, so skipped lines leave no gaps in the
    numbering.

    Args:
        text: Transcript with one ``[START -> END] caption`` entry per line.

    Returns:
        The path of the written file, ``/tmp/output.srt``. The file is
        overwritten on every call.
    """
    cues = []
    for line in text.split('\n'):
        if not line.strip():
            continue
        # Only the two unpacking splits can raise ValueError on a malformed
        # line, so keep the try body limited to them.
        try:
            times, content = line.split(']', 1)
            start, end = times[1:].split(' -> ')
        except ValueError:
            continue
        if start.count(":") == 1:
            start = "00:" + start
        if end.count(":") == 1:
            end = "00:" + end
        cue_number = len(cues) + 1
        cues.append(
            f"{cue_number}\n{start.replace('.', ',')} --> {end.replace('.', ',')}\n{content.strip()}\n\n"
        )

    temp_file_path = '/tmp/output.srt'
    with open(temp_file_path, 'w', encoding='utf-8') as file:
        file.write("".join(cues))
    return temp_file_path
# Format a number of seconds as a timestamp
def format_timestamp(seconds):
    """Return ``seconds`` formatted as ``HH:MM:SS.mmm``.

    The value is rounded to whole milliseconds before being split into
    fields, so a value such as 59.9996 carries over into the minutes field
    (``00:01:00.000``) instead of producing a seconds field of ``60.000``.

    Args:
        seconds: Duration in seconds (int or float).

    Returns:
        The formatted timestamp string.
    """
    total_milliseconds = int(round(seconds * 1000))
    total_seconds, milliseconds = divmod(total_milliseconds, 1000)
    total_minutes, secs = divmod(total_seconds, 60)
    hours, minutes = divmod(total_minutes, 60)
    return f"{hours:02d}:{minutes:02d}:{secs:02d}.{milliseconds:03d}"
# Text translation
@functools.lru_cache(maxsize=2)
def _load_translation_model(model_name):
    """Load the Marian tokenizer and model for ``model_name``.

    Results are memoized so that translating many subtitles with the same
    language pair loads the weights only once. Failed loads raise and are
    therefore not cached.
    """
    tokenizer = MarianTokenizer.from_pretrained(model_name)
    model = MarianMTModel.from_pretrained(model_name)
    return tokenizer, model


def translate_text(text, source_language_code, target_language_code):
    """Translate ``text`` with a Helsinki-NLP opus-mt model.

    Args:
        text: The text to translate.
        source_language_code: Language code used as the source half of the
            model name.
        target_language_code: Language code used as the target half of the
            model name.

    Returns:
        The translated text. If both codes are equal, or the model cannot be
        loaded, an explanatory message string is returned instead (callers
        receive it in place of a translation). Empty or whitespace-only text
        is returned unchanged without loading a model.
    """
    if source_language_code == target_language_code:
        return "Translation between the same languages is not supported."
    # Nothing to translate; avoid feeding an empty prompt to the model.
    if not text or not text.strip():
        return text

    model_name = f"Helsinki-NLP/opus-mt-{source_language_code}-{target_language_code}"
    try:
        tokenizer, model = _load_translation_model(model_name)
    except Exception as e:
        return f"Failed to load model for {source_language_code} to {target_language_code}: {str(e)}"

    encoded = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512)
    translated = model.generate(**encoded)
    return tokenizer.decode(translated[0], skip_special_tokens=True)
# Translate an SRT file
def translate_srt(input_file_path, source_language_code, target_language_code, progress=None):
    """Translate every cue of an SRT file and save the result as a new file.

    Cue timings are kept; cue text is replaced by the output of
    ``translate_text`` and cues are re-indexed from 1.

    Args:
        input_file_path: Path of the SRT file to read.
        source_language_code: Source language code passed to
            ``translate_text``.
        target_language_code: Target language code passed to
            ``translate_text``; also used as the output file name suffix.
        progress: Optional callable invoked after each cue with the
            completed fraction (a float in (0, 1]).

    Returns:
        The path of the translated file: the input path with
        ``_<target_language_code>`` inserted before the extension.
    """
    subs = pysrt.open(input_file_path)
    total = len(subs)
    translated_subs = []
    for position, sub in enumerate(subs, start=1):
        translated_text = translate_text(sub.text, source_language_code, target_language_code)
        translated_subs.append(
            pysrt.SubRipItem(index=position, start=sub.start, end=sub.end, text=translated_text)
        )
        if callable(progress):
            progress(position / total)

    # Split on the real extension only: a plain str.replace(".srt", ...) would
    # also rewrite ".srt" inside directory names and would return the input
    # path unchanged (overwriting the source) when no ".srt" is present.
    root, extension = os.path.splitext(input_file_path)
    translated_srt_path = f"{root}_{target_language_code}{extension}"
    pysrt.SubRipFile(translated_subs).save(translated_srt_path)
    return translated_srt_path
# Transcribe the audio of a video into text
def transcribe(audio_file_path, model_size="base"):
    """Transcribe a media file with faster-whisper on CPU.

    Args:
        audio_file_path: Path of the audio/video file to transcribe.
        model_size: Whisper model size or path handed to ``WhisperModel``.

    Returns:
        One line per segment, formatted as ``[START -> END] text`` with
        timestamps produced by ``format_timestamp``, joined by newlines.
    """
    device = "cpu"
    compute_type = "int8"
    # The first parameter of WhisperModel is named ``model_size_or_path``;
    # pass it positionally, since ``model_size=`` is not an accepted keyword.
    model = WhisperModel(model_size, device=device, compute_type=compute_type)
    segments, _ = model.transcribe(audio_file_path)
    return "\n".join(
        f"[{format_timestamp(segment.start)} -> {format_timestamp(segment.end)}] {segment.text}"
        for segment in segments
    )
# Add subtitles to a video
def add_subtitle_to_video(input_video, subtitle_file, subtitle_language, soft_subtitle):
    """Mux or burn a subtitle file into a video using ffmpeg.

    Args:
        input_video: Path of the source video.
        subtitle_file: Path of the subtitle file.
        subtitle_language: Language code written to the subtitle track's
            ``language`` metadata (soft subtitles only).
        soft_subtitle: If true, streams are copied and the subtitles are
            added as a ``mov_text`` track; otherwise the subtitles are
            rendered into the picture with the ``subtitles`` video filter.

    Returns:
        The output path, ``/tmp/output-<input base name>.mp4``. An existing
        file at that path is overwritten.
    """
    video_input_stream = ffmpeg.input(input_video)
    input_video_name = os.path.splitext(os.path.basename(input_video))[0]
    output_video = f"/tmp/output-{input_video_name}.mp4"

    if soft_subtitle:
        subtitle_input_stream = ffmpeg.input(subtitle_file)
        subtitle_track_title = os.path.splitext(os.path.basename(subtitle_file))[0]
        # Two metadata entries cannot share one dict key (the later value
        # would silently replace the earlier one), so the title uses the
        # "all subtitle streams" specifier (s:s) while the language uses the
        # first subtitle stream (s:s:0).
        # NOTE(review): if the input video already carries subtitle tracks,
        # the s:s title would apply to those as well - confirm acceptable.
        output_options = {
            "c": "copy",
            "c:s": "mov_text",
            "metadata:s:s:0": f"language={subtitle_language}",
            "metadata:s:s": f"title={subtitle_track_title}",
        }
        stream = ffmpeg.output(
            video_input_stream, subtitle_input_stream, output_video,
            **output_options
        )
    else:
        stream = ffmpeg.output(
            video_input_stream, output_video,
            vf=f"subtitles={subtitle_file}"
        )

    ffmpeg.run(stream, overwrite_output=True)
    return output_video
# Gradio interface callback
def transcribe_and_translate_video(video_file, source_language_code, target_language_code):
    """Transcribe a video, translate the subtitles and burn them into it.

    Args:
        video_file: Either a path string or an object exposing the path as
            ``.name`` (both forms are accepted).
        source_language_code: Language code of the spoken audio.
        target_language_code: Language code to translate the subtitles into.

    Returns:
        A ``(output_video_path, translated_srt_path)`` tuple.
    """
    # Gradio's Video component hands the callback a plain path string, which
    # has no ``.name``; fall back to the value itself in that case.
    video_path = getattr(video_file, "name", video_file)
    transcription = transcribe(video_path, "tiny")
    srt_path = text_to_srt(transcription)
    translated_srt_path = translate_srt(srt_path, source_language_code, target_language_code)
    output_video = add_subtitle_to_video(video_path, translated_srt_path, target_language_code, False)
    return output_video, translated_srt_path
# Gradio inputs and outputs.
# Components are taken from the top-level ``gr`` namespace; the legacy
# ``gr.inputs`` / ``gr.outputs`` namespaces are not available in current
# Gradio releases.
# The dropdowns get plain code strings so the selected value reaches the
# callback as a string; entries whose code is not a non-empty string are
# left out.
language_codes = [
    code for code, _label in language_options
    if isinstance(code, str) and code
]
video_input = gr.Video(label="Video File")
source_language_dropdown = gr.Dropdown(choices=language_codes, label="Source Language")
target_language_dropdown = gr.Dropdown(choices=language_codes, label="Target Language")
transcribe_translate_interface = gr.Interface(
    fn=transcribe_and_translate_video,
    inputs=[video_input, source_language_dropdown, target_language_dropdown],
    outputs=[
        gr.Video(label="Video with Translated Subtitles"),
        gr.File(label="Translated Subtitles (.srt)"),
    ],
    title="Video Transcribe & Translate",
    description="Transcribe and translate the subtitles of your video into another language."
)
# Launch the Gradio application
transcribe_translate_interface.launch()