Spaces:
Runtime error
Runtime error
File size: 3,069 Bytes
e002d92 7cb0b8e e002d92 41ec54b 7cb0b8e e002d92 41ec54b e002d92 41ec54b 9dc25d9 41ec54b e002d92 41ec54b e002d92 41ec54b d4af723 41ec54b 6ddf7cf e002d92 6ddf7cf 362c063 e002d92 9c9b591 6ddf7cf 9c9b591 cb035db 362c063 cb035db 41ec54b 1bf6da9 d4af723 9895fa7 41ec54b e002d92 cb035db 41ec54b e002d92 9895fa7 6ddf7cf |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 |
from functools import lru_cache
from pathlib import Path

import gradio as gr
import pandas as pd
import pysrt
from transformers import MarianMTModel, MarianTokenizer
# Fetch the language table (a Markdown table served as text) and parse it.
# Splitting on "|" yields empty leading/trailing columns, which the
# dropna(axis=1, how="all") call removes; the first two lines (header and
# separator row) are skipped.
url = "https://huggingface.co/Lenylvt/LanguageISO/resolve/main/iso.md"
df = pd.read_csv(url, delimiter="|", skiprows=2, header=None).dropna(axis=1, how="all")
df.columns = ['ISO 639-1', 'ISO 639-2', 'Language Name', 'Native Name']
df['ISO 639-1'] = df['ISO 639-1'].str.strip()

# Prepare (label, value) pairs for the dropdowns. Both elements are the
# ISO 639-1 code. Rows whose code is missing (NaN) or blank are skipped so they
# can never be selected and turned into an invalid model name.
language_options = [
    (code, code)
    for code in df['ISO 639-1']
    if isinstance(code, str) and code
]
@lru_cache(maxsize=4)
def _load_translation_model(model_name):
    """Load the Marian tokenizer and model for *model_name*, memoizing the pair.

    The cache is bounded so that switching between many language pairs does
    not keep every loaded model alive. Exceptions raised while loading are
    propagated (and therefore not cached).
    """
    tokenizer = MarianTokenizer.from_pretrained(model_name)
    model = MarianMTModel.from_pretrained(model_name)
    return tokenizer, model


def translate_text(text, source_language_code, target_language_code):
    """Translate *text* between two languages given by ISO 639-1 codes.

    Args:
        text: The text to translate.
        source_language_code: Code of the language *text* is written in.
        target_language_code: Code of the language to translate into.

    Returns:
        The translated text. If both codes are equal, or the model for the
        language pair cannot be loaded, a human-readable message is returned
        instead of raising. Empty or whitespace-only *text* is returned
        unchanged without loading a model.
    """
    # Translating a language into itself is not supported.
    if source_language_code == target_language_code:
        return "Translation between the same languages is not supported."

    # Nothing to translate: skip the model entirely.
    if not text or not text.strip():
        return text

    # Construct the model name from the ISO 639-1 codes.
    model_name = f"Helsinki-NLP/opus-mt-{source_language_code}-{target_language_code}"

    # Load (or reuse) the tokenizer and model. Any loading failure is reported
    # to the caller as a message, matching the function's no-raise contract.
    try:
        tokenizer, model = _load_translation_model(model_name)
    except Exception as e:
        return f"Failed to load model for {source_language_code} to {target_language_code}: {e}"

    # Translate the text; input longer than 512 tokens is truncated.
    batch = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512)
    translated = model.generate(**batch)
    return tokenizer.decode(translated[0], skip_special_tokens=True)
def translate_srt(input_file, source_language_code, target_language_code, progress=gr.Progress()):
    """Translate every subtitle of an SRT file and save the result as a new file.

    Args:
        input_file: The uploaded file. Either an object exposing the file path
            as ``.name`` or a plain path string.
        source_language_code: Code of the language the subtitles are written in.
        target_language_code: Code of the language to translate into.
        progress: Progress callback, called after each subtitle with the
            completed fraction and a description.

    Returns:
        Path of the translated SRT file, written next to the input as
        ``<input stem>_<target_language_code>.srt``.
    """
    # Accept both an object with a `.name` path attribute and a bare path.
    input_path = getattr(input_file, "name", input_file)

    # Load SRT file
    subs = pysrt.open(input_path)
    total = len(subs)

    # Translate each subtitle, keeping its timing and renumbering from 1.
    # NOTE: translate_text returns its error messages as ordinary strings, so
    # they end up as subtitle text if translation is not possible.
    translated_subs = []
    for number, sub in enumerate(subs, start=1):
        translated_text = translate_text(sub.text, source_language_code, target_language_code)
        translated_subs.append(
            pysrt.SubRipItem(index=number, start=sub.start, end=sub.end, text=translated_text)
        )
        progress(number / total, desc=f"Translating subtitle {number}/{total}")

    # Derive the output path from the file name only, so that ".srt" elsewhere
    # in the path is left alone and a differently-cased or missing extension
    # cannot make the output path equal to the input path.
    source_path = Path(input_path)
    translated_srt_path = str(
        source_path.with_name(f"{source_path.stem}_{target_language_code}.srt")
    )

    # Save translated subtitles to the new SRT file
    pysrt.SubRipFile(translated_subs).save(translated_srt_path)
    return translated_srt_path
# Input components: the subtitle file plus source/target language selectors
# that share the same list of language options.
source_language_dropdown = gr.Dropdown(choices=language_options, label="Source Language")
target_language_dropdown = gr.Dropdown(choices=language_options, label="Target Language")
file_input = gr.File(label="Upload SRT File")

# Wire the components to translate_srt; the returned path is offered as a
# downloadable file.
iface = gr.Interface(
    fn=translate_srt,
    inputs=[file_input, source_language_dropdown, target_language_dropdown],
    outputs=gr.File(label="Translated SRT"),
    title="SRT Translator",
    description="Translate subtitles from one language to another.",
)

# Start the web app.
iface.launch()