Spaces:

Lenylvt
/

SRT_Translation-API

Runtime error

App Files Community

Lenylvt committed on Feb 16, 2024

Commit

e002d92

verified ·

1 Parent(s): 6754994

Update app.py

Browse files

Files changed (1) hide show

app.py +19 -29

app.py CHANGED Viewed

@@ -1,25 +1,23 @@
-import requests
-import pandas as pd
 import gradio as gr
 from transformers import MarianMTModel, MarianTokenizer
-import io
-import pysrt
-# Fetch and parse language options
 url = "https://huggingface.co/Lenylvt/LanguageISO/resolve/main/iso.md"
-response = requests.get(url)
-df = pd.read_csv(io.StringIO(response.text), delimiter="|", skiprows=2, header=None).dropna(axis=1, how='all')
 df.columns = ['ISO 639-1', 'ISO 639-2', 'Language Name', 'Native Name']
 df['ISO 639-1'] = df['ISO 639-1'].str.strip()
 # Prepare language options for the dropdown
-language_options = [(row['ISO 639-1'], f"{row['ISO 639-1']} - {row['Language Name'].strip()}") for index, row in df.iterrows()]
 def translate_text(text, source_language_code, target_language_code):
     # Construct model name using ISO 639-1 codes
     model_name = f"Helsinki-NLP/opus-mt-{source_language_code}-{target_language_code}"
-    # Check if source and target languages are the same
     if source_language_code == target_language_code:
         return "Translation between the same languages is not supported."
@@ -36,38 +34,30 @@ def translate_text(text, source_language_code, target_language_code):
     return translated_text
-def translate_srt(file_info, source_language_code, target_language_code):
-    # Assuming file_info is a dictionary with 'content' holding the file's bytes
-    file_content = file_info['content']  # Correctly access the bytes content of the file
-    # Use pysrt to load subtitles from the file content
-    subs = pysrt.open(io.BytesIO(file_content))
     # Translate each subtitle
-    for sub in subs:
         translated_text = translate_text(sub.text, source_language_code, target_language_code)
-        sub.text = translated_text
-    # Save the translated subtitles to a temporary file
-    output_path = "/mnt/data/translated_srt.srt"
-    with open(output_path, "w", encoding="utf-8") as file:
-        subs.save(file, encoding='utf-8')
-    return output_path
 source_language_dropdown = gr.Dropdown(choices=language_options, label="Source Language")
 target_language_dropdown = gr.Dropdown(choices=language_options, label="Target Language")
 iface = gr.Interface(
     fn=translate_srt,
-    inputs=[
-        gr.File(label="Upload SRT File"),
-        source_language_dropdown,
-        target_language_dropdown
-    ],
-    outputs=gr.File(label="Download Translated SRT File"),
     title="SRT Translator",
-    description="Translate SubRip Text (SRT) subtitle files. This tool uses models from the Language Technology Research Group at the University of Helsinki."
 )
 iface.launch()

+import pysrt
 import gradio as gr
+import pandas as pd
 from transformers import MarianMTModel, MarianTokenizer
+from tqdm import tqdm
+# Fetch and parse language options from the provided URL
 url = "https://huggingface.co/Lenylvt/LanguageISO/resolve/main/iso.md"
+df = pd.read_csv(url, delimiter="|", skiprows=2, header=None).dropna(axis=1, how='all')
 df.columns = ['ISO 639-1', 'ISO 639-2', 'Language Name', 'Native Name']
 df['ISO 639-1'] = df['ISO 639-1'].str.strip()
 # Prepare language options for the dropdown
+language_options = [(row['ISO 639-1'], f"{row['ISO 639-1']}") for index, row in df.iterrows()]
 def translate_text(text, source_language_code, target_language_code):
     # Construct model name using ISO 639-1 codes
     model_name = f"Helsinki-NLP/opus-mt-{source_language_code}-{target_language_code}"
+    # Check if source and target languages are the same, which is not supported for translation
     if source_language_code == target_language_code:
         return "Translation between the same languages is not supported."
     return translated_text
+def translate_srt(input_file, source_language_code, target_language_code):
+    # Load SRT file
+    subs = pysrt.open(input_file)
+    # Initialize an empty list to store translated subtitles
+    translated_subs = []
     # Translate each subtitle
+    for sub in tqdm(subs, desc="Translating"):
         translated_text = translate_text(sub.text, source_language_code, target_language_code)
+        translated_subs.append(translated_text)
+    return "\n".join(translated_subs)
 source_language_dropdown = gr.Dropdown(choices=language_options, label="Source Language")
 target_language_dropdown = gr.Dropdown(choices=language_options, label="Target Language")
+file_input = gr.inputs.File(label="Upload SRT File", type="text")
 iface = gr.Interface(
     fn=translate_srt,
+    inputs=[file_input, source_language_dropdown, target_language_dropdown],
+    outputs=gr.Textbox(label="Translated SRT"),
     title="SRT Translator",
+    description="Translate subtitles from one language to another."
 )
 iface.launch()