File size: 2,665 Bytes
4bdcdda
 
d60a3d5
a03a333
d60a3d5
4bdcdda
 
 
d65424d
4bdcdda
 
0f9dc19
4bdcdda
31f1556
2189552
d65424d
 
 
31f1556
 
d65424d
 
31f1556
 
d65424d
 
 
 
 
31f1556
 
d65424d
 
31f1556
d65424d
2189552
4bdcdda
 
2189552
d60a3d5
 
550812d
 
d523365
aac41e3
d60a3d5
2189552
d65424d
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
import functools
import io

import gradio as gr
import pandas as pd
import requests
from transformers import MarianMTModel, MarianTokenizer

# Download the ISO language table (a pipe-delimited Markdown document) once.
url = "https://huggingface.co/Lenylvt/LanguageISO/resolve/main/iso.md"
# The timeout keeps start-up from hanging indefinitely on a stalled
# connection; raise_for_status() reports an HTTP failure here instead of as a
# confusing parse error further down.
response = requests.get(url, timeout=30)
response.raise_for_status()

# Parse the bytes that were already downloaded. Handing `response.url` to
# pandas would make it fetch the very same document a second time.
# The first two lines are skipped (presumably the Markdown header and its
# separator row -- confirm against the document), and columns that are empty
# in every row are dropped (presumably the ones created by the leading and
# trailing "|" of each table row).
df = pd.read_csv(
    io.BytesIO(response.content),
    delimiter="|",
    skiprows=2,
    header=None,
).dropna(axis=1, how='all')
df.columns = ['ISO 639-1', 'ISO 639-2', 'Language Name', 'Native Name']
# Codes are whitespace-padded inside the table cells; strip them so they can
# be interpolated into model names.
df['ISO 639-1'] = df['ISO 639-1'].str.strip()

# Dropdown choices as 2-tuples built from the ISO 639-1 code alone; both
# elements carry the same code.
language_options = [(code, f"{code}") for code in df['ISO 639-1']]

@functools.lru_cache(maxsize=4)
def _load_translation_model(model_name):
    """Load the Marian tokenizer and model for *model_name*, memoising the pair.

    Loading a checkpoint is by far the slowest step of a translation, so the
    most recently used pairs are kept in memory. The cache is bounded so that
    trying many language pairs cannot grow memory without limit. A failed load
    raises and is therefore not cached, so it is retried on the next call.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    tokenizer = MarianTokenizer.from_pretrained(model_name)
    model = MarianMTModel.from_pretrained(model_name)
    return tokenizer, model


def translate_text(text, source_language_code, target_language_code):
    """Translate *text* between two languages with a Helsinki-NLP Marian model.

    Args:
        text: The text to translate.
        source_language_code: Language code of *text*, used as the first
            language component of the model name.
        target_language_code: Language code to translate into, used as the
            second language component of the model name.

    Returns:
        The translated text. Problems are reported as a human-readable
        message string rather than raised: a missing language selection,
        identical source and target languages, or a model that cannot be
        loaded. Empty or whitespace-only input yields an empty string.
    """
    # An unselected dropdown arrives as None. Without this guard two unselected
    # dropdowns compare equal and produce the misleading "same languages"
    # message, and a single one yields a model name containing "None".
    if not source_language_code or not target_language_code:
        return "Please select both a source and a target language."

    # Check if source and target languages are the same, which is not
    # supported for translation.
    if source_language_code == target_language_code:
        return "Translation between the same languages is not supported."

    # Nothing to translate: skip the model load and generation entirely.
    if not text or not text.strip():
        return ""

    # Construct the model name from the two language codes.
    model_name = f"Helsinki-NLP/opus-mt-{source_language_code}-{target_language_code}"

    # Deliberately broad: this is the UI boundary, and any load failure
    # (most commonly a language pair with no published model) is shown to
    # the user as text.
    try:
        tokenizer, model = _load_translation_model(model_name)
    except Exception as e:
        return f"Failed to load model for {source_language_code} to {target_language_code}: {str(e)}"

    # NOTE: input beyond 512 tokens is truncated by the tokenizer call below,
    # so very long texts are only partially translated.
    encoded = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512)
    translated = model.generate(**encoded)
    return tokenizer.decode(translated[0], skip_special_tokens=True)

# One dropdown per translation direction, both offering the same choices.
source_language_dropdown = gr.Dropdown(choices=language_options, label="Source Language")
target_language_dropdown = gr.Dropdown(choices=language_options, label="Target Language")

# Description shown by the interface. The red-circle marker is written as an
# escape (U+1F534) so it cannot be mangled again by a mis-decoded save; it
# previously appeared as the mojibake sequence for that character's UTF-8
# bytes.
_description = (
    "We use model from [Language Technology Research Group at the University of Helsinki](https://huggingface.co/Helsinki-NLP). "
    "For web use please visit [this space](https://huggingface.co/spaces/Lenylvt/Translator). "
    "\U0001F534 If you have this error : "
    "'Failed to load model for fr to aa: Helsinki-NLP/opus-mt-fr-aa is not a local folder and is not a valid model identifier listed on https://huggingface.co/models "
    "If this is a private repository, make sure to pass a token having permission to this repo either by logging in with huggingface-cli login or by passing token=<your_token>', "
    "its because the language is not available."
)

# Wire translate_text to a text box plus the two language dropdowns; the
# input order matches the function's positional parameters.
iface = gr.Interface(
    fn=translate_text,
    inputs=[
        gr.Textbox(lines=2, placeholder="Enter text to translate..."),
        source_language_dropdown,
        target_language_dropdown,
    ],
    outputs=gr.Textbox(),
    title="Translator API",
    description=_description,
)

iface.launch()