File size: 1,357 Bytes
8ac2e6d
 
 
 
 
8bc4be3
8ac2e6d
 
 
 
 
 
 
 
 
 
 
 
 
947679a
8ac2e6d
 
 
 
 
 
 
 
7aa9e56
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
# Scraped from https://huggingface.co/Helsinki-NLP
# Scrape code:
# console.log(Array.from(document.getElementsByClassName("text-md truncate font-mono text-black dark:group-hover/repo:text-yellow-500 group-hover/repo:text-indigo-600 text-smd")).map(element => element.innerHTML));

# Model repository ids, kept in the order they were collected.
# Each id appears exactly once: a repeated "Helsinki-NLP/opus-mt-en-euq"
# entry was removed so consumers do not see the same direction twice.
helsinki_models = [
    "Helsinki-NLP/opus-mt-tc-big-eu-itc",
    "Helsinki-NLP/opus-mt-euq-en",
    "Helsinki-NLP/opus-mt-eu-ru",
    "Helsinki-NLP/opus-mt-eu-es",
    "Helsinki-NLP/opus-mt-eu-en",
    "Helsinki-NLP/opus-mt-eu-de",
    "Helsinki-NLP/opus-mt-en-euq",
    "Helsinki-NLP/opus-mt-en-eu",
    "Helsinki-NLP/opus-mt-de-eu",
    "Helsinki-NLP/opus-mt-ru-eu",
    "Helsinki-NLP/opus-mt-es-eu",
]

def get_clearly_formatted_language_directions(models=None):
    """Return the direction suffix of every plainly named OPUS-MT model.

    A model id counts as "clearly formatted" when it

    * contains the ``Helsinki-NLP/opus-mt-`` prefix,
    * has at most four hyphens in total (so at most one hyphen outside the
      prefix),
    * contains no underscore, and
    * is strictly shorter than ``Helsinki-NLP/opus-mt-src-trg``, which leaves
      at most six characters for the suffix (``eu-en`` and ``euq-en`` pass,
      a seven-character suffix such as ``fin-eng`` does not).

    Ids carrying extra qualifiers, e.g. ``Helsinki-NLP/opus-mt-tc-base-bat-zle``
    or ``Helsinki-NLP/opus-mt-tc-fr-en``, exceed the hyphen limit and are
    skipped.

    Args:
        models: Iterable of model id strings to filter. Defaults to the
            module-level ``helsinki_models`` list.

    Returns:
        A list holding the text that follows the prefix (for example
        ``"eu-en"``) for each accepted id, in input order. Duplicate ids in
        the input produce duplicate entries in the result.
    """
    prefix = "Helsinki-NLP/opus-mt-"
    max_length = len("Helsinki-NLP/opus-mt-src-trg")
    if models is None:
        # Resolved at call time so the default always reflects the module list.
        models = helsinki_models

    language_directions = []
    for model in models:
        if model.count("-") > 4 or "_" in model or len(model) >= max_length:
            continue
        # partition() never raises: an id that lacks the full prefix is
        # skipped instead of failing with IndexError on split(prefix)[1].
        _, found, direction = model.partition(prefix)
        if found:
            language_directions.append(direction)
    return language_directions