File size: 1,357 Bytes
8ac2e6d 8bc4be3 8ac2e6d 947679a 8ac2e6d 7aa9e56 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 |
# Scraped from https://huggingface.co/Helsinki-NLP
# Scrape code:
# console.log(Array.from(document.getElementsByClassName("text-md truncate font-mono text-black dark:group-hover/repo:text-yellow-500 group-hover/repo:text-indigo-600 text-smd")).map(element => element.innerHTML));
helsinki_models = [
    # Each model identifier is listed exactly once: a repeated entry would
    # surface as a repeated language direction for every consumer of this list.
    "Helsinki-NLP/opus-mt-tc-big-eu-itc",
    "Helsinki-NLP/opus-mt-euq-en",
    "Helsinki-NLP/opus-mt-eu-ru",
    "Helsinki-NLP/opus-mt-eu-es",
    "Helsinki-NLP/opus-mt-eu-en",
    "Helsinki-NLP/opus-mt-eu-de",
    "Helsinki-NLP/opus-mt-en-euq",
    "Helsinki-NLP/opus-mt-en-eu",
    "Helsinki-NLP/opus-mt-de-eu",
    "Helsinki-NLP/opus-mt-ru-eu",
    "Helsinki-NLP/opus-mt-es-eu",
]
def get_clearly_formatted_language_directions(models=None):
    """Return the language directions of unambiguously named models.

    A model name is kept only when it passes every check below; the value
    returned for it is the text that follows "Helsinki-NLP/opus-mt-".

    * It has fewer than five hyphens. Names with extra qualifiers, such as
      "Helsinki-NLP/opus-mt-tc-base-bat-zle" and
      "Helsinki-NLP/opus-mt-tc-fr-en", are therefore ignored.
    * It contains "opus-mt" and no underscore.
    * It is strictly shorter than "Helsinki-NLP/opus-mt-src-trg".
    * It actually contains the "Helsinki-NLP/opus-mt-" prefix; names that
      do not are skipped rather than raising IndexError.

    Args:
        models: Optional iterable of model identifier strings to filter.
            When omitted (None), the module-level ``helsinki_models`` list
            is used.

    Returns:
        A list of direction strings (for example "eu-en") in input order.
        Repeated input names yield repeated directions.
    """
    prefix = "Helsinki-NLP/opus-mt-"
    length_limit = len("Helsinki-NLP/opus-mt-src-trg")
    if models is None:
        models = helsinki_models

    language_directions = []
    for model in models:
        # Fewer than six hyphen-separated parts means fewer than five hyphens.
        if model.count("-") >= 5:
            continue
        if "opus-mt" not in model or "_" in model:
            continue
        # NOTE(review): the strict comparison also drops names made of two
        # three-letter codes (same length as the "src-trg" template) --
        # confirm whether "<=" was intended.
        if len(model) >= length_limit:
            continue
        _, matched_prefix, direction = model.partition(prefix)
        if not matched_prefix:
            # "opus-mt" is present but not the full prefix: nothing to extract.
            continue
        language_directions.append(direction)
    return language_directions