NLLB 600M Tibetan
State-of-the-art bidirectional Tibetan-English machine translation, built on Meta's NLLB-200 distilled 600M model. The example below translates English to Standard Tibetan with the Hugging Face transformers pipeline.
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline


def translate(text, source_lang, target_lang, model_name="TenzinGayche/nllb_600M_bi_boen_3"):
    # Map human-readable language names to the FLORES-200 codes used by NLLB.
    flores_codes = {
        "Standard Tibetan": "bod_Tibt",
        "English": "eng_Latn",
    }
    source = flores_codes[source_lang]
    target = flores_codes[target_lang]

    # The fine-tuned model reuses the tokenizer of the base NLLB-200 distilled 600M checkpoint.
    model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
    tokenizer = AutoTokenizer.from_pretrained("facebook/nllb-200-distilled-600M")

    # Run on GPU if one is available, otherwise fall back to CPU.
    device = 0 if torch.cuda.is_available() else -1
    translator = pipeline(
        "translation",
        model=model,
        tokenizer=tokenizer,
        src_lang=source,
        tgt_lang=target,
        device=device,
    )

    output = translator(text, max_length=400)
    return output[0]["translation_text"]
if __name__ == "__main__":
    input_text = "Hello, how are you?"
    source_language = "English"
    target_language = "Standard Tibetan"

    result = translate(input_text, source_language, target_language)
    print(f"Original: {input_text}")
    print(f"Translated: {result}")