---
library_name: transformers
language:
- bo
metrics:
- bleu
base_model:
- facebook/nllb-200-distilled-600M
pipeline_tag: translation
---
|
|
|
# NLLB 600M Tibetan

A state-of-the-art Tibetan ↔ English translation model fine-tuned from [facebook/nllb-200-distilled-600M](https://huggingface.co/facebook/nllb-200-distilled-600M). The example below loads the model and translates text using FLORES-200 language codes (`bod_Tibt`, `eng_Latn`).
|
```python
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline


def translate(text, source_lang, target_lang, model_name="TenzinGayche/nllb_600M_bi_boen_3"):
    # Map human-readable language names to FLORES-200 codes
    flores_codes = {
        "Standard Tibetan": "bod_Tibt",
        "English": "eng_Latn",
    }

    # Convert language names to FLORES codes
    source = flores_codes[source_lang]
    target = flores_codes[target_lang]

    # Load the fine-tuned model; the tokenizer comes from the base NLLB checkpoint
    model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
    tokenizer = AutoTokenizer.from_pretrained("facebook/nllb-200-distilled-600M")

    # Use the first GPU if available, otherwise fall back to CPU
    device = 0 if torch.cuda.is_available() else -1

    # Create the translation pipeline
    translator = pipeline("translation", model=model, tokenizer=tokenizer,
                          src_lang=source, tgt_lang=target, device=device)

    # Perform the translation and extract the translated text
    output = translator(text, max_length=400)
    return output[0]["translation_text"]


# Example usage
if __name__ == "__main__":
    input_text = "Hello, how are you?"
    source_language = "English"
    target_language = "Standard Tibetan"

    result = translate(input_text, source_language, target_language)
    print(f"Original: {input_text}")
    print(f"Translated: {result}")
```
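
Note that the helper above reloads the checkpoint on every call. If you are translating many sentences, you can load the model once and keep one pipeline per direction. A minimal sketch under that assumption (the Tibetan example sentence, "བཀྲ་ཤིས་བདེ་ལེགས།", is illustrative):

```python
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline

# Load the model and tokenizer once, then reuse them across calls
model = AutoModelForSeq2SeqLM.from_pretrained("TenzinGayche/nllb_600M_bi_boen_3")
tokenizer = AutoTokenizer.from_pretrained("facebook/nllb-200-distilled-600M")
device = 0 if torch.cuda.is_available() else -1

# One pipeline per translation direction
en_to_bo = pipeline("translation", model=model, tokenizer=tokenizer,
                    src_lang="eng_Latn", tgt_lang="bod_Tibt", device=device)
bo_to_en = pipeline("translation", model=model, tokenizer=tokenizer,
                    src_lang="bod_Tibt", tgt_lang="eng_Latn", device=device)

print(en_to_bo("Hello, how are you?", max_length=400)[0]["translation_text"])
print(bo_to_en("བཀྲ་ཤིས་བདེ་ལེགས།", max_length=400)[0]["translation_text"])
```

Both pipelines share the same underlying model weights, so the extra memory cost of keeping two directions loaded is negligible.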