File size: 1,580 Bytes
8fc9723 97b0dae 8fc9723 97b0dae 478e778 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 |
---
library_name: transformers
language:
- bo
metrics:
- bleu
base_model:
- facebook/nllb-200-distilled-600M
pipeline_tag: translation
---
# NLLB 600m Tibetan
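A state-of-the-art Tibetan–English translation model based on `facebook/nllb-200-distilled-600M`. The example below translates between Standard Tibetan (`bod_Tibt`) and English (`eng_Latn`) in either direction.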
```python
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
def translate(
    text: str,
    source_lang: str,
    target_lang: str,
    model_name: str = "TenzinGayche/nllb_600M_bi_boen_3",
) -> str:
    """Translate ``text`` between Standard Tibetan and English.

    The model and tokenizer are loaded on every call, so callers that
    translate many strings may prefer to build the pipeline once themselves.

    Args:
        text: The text to translate.
        source_lang: Name of the input language, either
            ``"Standard Tibetan"`` or ``"English"``.
        target_lang: Name of the output language, from the same set.
        model_name: Model identifier passed to
            ``AutoModelForSeq2SeqLM.from_pretrained``.

    Returns:
        The translated text. An empty or whitespace-only string yields
        ``""`` without loading the model.

    Raises:
        KeyError: If ``source_lang`` or ``target_lang`` is not a supported
            language name.
    """
    # Map human-readable language names to flores codes
    flores_codes = {
        "Standard Tibetan": "bod_Tibt",
        "English": "eng_Latn",
    }

    # Validate up front so an unsupported name fails with a message that
    # lists the accepted names, instead of a bare KeyError on the lookup.
    for lang in (source_lang, target_lang):
        if lang not in flores_codes:
            supported = ", ".join(sorted(flores_codes))
            raise KeyError(
                f"Unsupported language {lang!r}; supported languages: {supported}"
            )

    source = flores_codes[source_lang]
    target = flores_codes[target_lang]

    # Nothing to translate: skip the expensive model load entirely.
    if isinstance(text, str) and not text.strip():
        return ""

    # Load model and tokenizer. The tokenizer comes from the base NLLB
    # checkpoint rather than from `model_name`.
    model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
    tokenizer = AutoTokenizer.from_pretrained("facebook/nllb-200-distilled-600M")

    # Pipeline device convention: 0 selects the first GPU, -1 selects the CPU
    device = 0 if torch.cuda.is_available() else -1

    # Create translator pipeline
    translator = pipeline(
        'translation',
        model=model,
        tokenizer=tokenizer,
        src_lang=source,
        tgt_lang=target,
        device=device,
    )

    # Perform translation
    output = translator(text, max_length=400)

    # The pipeline returns a list of dicts; take the first result's text
    translated_text = output[0]['translation_text']
    return translated_text
# Example usage: translate one English sentence into Standard Tibetan
# and print both the input and the result.
if __name__ == "__main__":
    input_text = "Hello, how are you?"
    source_language = "English"
    target_language = "Standard Tibetan"

    result = translate(input_text, source_language, target_language)

    print(f"Original: {input_text}")
    print(f"Translated: {result}")
```