NLLB 600M Tibetan

State of the art

import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline

def translate(text, source_lang, target_lang, model_name="TenzinGayche/nllb_600M_bi_boen_3", max_length=400):
    """Translate ``text`` between Standard Tibetan and English with an NLLB model.

    Args:
        text: Text to translate; passed unchanged to the translation pipeline.
        source_lang: Name of the input language. Must be one of the keys of
            the language table below ("Standard Tibetan" or "English").
        target_lang: Name of the output language; same choices as
            ``source_lang``.
        model_name: Model identifier handed to
            ``AutoModelForSeq2SeqLM.from_pretrained``.
        max_length: ``max_length`` value forwarded to the pipeline call.
            Defaults to 400, the value that used to be hard-coded.

    Returns:
        The ``translation_text`` field of the first pipeline result.

    Raises:
        KeyError: If ``source_lang`` or ``target_lang`` is not a supported
            language name. The message names the offending argument and
            lists the supported names.

    Note:
        The model and tokenizer are loaded again on every call, so callers
        translating many texts pay that cost each time.
    """
    # Language names accepted by this function, mapped to FLORES codes.
    flores_codes = {
        "Standard Tibetan": "bod_Tibt",
        "English": "eng_Latn",
    }

    # Validate both names before any model loading so a typo fails
    # immediately, with a message that says what was wrong and what is valid.
    for role, lang in (("source_lang", source_lang), ("target_lang", target_lang)):
        if lang not in flores_codes:
            raise KeyError(
                f"Unsupported {role} {lang!r}; expected one of {sorted(flores_codes)}"
            )

    source = flores_codes[source_lang]
    target = flores_codes[target_lang]

    # The tokenizer is taken from the base NLLB checkpoint rather than from
    # ``model_name``.
    # NOTE(review): presumably the fine-tuned repo reuses the base tokenizer - confirm.
    model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
    tokenizer = AutoTokenizer.from_pretrained("facebook/nllb-200-distilled-600M")

    # Pipeline device index: 0 when CUDA is available, otherwise -1.
    device = 0 if torch.cuda.is_available() else -1

    translator = pipeline(
        'translation',
        model=model,
        tokenizer=tokenizer,
        src_lang=source,
        tgt_lang=target,
        device=device,
    )

    output = translator(text, max_length=max_length)

    # Only the first result's text is returned.
    return output[0]['translation_text']

# Example usage
if __name__ == "__main__":
    # Demo run: translate one English sentence into Standard Tibetan and
    # print the original next to the translation.
    sample, src_name, tgt_name = "Hello, how are you?", "English", "Standard Tibetan"

    translation = translate(sample, src_name, tgt_name)

    print(f"Original: {sample}")
    print(f"Translated: {translation}")
Downloads last month
16,541
Safetensors
Model size
615M params
Tensor type
F32
·
Inference Examples
This model does not have enough activity to be deployed to Inference API (serverless) yet. Increase its social visibility and check back later, or deploy to Inference Endpoints (dedicated) instead.

Model tree for TenzinGayche/nllb_600M_bi_boen_3

Finetuned
(86)
this model

Space using TenzinGayche/nllb_600M_bi_boen_3 1