import torch
from peft import PeftModel, PeftConfig
from transformers import AutoModelForCausalLM, AutoTokenizer
from huggingface_hub import login
import os
import gradio as gr

# Log in to the Hugging Face Hub (token read from the environment; skipped if unset)
access_token = os.environ.get("HUGGING_FACE_HUB_TOKEN")
if access_token:
    login(token=access_token)

# Define model details
peft_model_id = "kuyesu22/sunbird-ug-lang-v1.0-bloom-7b1-lora"
config = PeftConfig.from_pretrained(peft_model_id)

# Load model and tokenizer
model = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path,
    torch_dtype=torch.float16,  # Use mixed precision for speed
    device_map="auto"           # Automatically allocate to available devices
)
tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
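
# BLOOM's tokenizer defines a pad token, but guard defensively in case the
# base checkpoint is ever swapped for one that does not (assumption: falling
# back to EOS as the pad token is acceptable for this setup)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token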

# Load the LoRA adapter weights on top of the base model
model = PeftModel.from_pretrained(model, peft_model_id)

# Ensure model is in evaluation mode
model.eval()
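
# Optional: merging the LoRA weights into the base model (PeftModel.merge_and_unload)
# removes the adapter overhead at inference time; left commented out so the
# adapter stays separate, as in the original setup
# model = model.merge_and_unload()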

# Define inference function
def make_inference(english_text):
    # Tokenize the input English sentence
    batch = tokenizer(
        f"### English:\n{english_text}\n\n### Runyankole:",
        return_tensors="pt",
        padding=True,
        truncation=True
    ).to(model.device)  # Move batch to the same device as the model

    # Generate the translation using the model
    with torch.no_grad():
        with torch.autocast("cuda", enabled=torch.cuda.is_available()):  # Mixed precision inference (GPU only)
            output_tokens = model.generate(
                input_ids=batch["input_ids"],
                attention_mask=batch["attention_mask"],
                max_new_tokens=100,
                do_sample=True,  # Enables sampling for more creative responses
                temperature=0.7,  # Control randomness in predictions
                num_return_sequences=1,  # Return only one translation
                pad_token_id=tokenizer.eos_token_id  # Use EOS as the pad token to avoid a generation warning
            )

    # Decode only the newly generated tokens, skipping the echoed prompt
    new_tokens = output_tokens[0][batch["input_ids"].shape[1]:]
    translated_text = tokenizer.decode(new_tokens, skip_special_tokens=True)
    return translated_text.strip()
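
# Example invocation (hypothetical input; actual output varies with sampling):
#   make_inference("Good morning")  # -> generated Runyankole text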

# Gradio Interface
def launch_gradio_interface():
    inputs = gr.Textbox(lines=2, label="English Text")  # Input text in English
    outputs = gr.Textbox(label="Translated Runyankole Text")  # Output in Runyankole

    # Launch Gradio app
    gr.Interface(
        fn=make_inference,
        inputs=inputs,
        outputs=outputs,
        title="Sunbird UG Lang Translator",
        description="Translate English to Runyankole using a BLOOM model fine-tuned with LoRA.",
    ).launch()

# Entry point to run the Gradio app
if __name__ == "__main__":
    launch_gradio_interface()