# sunbird/app.py — Hugging Face Space entry point
# (uploaded by kuyesu22, revision 17c3ba4)
import torch
from peft import PeftModel, PeftConfig
from transformers import AutoModelForCausalLM, AutoTokenizer
from huggingface_hub import login
import os
import gradio as gr
# Authenticate with the Hugging Face Hub (needed to pull private/gated repos).
# NOTE(review): if HUGGING_FACE_HUB_TOKEN is unset this passes token=None to
# login(), which fails in a non-interactive Space — confirm the secret is set.
access_token = os.environ.get("HUGGING_FACE_HUB_TOKEN")
login(token=access_token)
# LoRA adapter repo; its PeftConfig records which base model it was trained on.
peft_model_id = "kuyesu22/sunbird-ug-lang-v1.0-bloom-7b1-lora"
config = PeftConfig.from_pretrained(peft_model_id)
# Load the base causal LM and its tokenizer from the id stored in the adapter config.
model = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path,
    torch_dtype=torch.float16,  # half precision to cut memory and speed up inference
    device_map="auto"  # let accelerate place weights on available devices
)
tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
# Attach the LoRA adapter weights on top of the frozen base model.
model = PeftModel.from_pretrained(model, peft_model_id)
# Inference only: disable dropout etc.
model.eval()
# Define inference function
def make_inference(english_text):
    """Translate an English sentence to Runyankole with the LoRA-tuned BLOOM model.

    Args:
        english_text: Source sentence in English.

    Returns:
        The generated Runyankole text only (the prompt is stripped), as a string.
    """
    # Build the same "### English: ... ### Runyankole:" prompt format the
    # adapter was fine-tuned on, and move tensors to the model's device.
    batch = tokenizer(
        f"### English:\n{english_text}\n\n### Runyankole:",
        return_tensors="pt",
        padding=True,
        truncation=True,
    ).to(model.device)

    with torch.no_grad():
        # Autocast only applies on CUDA; disable it when no GPU is present so
        # the app also runs on CPU-only Spaces instead of warning/no-op'ing.
        with torch.cuda.amp.autocast(enabled=torch.cuda.is_available()):
            output_tokens = model.generate(
                input_ids=batch["input_ids"],
                attention_mask=batch["attention_mask"],
                max_new_tokens=100,
                do_sample=True,       # sampling for more varied translations
                temperature=0.7,      # moderate randomness
                num_return_sequences=1,
                pad_token_id=tokenizer.eos_token_id,  # explicit pad id for generation
            )

    # generate() on a decoder-only model returns prompt + continuation; decoding
    # the full sequence would echo the English prompt back into the "translation".
    # Slice off the prompt tokens and decode only what the model generated.
    prompt_len = batch["input_ids"].shape[1]
    translated_text = tokenizer.decode(
        output_tokens[0][prompt_len:], skip_special_tokens=True
    )
    return translated_text.strip()
# Gradio Interface
def launch_gradio_interface():
    """Build and launch the Gradio UI for English -> Runyankole translation."""
    demo = gr.Interface(
        fn=make_inference,
        # Free-text English input on the left, generated Runyankole on the right.
        inputs=gr.components.Textbox(lines=2, label="English Text"),
        outputs=gr.components.Textbox(label="Translated Runyankole Text"),
        title="Sunbird UG Lang Translator",
        description="Translate English to Runyankole using BLOOM model fine-tuned with LoRA.",
    )
    demo.launch()
# Entry point: start the Gradio app when run as a script (as HF Spaces does).
if __name__ == "__main__":
    launch_gradio_interface()