Spaces:

alexneakameni
/

language_detection

Running

language_detection / app.py

Update app.py

36caef8 verified 5 months ago

1.66 kB

	import gradio as gr
	import torch
	from transformers import AutoModelForSequenceClassification, AutoTokenizer

	# Hugging Face Hub repository that provides both the classifier weights and
	# the matching tokenizer.
	model_name = "alexneakameni/language_detection"

	# Run on the GPU when CUDA is available; otherwise stay on the CPU.
	device = "cpu"
	if torch.cuda.is_available():
	    device = "cuda"

	# Load the tokenizer and the sequence-classification model, then move the
	# model onto the selected device.
	tokenizer = AutoTokenizer.from_pretrained(model_name)
	model = AutoModelForSequenceClassification.from_pretrained(model_name)
	model = model.to(device)

	# Class-index -> label mapping read from the model configuration
	# (presumably language names or codes; verify against the model card).
	id2label = model.config.id2label

	def predict_language(text, top_k=5):
	    """Predict the top-k most probable languages for the given text.

	    Args:
	        text: The text to classify. ``None`` or a blank/whitespace-only
	            string yields an empty result without running the model.
	        top_k: How many labels to report. Coerced to ``int`` and capped at
	            the number of labels the model produces.

	    Returns:
	        A string with one ``"<label>: <probability>"`` line per reported
	        label (probability shown with four decimals), joined by newlines.
	        Empty string when there is nothing to classify.
	    """
	    # Nothing to classify: skip the model instead of reporting scores that
	    # would be derived from no actual content.
	    if text is None or (isinstance(text, str) and not text.strip()):
	        return ""

	    # Inputs longer than 512 tokens are truncated by the tokenizer.
	    inputs = tokenizer(
	        text, return_tensors="pt", truncation=True, padding=True, max_length=512
	    ).to(device)
	    with torch.no_grad():
	        logits = model(**inputs).logits

	    # Assumes logits are (batch, num_labels) with a single input row — TODO
	    # confirm. Indexing row 0 removes only the batch axis, whereas a bare
	    # squeeze() would also collapse a size-1 label axis.
	    probs = torch.nn.functional.softmax(logits, dim=-1)[0]

	    # torch.topk needs an integer k no larger than the number of entries;
	    # callers such as UI sliders may hand over a float (e.g. 5.0) or a value
	    # above the label count.
	    k = max(0, min(int(top_k), probs.numel()))
	    top_probs, top_indices = torch.topk(probs, k)

	    # Convert to plain Python numbers once rather than per element.
	    results = [
	        f"{id2label[idx]}: {prob:.4f}"
	        for prob, idx in zip(top_probs.tolist(), top_indices.tolist())
	    ]
	    return "\n".join(results)

	# Assemble the Gradio UI: a free-text box plus a top-k slider feed
	# predict_language, whose newline-separated result is shown in a text box.
	_text_input = gr.Textbox(label="Enter text", placeholder="Type a sentence here...")
	_top_k_input = gr.Slider(1, 10, value=5, step=1, label="Top-k Languages")
	_result_output = gr.Textbox(label="Predicted Languages")

	# Sample sentences offered as clickable examples; each is paired with a
	# top-k value of 5.
	_example_sentences = (
	    "Hello, how are you?",
	    "Bonjour, comment ça va?",
	    "Hola, ¿cómo estás?",
	    "Hallo, wie geht es dir?",
	    "Привет, как дела?",
	)
	_examples = [[sentence, 5] for sentence in _example_sentences]

	demo = gr.Interface(
	    fn=predict_language,
	    inputs=[_text_input, _top_k_input],
	    outputs=_result_output,
	    title="🌍 Language Detection",
	    description="Detects the language of a given text using a fine-tuned BERT model. Returns the top-k most probable languages.",
	    examples=_examples,
	)

	# Start the web server for the interface.
	demo.launch()