Spaces:

kstunlp
/

Kyrgyz-Language-Binary-Classifier

Sleeping

Create app.py

3dda396 verified 6 months ago

1.62 kB

	import torch
	from transformers import XLMRobertaTokenizer, XLMRobertaForSequenceClassification
	import streamlit as st


	# Directory (or hub identifier) of the fine-tuned checkpoint to load.
	model_path = "fine_tuned_xlm_roberta"
	# Run on the GPU when one is available, otherwise fall back to the CPU.
	device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


	@st.cache_resource
	def _load_classifier(path):
	    """Load the tokenizer and classification model from ``path``.

	    Streamlit re-executes this script on every widget interaction; wrapping
	    the load in ``st.cache_resource`` keeps a single tokenizer/model pair
	    alive across reruns instead of re-reading the checkpoint each time.

	    Returns:
	        A ``(tokenizer, model)`` tuple. The model has been moved to
	        ``device`` and switched to evaluation mode.
	    """
	    loaded_tokenizer = XLMRobertaTokenizer.from_pretrained(path)
	    loaded_model = XLMRobertaForSequenceClassification.from_pretrained(path)
	    loaded_model.to(device)
	    # Evaluation mode, as in the original setup (inference only).
	    loaded_model.eval()
	    return loaded_tokenizer, loaded_model


	tokenizer, model = _load_classifier(model_path)


	def classify_text(text, max_length=128):
	    """Classify ``text`` as Kyrgyz or Non-Kyrgyz.

	    The text is tokenized with truncation and padding to ``max_length``
	    tokens, moved to ``device`` and passed through ``model`` with gradient
	    tracking disabled. A softmax over the last dimension of the logits
	    gives the class probabilities.

	    Args:
	        text: Input passed to the tokenizer. The ``.item()`` call below
	            requires a single prediction, so one text per call.
	        max_length: Token length used for both truncation and padding.

	    Returns:
	        A ``(label, confidence)`` tuple: ``label`` is ``"Kyrgyz"`` when the
	        highest-probability class index is 1 and ``"Non-Kyrgyz"`` otherwise;
	        ``confidence`` is the probability of that class as a float.
	    """
	    encoded = tokenizer(
	        text,
	        return_tensors="pt",
	        truncation=True,
	        padding="max_length",
	        max_length=max_length,
	    )
	    # Every tensor the tokenizer returned must live on the model's device.
	    batch = {name: tensor.to(device) for name, tensor in encoded.items()}

	    with torch.no_grad():
	        logits = model(**batch).logits
	        class_probs = torch.softmax(logits, dim=-1)

	    winner = class_probs.argmax(dim=-1).item()
	    winner_prob = class_probs[0, winner].item()

	    if winner == 1:
	        verdict = "Kyrgyz"
	    else:
	        verdict = "Non-Kyrgyz"
	    return verdict, winner_prob


	# Page header and a one-line description of the tool.
	st.title("Kyrgyz Language Classifier")
	st.write("This tool identifies whether the given text is Kyrgyz or not.")

	# Usage notes rendered as Markdown above the input box.
	st.markdown("""
	Instructions:

	* Please enter a sentence for better accuracy.
	* Note: The word "Салам" might be classified as Non-Kyrgyz. This is a known exception.
	""")

	user_input = st.text_area(
	    "Enter text to classify:",
	    placeholder="Type your sentence here...",
	)

	# Act only on a button click; whitespace-only input gets a warning
	# instead of a prediction.
	classify_clicked = st.button("Classify")
	if classify_clicked and not user_input.strip():
	    st.warning("Please enter some text for classification.")
	elif classify_clicked:
	    label, confidence = classify_text(user_input)
	    st.write(f"Prediction: {label}")
	    st.write(f"Confidence: {confidence:.2%}")