|
import streamlit as st |
|
from transformers import AutoTokenizer, AutoModelForSequenceClassification |
|
import torch |
|
|
|
|
|
|
|
@st.cache_resource
def load_model_and_tokenizer():
    """Load and cache the AraBERT tokenizer and 3-class classification model.

    Cached via ``st.cache_resource`` so the heavyweight model is loaded once
    per Streamlit server process, not on every script rerun.

    Returns:
        tuple: ``(tokenizer, model)`` where ``model`` is in eval mode.
    """
    model_name = "aubmindlab/bert-base-arabertv02"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    # NOTE(review): this is the *base* checkpoint — it has no fine-tuned
    # classification head, so the head weights are randomly initialized and
    # predictions are not meaningful until the model is fine-tuned.
    # num_labels=3 sizes the head to match the 3-entry label list used by
    # the UI below (the default of 2 would make the third label unreachable).
    model = AutoModelForSequenceClassification.from_pretrained(
        model_name, num_labels=3
    )
    model.eval()  # disable dropout for deterministic inference
    return tokenizer, model
|
|
|
# Load once (cached across reruns by st.cache_resource).
tokenizer, model = load_model_and_tokenizer()

st.title("AraBERT Demo - تصنيف النصوص العربية")

text_input = st.text_area("أدخل نصًا عربيًا:")

if st.button("صنف"):
    # Reject empty AND whitespace-only input (a bare truthiness check would
    # let "   " through to the model).
    if text_input.strip():
        inputs = tokenizer(
            text_input, return_tensors="pt", truncation=True, padding=True
        )
        with torch.no_grad():  # inference only — skip gradient bookkeeping
            outputs = model(**inputs)
        logits = outputs.logits
        # Reduce over the class dimension of the (1, num_labels) logits.
        # A bare .argmax() flattens the tensor first — correct only for
        # batch size 1 by coincidence; dim=-1 states the intent.
        predicted_class_id = logits.argmax(dim=-1).item()

        labels = ["تصنيف 1", "تصنيف 2", "تصنيف 3"]
        # Guard against a model head with more classes than we have labels,
        # rather than raising IndexError in the UI.
        if predicted_class_id < len(labels):
            st.write(f"التصنيف المتوقع: {labels[predicted_class_id]}")
        else:
            st.write(f"التصنيف المتوقع: {predicted_class_id}")
    else:
        st.warning("يرجى إدخال نص.")
|
|