Spaces:
Running
Running
File size: 1,817 Bytes
6b4436c 565b974 6b4436c 10e79e6 9eb0ac9 10e79e6 3514916 6b4436c d95c212 3514916 565b974 3514916 565b974 10e79e6 6b4436c 3514916 565b974 3514916 3919100 3514916 565b974 3514916 565b974 3514916 565b974 3514916 565b974 3514916 565b974 c104f27 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 |
import evaluate
import datasets
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
_description = """Fluency Score is a metric to score an Arabic sentence based on its "Fluency". That means is it closer to the eloquent classical Arabic (1) or not (0).
Examples of the sentences can be found in the evaluation metric's model card [Here](https://huggingface.co/Baleegh/Fluency_Score).
"""
class Fluency_Score(evaluate.Metric):
    """Evaluate metric scoring Arabic text fluency.

    Wraps the ``Baleegh/Fluency_Score`` sequence-classification model from
    the Hugging Face Hub. Scores near 1 indicate text close to eloquent
    classical Arabic; scores near 0 indicate it is not.
    """

    def _info(self):
        """Return the metric metadata; input is a single "texts" string feature."""
        return evaluate.MetricInfo(
            # Reuse the module-level _description instead of leaving the
            # metadata empty (it was previously defined but never used).
            description=_description,
            citation="",
            inputs_description=_description,
            features=datasets.Features(
                {
                    "texts": datasets.Value("string", id="sequence"),
                }
            ),
            reference_urls=['https://huggingface.co/Baleegh/Fluency_Score'],
        )

    def _download_and_prepare(self, dl_manager, device=None):
        """Load the tokenizer and model from the Hub and move the model to `device`.

        Args:
            dl_manager: required by the evaluate.Metric interface; unused here
                because the weights are fetched via ``from_pretrained``.
            device: torch device string; defaults to "cuda" when available,
                otherwise "cpu".
        """
        if device is None:
            device = "cuda" if torch.cuda.is_available() else "cpu"
        # Load the tokenizer and model from the specified repository.
        self.tokenizer = AutoTokenizer.from_pretrained("Baleegh/Fluency_Score")
        self.model = AutoModelForSequenceClassification.from_pretrained("Baleegh/Fluency_Score")
        self.model.to(device)
        self.device = device

    def _compute(self, texts, temperature=2):
        """Score `texts` and return ``{"classical_score": tensor in [0, 1]}``.

        NOTE(review): `temperature` is accepted but never applied to the
        logits — kept for backward compatibility; confirm whether temperature
        scaling was intended before the clip.
        """
        inputs = self.tokenizer(
            texts,
            return_tensors="pt",
            truncation=True,
            padding='max_length',
            max_length=128,
        ).to(self.device)
        with torch.inference_mode():
            output = self.model(**inputs)
        # Clamp raw logits into [0, 1] to produce the fluency score.
        prediction = output.logits.clip(0, 1)
        return {"classical_score": prediction}