import evaluate
import datasets
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch

_description = """Fluency Score is a metric to score an Arabic sentence based on its "Fluency". That means is it closer to the eloquent classical Arabic (1) or not (0).
Examples of the sentences can be found in the evaluation metric's model card [Here](https://huggingface.co/Baleegh/Fluency_Score).
"""

class Fluency_Score(evaluate.Metric):
    
    def _info(self):
        return evaluate.MetricInfo(
            description=_description,
            citation="",
            inputs_description="texts: a list of Arabic sentences to score.",
            features=datasets.Features(
                {
                    "texts": datasets.Value("string", id="sequence"),
                }
            ),
            reference_urls=["https://huggingface.co/Baleegh/Fluency_Score"],
        )
    
    def _download_and_prepare(self, dl_manager, device=None):
        # Default to GPU when one is available, otherwise fall back to CPU.
        if device is None:
            device = "cuda" if torch.cuda.is_available() else "cpu"

        # Load the tokenizer and classifier from the metric's model repository.
        self.tokenizer = AutoTokenizer.from_pretrained("Baleegh/Fluency_Score")
        self.model = AutoModelForSequenceClassification.from_pretrained("Baleegh/Fluency_Score")

        self.model.to(device)
        self.device = device

    def _compute(self, texts):
        device = self.device

        # Tokenize the input sentences, padding/truncating to a fixed length of 128 tokens.
        inputs = self.tokenizer(
            texts,
            return_tensors="pt",
            truncation=True,
            padding="max_length",
            max_length=128,
        ).to(device)

        # Score each sentence and clamp the raw logits to the [0, 1] range.
        with torch.inference_mode():
            output = self.model(**inputs)
            prediction = output.logits.clip(0, 1).squeeze(-1)  # (batch, 1) -> (batch,)

        return {"classical_score": prediction.cpu().tolist()}