submission-template

Sleeping

App Files Community

NaolTaye committed on Feb 2

Commit

49eadc2

verified ·

1 Parent(s): 0ae53cb

Update tasks/text.py

Browse files

Files changed (1) hide show

tasks/text.py +37 -2

tasks/text.py CHANGED Viewed

@@ -4,12 +4,14 @@ from datasets import load_dataset
 from sklearn.metrics import accuracy_score
 import random
 from .utils.evaluation import TextEvaluationRequest
 from .utils.emissions import tracker, clean_emissions_data, get_space_info
 router = APIRouter()
-DESCRIPTION = "Random Baseline"
 ROUTE = "/text"
 @router.post(ROUTE, tags=["Text Task"],
@@ -55,10 +57,43 @@ async def evaluate_text(request: TextEvaluationRequest):
     # YOUR MODEL INFERENCE CODE HERE
     # Update the code below to replace the random baseline by your model inference within the inference pass where the energy consumption and emissions are tracked.
     #--------------------------------------------------------------------------------------------
     # Make random predictions (placeholder for actual model inference)
     true_labels = test_dataset["label"]
-    predictions = [random.randint(0, 7) for _ in range(len(true_labels))]
     #--------------------------------------------------------------------------------------------
     # YOUR MODEL INFERENCE STOPS HERE

 from sklearn.metrics import accuracy_score
 import random
+from transformers import AutoTokenizer,BertForSequenceClassification,AutoModelForSequenceClassification,Trainer, TrainingArguments,DataCollatorWithPadding
 from .utils.evaluation import TextEvaluationRequest
 from .utils.emissions import tracker, clean_emissions_data, get_space_info
 router = APIRouter()
+DESCRIPTION = "BERT V1.1"
 ROUTE = "/text"
 @router.post(ROUTE, tags=["Text Task"],
     # YOUR MODEL INFERENCE CODE HERE
     # Update the code below to replace the random baseline by your model inference within the inference pass where the energy consumption and emissions are tracked.
     #--------------------------------------------------------------------------------------------
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    tokenizer = AutoTokenizer.from_pretrained("cococli/bert-base-uncased-frugalai")
+    model = AutoModelForSequenceClassification.from_pretrained("cococli/bert-base-uncased-frugalai").to(device)
+    # Preprocessing function
+    def preprocess_function(df):
+        tokenized = tokenizer(df["quote"], truncation=True) # Removed padding here
+        return tokenized
+    tokenized_test = test_dataset.map(preprocess_function, batched=True)
+    tokenized_test.set_format(type="torch", columns=["input_ids", "attention_mask"])
+    data_collator = DataCollatorWithPadding(tokenizer=tokenizer)
+    batch_size = 16
+    test_loader = DataLoader(tokenized_test, batch_size=batch_size, collate_fn=data_collator)
+    model.eval()
+    # Inference on the selected device (GPU if available, otherwise CPU)
+    predictions = []
+    with torch.no_grad():
+        for batch in test_loader:
+            input_ids = batch['input_ids'].to(device)
+            attention_mask = batch['attention_mask'].to(device)
+            outputs = model(input_ids=input_ids, attention_mask=attention_mask)
+            logits = outputs.logits
+            preds = torch.argmax(logits, dim=-1)
+            predictions.extend(preds.cpu().numpy())
     # Make random predictions (placeholder for actual model inference)
     true_labels = test_dataset["label"]
+    # predictions = [random.randint(0, 7) for _ in range(len(true_labels))]
     #--------------------------------------------------------------------------------------------
     # YOUR MODEL INFERENCE STOPS HERE