Update tasks/text.py
tasks/text.py · CHANGED · +67 -3
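Replaces the template's random baseline in the text task with a two-stage pipeline: a RoBERTa-based binary model first filters out irrelevant quotes, then a DeBERTa-based taxonomy model assigns a claim category to the rest.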
@@ -1,3 +1,31 @@
+import os
+import gc
+
+os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"
+
+import torch
+from tqdm import tqdm
+from typing import Optional, Union
+import pandas as pd, numpy as np, torch
+from datasets import Dataset, load_dataset
+from dataclasses import dataclass
+from transformers import AutoTokenizer, AutoConfig, AutoModelForSequenceClassification, AutoModel
+from transformers import EarlyStoppingCallback
+from transformers.tokenization_utils_base import PreTrainedTokenizerBase, PaddingStrategy
+from transformers import AutoModelForMultipleChoice, TrainingArguments, Trainer
+import numpy as np
+from sklearn.metrics import recall_score, accuracy_score
+from transformers import DataCollatorWithPadding
+
+VER = 1
+MAX_LEN = 256
+TOKENIZER_BINARY = "crarojasca/BinaryAugmentedCARDS"
+BINARY_MODEL = "Medissa/Roberta_Binary"
+TOKENIZER_MULTI_CLASS = "crarojasca/TaxonomyAugmentedCARDS"
+MULTI_CLASS_MODEL = "Medissa/Deberta_Taxonomy"
+
+device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+
 from fastapi import APIRouter
 from datetime import datetime
 from datasets import load_dataset
@@ -56,10 +84,46 @@ async def evaluate_text(request: TextEvaluationRequest):
     # Update the code below to replace the random baseline by your model inference within the inference pass where the energy consumption and emissions are tracked.
     #--------------------------------------------------------------------------------------------

-    # Make random predictions (placeholder for actual model inference)
-    true_labels = test_dataset["label"]
-    predictions = [random.randint(0, 7) for _ in range(len(true_labels))]
+    # 1. Binary Model: decide whether each quote is relevant at all
+    tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_BINARY)
+    model = AutoModelForSequenceClassification.from_pretrained(BINARY_MODEL)
+    model.to(device)
+    model.eval()
+
+    predictions = []
+    for text in tqdm(dataset_test_df["quote"]):  # dataset_test_df is built earlier in this route (not shown in the hunk)
+        with torch.no_grad():
+            tokenized_text = tokenizer(text, truncation=True, padding='max_length', return_tensors="pt")
+            inputt = {k: v.to(device) for k, v in tokenized_text.items()}
+            # Running Binary Model
+            outputs = model(**inputt)
+            binary_prediction = torch.argmax(outputs.logits, axis=1)
+            binary_prediction = binary_prediction.to('cpu').item()
+
+        prediction = "0_not_relevant" if binary_prediction == 0 else 1  # int 1 is a placeholder resolved by the taxonomy model below
+        predictions.append(prediction)
+
+    gc.collect()

+
## 2. Taxonomy Model
|
109 |
+
tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_MULTI_CLASS)
|
110 |
+
model = AutoModelForSequenceClassification.from_pretrained(MULTI_CLASS_MODEL)
|
111 |
+
model.to(device)
|
112 |
+
model.eval()
|
113 |
+
|
114 |
+
for i,text in tqdm(enumerate(dataset_test_df["quote"])):
|
115 |
+
if isinstance(predictions[i], str):
|
116 |
+
continue
|
117 |
+
with torch.no_grad():
|
118 |
+
tokenized_text = tokenizer(text, truncation=True, padding='max_length', return_tensors = "pt")
|
119 |
+
inputt = {k:v.to(device) for k,v in tokenized_text.items()}
|
120 |
+
outputs = model(**inputt)
|
121 |
+
taxonomy_prediction = torch.argmax(outputs.logits, axis=1)
|
122 |
+
taxonomy_prediction = taxonomy_prediction.to('cpu').item()
|
123 |
+
|
124 |
+
prediction = ID2LABEL[taxonomy_prediction]
|
125 |
+
predictions[i] = prediction
|
126 |
+
|
     #--------------------------------------------------------------------------------------------
     # YOUR MODEL INFERENCE STOPS HERE
     #--------------------------------------------------------------------------------------------
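For reference, the cascade this commit implements can be exercised on its own, outside the FastAPI route. The sketch below is illustrative, not part of the commit: the checkpoint names are taken from the diff, while the load and classify helpers and the id2label argument are assumptions (the diff confirms only the "0_not_relevant" label and an ID2LABEL lookup defined elsewhere in tasks/text.py).

import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Checkpoint names as they appear in the diff
TOKENIZER_BINARY = "crarojasca/BinaryAugmentedCARDS"
BINARY_MODEL = "Medissa/Roberta_Binary"
TOKENIZER_MULTI_CLASS = "crarojasca/TaxonomyAugmentedCARDS"
MULTI_CLASS_MODEL = "Medissa/Deberta_Taxonomy"

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

def load(tokenizer_name, model_name):
    # One tokenizer/model pair per stage, in eval mode on the available device
    tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
    model = AutoModelForSequenceClassification.from_pretrained(model_name).to(device).eval()
    return tokenizer, model

@torch.no_grad()
def classify(texts, id2label):
    # Stage 1 drops irrelevant quotes; stage 2 labels only what survives
    bin_tok, bin_model = load(TOKENIZER_BINARY, BINARY_MODEL)
    tax_tok, tax_model = load(TOKENIZER_MULTI_CLASS, MULTI_CLASS_MODEL)
    labels = []
    for text in texts:
        enc = bin_tok(text, truncation=True, return_tensors="pt").to(device)
        if bin_model(**enc).logits.argmax(-1).item() == 0:
            labels.append("0_not_relevant")  # the one label the diff confirms
            continue
        enc = tax_tok(text, truncation=True, return_tensors="pt").to(device)
        labels.append(id2label[tax_model(**enc).logits.argmax(-1).item()])
    return labels

Gating the heavier taxonomy model on the binary stage's output keeps forward passes for irrelevant quotes out of the tracked inference window, which is what the energy and emissions accounting mentioned in the comments rewards.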
|