Update tasks/text.py
tasks/text.py   CHANGED   (+14, -4)
@@ -16,6 +16,9 @@ from transformers import AutoModelForMultipleChoice, TrainingArguments, Trainer
 import numpy as np
 from sklearn.metrics import recall_score, accuracy_score
 from transformers import DataCollatorWithPadding
+import logging
+# import mylib
+logger = logging.getLogger(__name__)
 
 VER = 1
 MAX_LEN = 256
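The new logger is created with logging.getLogger(__name__) but never configured in this hunk. Without configuration, Python's last-resort handler only emits WARNING and above, so the logger.info(...) calls added below would stay silent. A minimal sketch of the setup this assumes, run once at startup (the format string and placement are illustrative, not part of the commit):

import logging

# Configure the root logger once so INFO records reach stderr,
# where the Space's container logs will pick them up.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s %(name)s %(levelname)s: %(message)s",
)

logger = logging.getLogger(__name__)
logger.info("logging configured")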
@@ -92,15 +95,17 @@ async def evaluate_text(request: TextEvaluationRequest):
     # YOUR MODEL INFERENCE CODE HERE
     # Update the code below to replace the random baseline by your model inference within the inference pass where the energy consumption and emissions are tracked.
     #--------------------------------------------------------------------------------------------
-
+    logger.info('Start Binary')
     # Binary Model
     tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_BINARY)
+    logger.info('Loaded Tokenizer')
     model = AutoModelForSequenceClassification.from_pretrained(BINARY_MODEL)
+    logger.info('Loaded Model')
     model.to(device)
     model.eval()
 
     predictions = []
-    for text in tqdm(test_dataset["quote"]):
+    for i,text in tqdm(enumerate(test_dataset["quote"])):
         with torch.no_grad():
             tokenized_text = tokenizer(text, truncation=True, padding='max_length', return_tensors = "pt")
             inputt = {k:v.to(device) for k,v in tokenized_text.items()}
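This loop tokenizes one quote per step with padding='max_length', so every example is padded to the full MAX_LEN and the model runs at batch size 1. A hedged sketch of batched inference under the same names (tokenizer, model, device, and test_dataset come from the file; BATCH_SIZE is an assumption to tune against the Space's memory):

import torch

BATCH_SIZE = 32  # assumption, not from the commit
quotes = list(test_dataset["quote"])
binary_preds = []
with torch.no_grad():
    for start in range(0, len(quotes), BATCH_SIZE):
        batch = quotes[start:start + BATCH_SIZE]
        # Pad dynamically to the longest quote in the batch instead of max_length.
        enc = tokenizer(batch, truncation=True, padding=True, return_tensors="pt").to(device)
        logits = model(**enc).logits
        binary_preds.extend(logits.argmax(dim=-1).tolist())

Dynamic padding plus batching usually cuts the tracked inference time, and therefore the measured energy, substantially compared with per-example max-length padding.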
@@ -111,15 +116,19 @@ async def evaluate_text(request: TextEvaluationRequest):
 
             prediction = "0_not_relevant" if binary_prediction==0 else 1
             predictions.append(prediction)
+            if i%10:
+                logger.info(f'iteration: {i}')
 
     gc.collect()
 
     ## 2. Taxonomy Model
+    logger.info('Start Multi')
     tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_MULTI_CLASS)
+    logger.info('Loaded Tokenizer')
     model = AutoModelForSequenceClassification.from_pretrained(MULTI_CLASS_MODEL)
     model.to(device)
     model.eval()
-
+    logger.info('Loaded Model')
     for i,text in tqdm(enumerate(test_dataset["quote"])):
         if isinstance(predictions[i], str):
             continue
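One behavioral detail in the added progress logs: if i%10: is truthy for every index that is not a multiple of 10, so the message fires on 9 out of every 10 iterations. If the intent was to log every tenth iteration, the usual guard inverts the test:

# Log every 10th iteration instead of all but every 10th.
if i % 10 == 0:
    logger.info(f'iteration: {i}')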
@@ -132,7 +141,8 @@ async def evaluate_text(request: TextEvaluationRequest):
 
             prediction = ID2LABEL[taxonomy_prediction]
             predictions[i] = prediction
-
+            if i%10:
+                logger.info(f'iteration: {i}')
     predictions = [LABEL_MAPPING[pred] for pred in predictions]
     #--------------------------------------------------------------------------------------------
     # YOUR MODEL INFERENCE STOPS HERE
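The gc.collect() between the two stages reclaims Python objects, but while model still references the binary network its weights stay resident (on the GPU, if the Space has one) until the name is rebound by the taxonomy load. A hedged sketch of a fuller cleanup before loading MULTI_CLASS_MODEL, using standard PyTorch calls the commit does not include:

import gc
import torch

del model, tokenizer          # drop the binary-stage references
gc.collect()                  # reclaim Python-side objects
if torch.cuda.is_available():
    torch.cuda.empty_cache()  # return cached CUDA blocks to the driver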
|