NaolTaye committed on
Commit
9fde312
·
1 Parent(s): c3ebc78
Files changed (1) hide show
  1. tasks/text.py +14 -16
tasks/text.py CHANGED
@@ -68,21 +68,21 @@ async def evaluate_text(request: TextEvaluationRequest):
68
  tokenizer = AutoTokenizer.from_pretrained("cococli/bert-base-uncased-frugalai")
69
  model = AutoModelForSequenceClassification.from_pretrained("cococli/bert-base-uncased-frugalai").to(device)
70
 
71
- def tokenize_function(examples):
72
- return tokenizer(examples["quote"], padding=True, truncation=True, return_tensors='pt')
73
- print('BEFORE TOKENIZING')
74
- # Tokenize the test dataset
75
- tokenized_test = test_dataset.map(tokenize_function, batched=True)
76
- print('AFTER TOKENIZING')
77
- print(tokenized_test.column_names) # Debugging step
78
- print(tokenized_test['input_ids'][:5]) # Debugging step
79
-
80
- # Create DataLoader
81
- data_collator = DataCollatorWithPadding(tokenizer=tokenizer)
82
- dataloader = DataLoader(tokenized_test, batch_size=16, shuffle=False, collate_fn=data_collator)
83
 
84
  print("Started prediction run")
85
-
86
  # Model inference
87
  model.eval()
88
  predictions = np.array([])
@@ -90,9 +90,7 @@ async def evaluate_text(request: TextEvaluationRequest):
90
  with torch.no_grad():
91
  print('BEFORE PREDICTION')
92
 
93
- test_input_ids = tokenized_test["input_ids"]
94
- test_attention_mask = tokenized_test["attention_mask"]
95
- outputs = model(test_input_ids, test_attention_mask)
96
  p = torch.argmax(outputs.logits, dim=1)
97
  predictions = np.append(predictions, p.cpu().numpy())
98
 
 
68
  tokenizer = AutoTokenizer.from_pretrained("cococli/bert-base-uncased-frugalai")
69
  model = AutoModelForSequenceClassification.from_pretrained("cococli/bert-base-uncased-frugalai").to(device)
70
 
71
+ # def tokenize_function(examples):
72
+ # return tokenizer(examples["quote"], padding=True, truncation=True, return_tensors='pt')
73
+ # print('BEFORE TOKENIZING')
74
+ # # Tokenize the test dataset
75
+ # tokenized_test = test_dataset.map(tokenize_function, batched=True)
76
+ # print('AFTER TOKENIZING')
77
+ # print(tokenized_test.column_names) # Debugging step
78
+ # print(tokenized_test['input_ids'][:5]) # Debugging step
79
+
80
+ # # Create DataLoader
81
+ # data_collator = DataCollatorWithPadding(tokenizer=tokenizer)
82
+ # dataloader = DataLoader(tokenized_test, batch_size=16, shuffle=False, collate_fn=data_collator)
83
 
84
  print("Started prediction run")
85
+ tokenized_test = tokenizer(test_dataset['quote'], padding=True, truncation=True, return_tensors='pt')
86
  # Model inference
87
  model.eval()
88
  predictions = np.array([])
 
90
  with torch.no_grad():
91
  print('BEFORE PREDICTION')
92
 
93
+ outputs = model(**tokenized_test)
 
 
94
  p = torch.argmax(outputs.logits, dim=1)
95
  predictions = np.append(predictions, p.cpu().numpy())
96