jaynopponep committed on
Commit
6c014d0
·
1 Parent(s): d4939c3

Changing train.py

Browse files
Files changed (1) hide show
  1. train.py +14 -0
train.py CHANGED
@@ -2,6 +2,8 @@ from model import get_model
2
  import torch
3
  from transformers import BertTokenizer, Trainer, TrainingArguments
4
  from datasets import load_dataset
 
 
5
 
6
  # Load dataset dynamically or from a config
7
  dataset_name = "NicolaiSivesind/human-vs-machine"
@@ -9,6 +11,18 @@ dataset = load_dataset(dataset_name)
9
 
10
  tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
11
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  def tokenize_function(examples):
13
  # Add any specific preprocessing steps if necessary
14
  return tokenizer(examples["text"], padding="max_length", truncation=True, max_length=512)
 
2
  import torch
3
  from transformers import BertTokenizer, Trainer, TrainingArguments
4
  from datasets import load_dataset
5
+ import numpy as np
6
+ from sklearn.metrics import accuracy_score, precision_recall_fscore_support
7
 
8
  # Load dataset dynamically or from a config
9
  dataset_name = "NicolaiSivesind/human-vs-machine"
 
11
 
12
  tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
13
 
14
+ def compute_metrics(pred):
15
+ labels = pred.label_ids
16
+ preds = np.argmax(pred.predictions, axis=1)
17
+ precision, recall, f1, _ = precision_recall_fscore_support(labels, preds, average='binary')
18
+ acc = accuracy_score(labels, preds)
19
+ return {
20
+ 'accuracy': acc,
21
+ 'f1': f1,
22
+ 'precision': precision,
23
+ 'recall': recall
24
+ }
25
+
26
  def tokenize_function(examples):
27
  # Add any specific preprocessing steps if necessary
28
  return tokenizer(examples["text"], padding="max_length", truncation=True, max_length=512)