jaynopponep committed on
Commit
7cc941c
·
1 Parent(s): d03875a

removing train.py

Browse files
Files changed (1) hide show
  1. train.py +0 -64
train.py DELETED
@@ -1,64 +0,0 @@
1
- from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
2
- import numpy as np
3
- import pandas as pd
4
- from datasets import Dataset
5
- from sklearn.model_selection import train_test_split
6
- from sklearn.metrics import accuracy_score, precision_recall_fscore_support
7
-
8
-
9
- # Load dataset dynamically or from a config
10
- df = pd.read_csv("AI_Human.csv")
11
- train_df, eval_df = train_test_split(df, test_size=0.2)
12
- tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
13
-
14
- def tokenize_function(examples):
15
- return tokenizer(examples["text"], padding="max_length", truncation=True, max_length=512)
16
-
17
- # Convert DataFrames to Datasets and apply tokenization
18
- train_dataset = Dataset.from_pandas(train_df)
19
- eval_dataset = Dataset.from_pandas(eval_df)
20
-
21
- train_dataset = train_dataset.map(tokenize_function, batched=True)
22
- train_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'labels'])
23
-
24
- eval_dataset = eval_dataset.map(tokenize_function, batched=True)
25
- eval_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'labels'])
26
-
27
- model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=2)
28
-
29
-
30
- training_args = TrainingArguments(
31
- output_dir="./results",
32
- num_train_epochs=3,
33
- per_device_train_batch_size=8,
34
- per_device_eval_batch_size=8,
35
- warmup_steps=500,
36
- weight_decay=0.01,
37
- logging_dir='./logs',
38
- evaluation_strategy="steps",
39
- save_steps=500,
40
- logging_steps=100,
41
- )
42
- def compute_metrics(pred):
43
- labels = pred.label_ids
44
- preds = np.argmax(pred.predictions, axis=-1)
45
- precision, recall, f1, _ = precision_recall_fscore_support(labels, preds, average='binary')
46
- acc = accuracy_score(labels, preds)
47
- return {
48
- 'accuracy': acc,
49
- 'f1': f1,
50
- 'precision': precision,
51
- 'recall': recall
52
- }
53
-
54
- trainer = Trainer(
55
- model=model,
56
- args=training_args,
57
- train_dataset=train_dataset,
58
- eval_dataset=eval_dataset,
59
- compute_metrics=compute_metrics
60
- )
61
-
62
- trainer.train()
63
- model.save_pretrained("./trained_model")
64
- tokenizer.save_pretrained("./trained_model")