Commit
·
dfcb0a5
1 Parent(s):
897eef4
retrain on last-linear-layer
Browse files
- classifier.py +3 -3
- justfile +14 -1
- trainings/last-layer-no-dropout.txt +0 -38
- trainings/last-layer-w-dropout.txt +0 -38
- unsup_simcse.py +2 -2
classifier.py
CHANGED
@@ -13,7 +13,7 @@ from bert import BertModel
|
|
13 |
from optimizer import AdamW
|
14 |
|
15 |
|
16 |
-
TQDM_DISABLE=
|
17 |
|
18 |
|
19 |
# Fix the random seed.
|
@@ -340,14 +340,14 @@ def test(args):
|
|
340 |
def get_args():
|
341 |
parser = argparse.ArgumentParser()
|
342 |
parser.add_argument("--seed", type=int, default=11711)
|
343 |
-
parser.add_argument("--num-cpu-cores", type=int, default=
|
344 |
parser.add_argument("--epochs", type=int, default=10)
|
345 |
parser.add_argument("--fine-tune-mode", type=str,
|
346 |
help='last-linear-layer: the BERT parameters are frozen and the task specific head parameters are updated; full-model: BERT parameters are updated as well',
|
347 |
choices=('last-linear-layer', 'full-model'), default="last-linear-layer")
|
348 |
parser.add_argument("--use_gpu", action='store_true')
|
349 |
|
350 |
-
parser.add_argument("--batch_size_sst", help='64 can fit a 12GB GPU', type=int, default=
|
351 |
parser.add_argument("--batch_size_cfimdb", help='8 can fit a 12GB GPU', type=int, default=8)
|
352 |
parser.add_argument("--hidden_dropout_prob", type=float, default=0.3)
|
353 |
parser.add_argument("--lr", type=float, help="learning rate, default lr for 'pretrain': 1e-3, 'finetune': 1e-5",
|
|
|
13 |
from optimizer import AdamW
|
14 |
|
15 |
|
16 |
+
TQDM_DISABLE=True
|
17 |
|
18 |
|
19 |
# Fix the random seed.
|
|
|
340 |
def get_args():
|
341 |
parser = argparse.ArgumentParser()
|
342 |
parser.add_argument("--seed", type=int, default=11711)
|
343 |
+
parser.add_argument("--num-cpu-cores", type=int, default=8)
|
344 |
parser.add_argument("--epochs", type=int, default=10)
|
345 |
parser.add_argument("--fine-tune-mode", type=str,
|
346 |
help='last-linear-layer: the BERT parameters are frozen and the task specific head parameters are updated; full-model: BERT parameters are updated as well',
|
347 |
choices=('last-linear-layer', 'full-model'), default="last-linear-layer")
|
348 |
parser.add_argument("--use_gpu", action='store_true')
|
349 |
|
350 |
+
parser.add_argument("--batch_size_sst", help='64 can fit a 12GB GPU', type=int, default=64)
|
351 |
parser.add_argument("--batch_size_cfimdb", help='8 can fit a 12GB GPU', type=int, default=8)
|
352 |
parser.add_argument("--hidden_dropout_prob", type=float, default=0.3)
|
353 |
parser.add_argument("--lr", type=float, help="learning rate, default lr for 'pretrain': 1e-3, 'finetune': 1e-5",
|
justfile
CHANGED
@@ -1 +1,14 @@
|
|
1 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Testing on Google Cloud VM with no GPU and 8 cpu cores.
|
2 |
+
|
3 |
+
# If this doesn't meet your need, add the --use_gpu
|
4 |
+
# or --num-cpu-cores arguments to the existing commands.
|
5 |
+
|
6 |
+
|
7 |
+
default:
|
8 |
+
@just --list
|
9 |
+
|
10 |
+
last-linear:
|
11 |
+
python classifier.py
|
12 |
+
|
13 |
+
full-model:
|
14 |
+
python classifier.py --fine-tune-mode full-model --lr 1e-5
|
trainings/last-layer-no-dropout.txt
DELETED
@@ -1,38 +0,0 @@
|
|
1 |
-
Training Sentiment Classifier on SST...
|
2 |
-
load 8544 data from data/ids-sst-train.csv
|
3 |
-
load 1101 data from data/ids-sst-dev.csv
|
4 |
-
Epoch 0: train loss :: 1.429, train acc :: 0.454, dev acc :: 0.441
|
5 |
-
Epoch 1: train loss :: 1.297, train acc :: 0.467, dev acc :: 0.431
|
6 |
-
Epoch 2: train loss :: 1.253, train acc :: 0.489, dev acc :: 0.450
|
7 |
-
Epoch 3: train loss :: 1.233, train acc :: 0.491, dev acc :: 0.455
|
8 |
-
Epoch 4: train loss :: 1.214, train acc :: 0.501, dev acc :: 0.450
|
9 |
-
Epoch 5: train loss :: 1.211, train acc :: 0.511, dev acc :: 0.465
|
10 |
-
Epoch 6: train loss :: 1.199, train acc :: 0.515, dev acc :: 0.478
|
11 |
-
Epoch 7: train loss :: 1.192, train acc :: 0.518, dev acc :: 0.481
|
12 |
-
Epoch 8: train loss :: 1.191, train acc :: 0.513, dev acc :: 0.467
|
13 |
-
Epoch 9: train loss :: 1.191, train acc :: 0.505, dev acc :: 0.448
|
14 |
-
Evaluating on SST...
|
15 |
-
load model from sst-classifier.pt
|
16 |
-
load 1101 data from data/ids-sst-dev.csv
|
17 |
-
DONE DEV
|
18 |
-
DONE Test
|
19 |
-
dev acc :: 0.481
|
20 |
-
Training Sentiment Classifier on cfimdb...
|
21 |
-
load 1707 data from data/ids-cfimdb-train.csv
|
22 |
-
load 245 data from data/ids-cfimdb-dev.csv
|
23 |
-
Epoch 0: train loss :: 0.574, train acc :: 0.821, dev acc :: 0.829
|
24 |
-
Epoch 1: train loss :: 0.466, train acc :: 0.866, dev acc :: 0.857
|
25 |
-
Epoch 2: train loss :: 0.419, train acc :: 0.872, dev acc :: 0.873
|
26 |
-
Epoch 3: train loss :: 0.386, train acc :: 0.878, dev acc :: 0.833
|
27 |
-
Epoch 4: train loss :: 0.373, train acc :: 0.899, dev acc :: 0.849
|
28 |
-
Epoch 5: train loss :: 0.357, train acc :: 0.893, dev acc :: 0.865
|
29 |
-
Epoch 6: train loss :: 0.342, train acc :: 0.905, dev acc :: 0.873
|
30 |
-
Epoch 7: train loss :: 0.334, train acc :: 0.906, dev acc :: 0.873
|
31 |
-
Epoch 8: train loss :: 0.345, train acc :: 0.892, dev acc :: 0.824
|
32 |
-
Epoch 9: train loss :: 0.321, train acc :: 0.888, dev acc :: 0.820
|
33 |
-
Evaluating on cfimdb...
|
34 |
-
load model from cfimdb-classifier.pt
|
35 |
-
load 245 data from data/ids-cfimdb-dev.csv
|
36 |
-
DONE DEV
|
37 |
-
DONE Test
|
38 |
-
dev acc :: 0.873
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
trainings/last-layer-w-dropout.txt
DELETED
@@ -1,38 +0,0 @@
|
|
1 |
-
Training Sentiment Classifier on SST...
|
2 |
-
load 8544 data from data/ids-sst-train.csv
|
3 |
-
load 1101 data from data/ids-sst-dev.csv
|
4 |
-
Epoch 0: train loss :: 1.458, train acc :: 0.460, dev acc :: 0.442
|
5 |
-
Epoch 1: train loss :: 1.331, train acc :: 0.472, dev acc :: 0.440
|
6 |
-
Epoch 2: train loss :: 1.288, train acc :: 0.476, dev acc :: 0.447
|
7 |
-
Epoch 3: train loss :: 1.269, train acc :: 0.490, dev acc :: 0.457
|
8 |
-
Epoch 4: train loss :: 1.252, train acc :: 0.485, dev acc :: 0.446
|
9 |
-
Epoch 5: train loss :: 1.242, train acc :: 0.487, dev acc :: 0.447
|
10 |
-
Epoch 6: train loss :: 1.235, train acc :: 0.511, dev acc :: 0.472
|
11 |
-
Epoch 7: train loss :: 1.235, train acc :: 0.512, dev acc :: 0.465
|
12 |
-
Epoch 8: train loss :: 1.235, train acc :: 0.512, dev acc :: 0.472
|
13 |
-
Epoch 9: train loss :: 1.227, train acc :: 0.509, dev acc :: 0.475
|
14 |
-
Evaluating on SST...
|
15 |
-
load model from sst-classifier.pt
|
16 |
-
load 1101 data from data/ids-sst-dev.csv
|
17 |
-
DONE DEV
|
18 |
-
DONE Test
|
19 |
-
dev acc :: 0.475
|
20 |
-
Training Sentiment Classifier on cfimdb...
|
21 |
-
load 1707 data from data/ids-cfimdb-train.csv
|
22 |
-
load 245 data from data/ids-cfimdb-dev.csv
|
23 |
-
Epoch 0: train loss :: 0.590, train acc :: 0.819, dev acc :: 0.849
|
24 |
-
Epoch 1: train loss :: 0.510, train acc :: 0.826, dev acc :: 0.845
|
25 |
-
Epoch 2: train loss :: 0.459, train acc :: 0.848, dev acc :: 0.853
|
26 |
-
Epoch 3: train loss :: 0.438, train acc :: 0.880, dev acc :: 0.857
|
27 |
-
Epoch 4: train loss :: 0.413, train acc :: 0.876, dev acc :: 0.869
|
28 |
-
Epoch 5: train loss :: 0.406, train acc :: 0.890, dev acc :: 0.833
|
29 |
-
Epoch 6: train loss :: 0.401, train acc :: 0.893, dev acc :: 0.845
|
30 |
-
Epoch 7: train loss :: 0.403, train acc :: 0.870, dev acc :: 0.861
|
31 |
-
Epoch 8: train loss :: 0.393, train acc :: 0.879, dev acc :: 0.865
|
32 |
-
Epoch 9: train loss :: 0.407, train acc :: 0.895, dev acc :: 0.873
|
33 |
-
Evaluating on cfimdb...
|
34 |
-
load model from cfimdb-classifier.pt
|
35 |
-
load 245 data from data/ids-cfimdb-dev.csv
|
36 |
-
DONE DEV
|
37 |
-
DONE Test
|
38 |
-
dev acc :: 0.873
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
unsup_simcse.py
CHANGED
@@ -203,14 +203,14 @@ def train(args):
|
|
203 |
def get_args():
|
204 |
parser = argparse.ArgumentParser()
|
205 |
parser.add_argument("--seed", type=int, default=11711)
|
206 |
-
parser.add_argument("--num-cpu-cores", type=int, default=
|
207 |
parser.add_argument("--epochs", type=int, default=10)
|
208 |
parser.add_argument("--use_gpu", action='store_true')
|
209 |
parser.add_argument("--batch_size_cse", type=int, default=8)
|
210 |
parser.add_argument("--batch_size_sst", type=int, default=64)
|
211 |
parser.add_argument("--batch_size_cfimdb", type=int, default=8)
|
212 |
parser.add_argument("--hidden_dropout_prob", type=float, default=0.3)
|
213 |
-
parser.add_argument("--lr_cse", type=float, default=
|
214 |
parser.add_argument("--lr_classifier", type=float, default=1e-5)
|
215 |
|
216 |
args = parser.parse_args()
|
|
|
203 |
def get_args():
|
204 |
parser = argparse.ArgumentParser()
|
205 |
parser.add_argument("--seed", type=int, default=11711)
|
206 |
+
parser.add_argument("--num-cpu-cores", type=int, default=8)
|
207 |
parser.add_argument("--epochs", type=int, default=10)
|
208 |
parser.add_argument("--use_gpu", action='store_true')
|
209 |
parser.add_argument("--batch_size_cse", type=int, default=8)
|
210 |
parser.add_argument("--batch_size_sst", type=int, default=64)
|
211 |
parser.add_argument("--batch_size_cfimdb", type=int, default=8)
|
212 |
parser.add_argument("--hidden_dropout_prob", type=float, default=0.3)
|
213 |
+
parser.add_argument("--lr_cse", type=float, default=1e-5)
|
214 |
parser.add_argument("--lr_classifier", type=float, default=1e-5)
|
215 |
|
216 |
args = parser.parse_args()
|