GlowCheese committed
Commit dfcb0a5 · 1 Parent(s): 897eef4

retrain on last-linear-layer

classifier.py CHANGED
@@ -13,7 +13,7 @@ from bert import BertModel
 from optimizer import AdamW
 
 
-TQDM_DISABLE=False
+TQDM_DISABLE=True
 
 
 # Fix the random seed.
@@ -340,14 +340,14 @@ def test(args):
 def get_args():
     parser = argparse.ArgumentParser()
     parser.add_argument("--seed", type=int, default=11711)
-    parser.add_argument("--num-cpu-cores", type=int, default=4)
+    parser.add_argument("--num-cpu-cores", type=int, default=8)
     parser.add_argument("--epochs", type=int, default=10)
     parser.add_argument("--fine-tune-mode", type=str,
                         help='last-linear-layer: the BERT parameters are frozen and the task specific head parameters are updated; full-model: BERT parameters are updated as well',
                         choices=('last-linear-layer', 'full-model'), default="last-linear-layer")
     parser.add_argument("--use_gpu", action='store_true')
 
-    parser.add_argument("--batch_size_sst", help='64 can fit a 12GB GPU', type=int, default=8)
+    parser.add_argument("--batch_size_sst", help='64 can fit a 12GB GPU', type=int, default=64)
     parser.add_argument("--batch_size_cfimdb", help='8 can fit a 12GB GPU', type=int, default=8)
     parser.add_argument("--hidden_dropout_prob", type=float, default=0.3)
     parser.add_argument("--lr", type=float, help="learning rate, default lr for 'pretrain': 1e-3, 'finetune': 1e-5",
justfile CHANGED
@@ -1 +1,14 @@
-python classifier.py --num-cpu-cores 8 --batch_size_sst 64 --batch_size_cfimdb 8
+# Testing on Google Cloud VM with no GPU and 8 cpu cores.
+
+# If this doesn't meet your need, add the --use_gpu
+# or --num-cpu-cores arguments to the existing commands.
+
+
+default:
+    @just --list
+
+last-linear:
+    python classifier.py
+
+full-model:
+    python classifier.py --fine-tune-mode full-model --lr 1e-5
trainings/last-layer-no-dropout.txt DELETED
@@ -1,38 +0,0 @@
-Training Sentiment Classifier on SST...
-load 8544 data from data/ids-sst-train.csv
-load 1101 data from data/ids-sst-dev.csv
-Epoch 0: train loss :: 1.429, train acc :: 0.454, dev acc :: 0.441
-Epoch 1: train loss :: 1.297, train acc :: 0.467, dev acc :: 0.431
-Epoch 2: train loss :: 1.253, train acc :: 0.489, dev acc :: 0.450
-Epoch 3: train loss :: 1.233, train acc :: 0.491, dev acc :: 0.455
-Epoch 4: train loss :: 1.214, train acc :: 0.501, dev acc :: 0.450
-Epoch 5: train loss :: 1.211, train acc :: 0.511, dev acc :: 0.465
-Epoch 6: train loss :: 1.199, train acc :: 0.515, dev acc :: 0.478
-Epoch 7: train loss :: 1.192, train acc :: 0.518, dev acc :: 0.481
-Epoch 8: train loss :: 1.191, train acc :: 0.513, dev acc :: 0.467
-Epoch 9: train loss :: 1.191, train acc :: 0.505, dev acc :: 0.448
-Evaluating on SST...
-load model from sst-classifier.pt
-load 1101 data from data/ids-sst-dev.csv
-DONE DEV
-DONE Test
-dev acc :: 0.481
-Training Sentiment Classifier on cfimdb...
-load 1707 data from data/ids-cfimdb-train.csv
-load 245 data from data/ids-cfimdb-dev.csv
-Epoch 0: train loss :: 0.574, train acc :: 0.821, dev acc :: 0.829
-Epoch 1: train loss :: 0.466, train acc :: 0.866, dev acc :: 0.857
-Epoch 2: train loss :: 0.419, train acc :: 0.872, dev acc :: 0.873
-Epoch 3: train loss :: 0.386, train acc :: 0.878, dev acc :: 0.833
-Epoch 4: train loss :: 0.373, train acc :: 0.899, dev acc :: 0.849
-Epoch 5: train loss :: 0.357, train acc :: 0.893, dev acc :: 0.865
-Epoch 6: train loss :: 0.342, train acc :: 0.905, dev acc :: 0.873
-Epoch 7: train loss :: 0.334, train acc :: 0.906, dev acc :: 0.873
-Epoch 8: train loss :: 0.345, train acc :: 0.892, dev acc :: 0.824
-Epoch 9: train loss :: 0.321, train acc :: 0.888, dev acc :: 0.820
-Evaluating on cfimdb...
-load model from cfimdb-classifier.pt
-load 245 data from data/ids-cfimdb-dev.csv
-DONE DEV
-DONE Test
-dev acc :: 0.873
trainings/last-layer-w-dropout.txt DELETED
@@ -1,38 +0,0 @@
-Training Sentiment Classifier on SST...
-load 8544 data from data/ids-sst-train.csv
-load 1101 data from data/ids-sst-dev.csv
-Epoch 0: train loss :: 1.458, train acc :: 0.460, dev acc :: 0.442
-Epoch 1: train loss :: 1.331, train acc :: 0.472, dev acc :: 0.440
-Epoch 2: train loss :: 1.288, train acc :: 0.476, dev acc :: 0.447
-Epoch 3: train loss :: 1.269, train acc :: 0.490, dev acc :: 0.457
-Epoch 4: train loss :: 1.252, train acc :: 0.485, dev acc :: 0.446
-Epoch 5: train loss :: 1.242, train acc :: 0.487, dev acc :: 0.447
-Epoch 6: train loss :: 1.235, train acc :: 0.511, dev acc :: 0.472
-Epoch 7: train loss :: 1.235, train acc :: 0.512, dev acc :: 0.465
-Epoch 8: train loss :: 1.235, train acc :: 0.512, dev acc :: 0.472
-Epoch 9: train loss :: 1.227, train acc :: 0.509, dev acc :: 0.475
-Evaluating on SST...
-load model from sst-classifier.pt
-load 1101 data from data/ids-sst-dev.csv
-DONE DEV
-DONE Test
-dev acc :: 0.475
-Training Sentiment Classifier on cfimdb...
-load 1707 data from data/ids-cfimdb-train.csv
-load 245 data from data/ids-cfimdb-dev.csv
-Epoch 0: train loss :: 0.590, train acc :: 0.819, dev acc :: 0.849
-Epoch 1: train loss :: 0.510, train acc :: 0.826, dev acc :: 0.845
-Epoch 2: train loss :: 0.459, train acc :: 0.848, dev acc :: 0.853
-Epoch 3: train loss :: 0.438, train acc :: 0.880, dev acc :: 0.857
-Epoch 4: train loss :: 0.413, train acc :: 0.876, dev acc :: 0.869
-Epoch 5: train loss :: 0.406, train acc :: 0.890, dev acc :: 0.833
-Epoch 6: train loss :: 0.401, train acc :: 0.893, dev acc :: 0.845
-Epoch 7: train loss :: 0.403, train acc :: 0.870, dev acc :: 0.861
-Epoch 8: train loss :: 0.393, train acc :: 0.879, dev acc :: 0.865
-Epoch 9: train loss :: 0.407, train acc :: 0.895, dev acc :: 0.873
-Evaluating on cfimdb...
-load model from cfimdb-classifier.pt
-load 245 data from data/ids-cfimdb-dev.csv
-DONE DEV
-DONE Test
-dev acc :: 0.873
unsup_simcse.py CHANGED
@@ -203,14 +203,14 @@ def train(args):
 def get_args():
     parser = argparse.ArgumentParser()
     parser.add_argument("--seed", type=int, default=11711)
-    parser.add_argument("--num-cpu-cores", type=int, default=4)
+    parser.add_argument("--num-cpu-cores", type=int, default=8)
    parser.add_argument("--epochs", type=int, default=10)
     parser.add_argument("--use_gpu", action='store_true')
     parser.add_argument("--batch_size_cse", type=int, default=8)
     parser.add_argument("--batch_size_sst", type=int, default=64)
     parser.add_argument("--batch_size_cfimdb", type=int, default=8)
     parser.add_argument("--hidden_dropout_prob", type=float, default=0.3)
-    parser.add_argument("--lr_cse", type=float, default=2e-5)
+    parser.add_argument("--lr_cse", type=float, default=1e-5)
     parser.add_argument("--lr_classifier", type=float, default=1e-5)
 
     args = parser.parse_args()
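
The unsup_simcse.py hunk likewise only changes defaults: --num-cpu-cores goes from 4 to 8 and the SimCSE learning rate --lr_cse drops from 2e-5 to 1e-5, matching --lr_classifier. The hunk does not show where these rates are used; the sketch below assumes the script builds one AdamW optimizer per stage (contrastive training of the encoder, then the sentiment head) and that the repo's custom AdamW accepts (params, lr=...) like torch.optim.AdamW. The function and parameter names are hypothetical.

from optimizer import AdamW  # the same custom AdamW that classifier.py imports


def build_optimizers(encoder, classifier_head, args):
    # Stage 1: unsupervised SimCSE updates the BERT encoder; its learning
    # rate now defaults to 1e-5 instead of 2e-5 (see --lr_cse above).
    cse_optimizer = AdamW(encoder.parameters(), lr=args.lr_cse)

    # Stage 2: the task-specific sentiment head keeps its own rate.
    classifier_optimizer = AdamW(classifier_head.parameters(), lr=args.lr_classifier)

    return cse_optimizer, classifier_optimizer
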