sonoisa committed on
Commit 6e39e37 · 1 Parent(s): 1ace848

Update text encoder model

output_linear.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4da2aadf0a1303ba85d0834450c14c8a396cd43fdb11fb182dcdfd47989a2986
+oid sha256:d49a6e24d051ee5dc3490ed4927513abd4c46e32bc479188b00648e1ef311a4d
 size 4721639
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1f78cd6a86b5a818d2971418d3f073417e38c2296aaea36110dcf4e474f8faa9
+oid sha256:762c0f6542b607e5f606ce41f21ca995cc456a0ab1c9bbf507f75d022add985a
 size 442547953
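Both binaries are Git LFS pointer files, so only the sha256 oids change in this commit; the sizes are identical, which is consistent with retrained weights of the same shape. Below is a minimal sketch for sanity-checking the updated files after cloning the repo and running `git lfs pull`; it assumes both `.bin` files are plain PyTorch state dicts saved with `torch.save`, which is an assumption about this repo rather than something stated in the commit.

```python
# Minimal sketch: inspect the two updated weight files as PyTorch state dicts.
# Assumes the .bin files were saved with torch.save (assumption, not documented here).
import torch

for name in ("pytorch_model.bin", "output_linear.bin"):
    state = torch.load(name, map_location="cpu")
    n_params = sum(t.numel() for t in state.values())
    print(f"{name}: {len(state)} tensors, {n_params:,} parameters")
```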
training_args_1.json CHANGED
@@ -2,7 +2,7 @@
   "adam_epsilon": 1e-08,
   "data_dir": "/content/data",
   "early_stop_callback": false,
-  "eval_batch_size": 8,
+  "eval_batch_size": 32,
   "fp_16": false,
   "gradient_accumulation_steps": 4,
   "learning_rate": 0.001,
@@ -10,7 +10,7 @@
   "max_input_length": 64,
   "model_name_or_path": "cl-tohoku/bert-base-japanese-whole-word-masking",
   "n_gpu": 1,
-  "num_train_epochs": 2,
+  "num_train_epochs": 1,
   "seed": 42,
   "shuffle_buffer_size": 65536,
   "tokenizer_name_or_path": "cl-tohoku/bert-base-japanese-whole-word-masking",
training_args_2.json CHANGED
@@ -10,8 +10,8 @@
   "max_input_length": 64,
   "model_name_or_path": "/content/pretrain_model",
   "n_gpu": 1,
-  "num_train_epochs": 2,
-  "seed": 42,
+  "num_train_epochs": 1,
+  "seed": 12345,
   "shuffle_buffer_size": 65536,
   "tokenizer_name_or_path": "/content/pretrain_model",
   "train_batch_size": 256,
training_args_3.json DELETED
@@ -1,20 +0,0 @@
1
- {
2
- "adam_epsilon": 1e-08,
3
- "data_dir": "/content/data",
4
- "early_stop_callback": false,
5
- "eval_batch_size": 32,
6
- "fp_16": false,
7
- "gradient_accumulation_steps": 4,
8
- "learning_rate": 0.0002,
9
- "max_grad_norm": 1.0,
10
- "max_input_length": 64,
11
- "model_name_or_path": "/content/pretrain_model",
12
- "n_gpu": 1,
13
- "num_train_epochs": 2,
14
- "seed": 12345,
15
- "shuffle_buffer_size": 65536,
16
- "tokenizer_name_or_path": "/content/pretrain_model",
17
- "train_batch_size": 256,
18
- "warmup_ratio": 0.05,
19
- "weight_decay": 0.0
20
- }
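The deleted stage-3 config differed from the stage-1 config mainly in its lower learning rate (2e-4 vs. 1e-3); the batching settings are the same across the configs shown here, so the effective per-update batch size they imply is unchanged. A quick, purely illustrative check:

```python
# Worked example using the values from the configs above; illustrative only.
train_batch_size = 256
gradient_accumulation_steps = 4
effective_batch_size = train_batch_size * gradient_accumulation_steps
print(effective_batch_size)  # 1024 samples per optimizer step
```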