mutisya committed on
Commit
32509fb
1 Parent(s): 78d0217

Training in progress, epoch 3, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6202b6729e849f999a2da567e39e209d65ff06acefa48347a5de581d1afa97b8
3
  size 2460359008
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a87421e3a8f20612521e71c423b544bda9fcc2c6ee20a4d4377f5414bfcee9c
3
  size 2460359008
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0809e1c7430feef9833b1b5e895140d9aa81eb8e245c809f0c2d14fca2557f16
3
  size 4921031637
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8f6174299f560006cde8a99fa3a1b8ce5a5a9e1e4c9fde7e935c38eca1925783
3
  size 4921031637
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4603ba4754768852e8e67884f4b3a481d448df5b673b3e45c865854392a1e490
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4d10ef6f507f8ebfde960deac8bce9425cf6a51cacda5ba79a18f18b45f0b37e
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f9cda04b8cffed27e7df023ddfed6b3b6232fbb96b9772b53f605b6e6e25fbbd
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cec5952bdc21c397c50099196b21c11fa80846c74cee21ea9ac6f9c9105df231
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.0,
5
  "eval_steps": 500,
6
- "global_step": 2439,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -45,6 +45,96 @@
45
  "eval_samples_per_second": 15.763,
46
  "eval_steps_per_second": 1.971,
47
  "step": 2439
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
  }
49
  ],
50
  "logging_steps": 500,
@@ -64,7 +154,7 @@
64
  "attributes": {}
65
  }
66
  },
67
- "total_flos": 1.0845190328745984e+16,
68
  "train_batch_size": 8,
69
  "trial_name": null,
70
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 3.0,
5
  "eval_steps": 500,
6
+ "global_step": 7317,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
45
  "eval_samples_per_second": 15.763,
46
  "eval_steps_per_second": 1.971,
47
  "step": 2439
48
+ },
49
+ {
50
+ "epoch": 1.0250102501025011,
51
+ "grad_norm": 0.6163414716720581,
52
+ "learning_rate": 3.974989749897499e-05,
53
+ "loss": 0.515,
54
+ "step": 2500
55
+ },
56
+ {
57
+ "epoch": 1.2300123001230012,
58
+ "grad_norm": 0.5940575003623962,
59
+ "learning_rate": 3.77039770397704e-05,
60
+ "loss": 0.4555,
61
+ "step": 3000
62
+ },
63
+ {
64
+ "epoch": 1.4350143501435015,
65
+ "grad_norm": 0.6619935035705566,
66
+ "learning_rate": 3.5653956539565396e-05,
67
+ "loss": 0.4512,
68
+ "step": 3500
69
+ },
70
+ {
71
+ "epoch": 1.6400164001640016,
72
+ "grad_norm": 0.5652098655700684,
73
+ "learning_rate": 3.36039360393604e-05,
74
+ "loss": 0.4493,
75
+ "step": 4000
76
+ },
77
+ {
78
+ "epoch": 1.8450184501845017,
79
+ "grad_norm": 0.7729639410972595,
80
+ "learning_rate": 3.1553915539155394e-05,
81
+ "loss": 0.4466,
82
+ "step": 4500
83
+ },
84
+ {
85
+ "epoch": 2.0,
86
+ "eval_bleu_eng_Latn-swh_Latn": 73.66313556328497,
87
+ "eval_bleu_swh_Latn-eng_Latn": 60.741238853474165,
88
+ "eval_loss": 0.46277186274528503,
89
+ "eval_runtime": 848.8954,
90
+ "eval_samples_per_second": 16.4,
91
+ "eval_steps_per_second": 2.051,
92
+ "step": 4878
93
+ },
94
+ {
95
+ "epoch": 2.0500205002050023,
96
+ "grad_norm": 0.5829837322235107,
97
+ "learning_rate": 2.950389503895039e-05,
98
+ "loss": 0.4288,
99
+ "step": 5000
100
+ },
101
+ {
102
+ "epoch": 2.2550225502255024,
103
+ "grad_norm": 0.6862415671348572,
104
+ "learning_rate": 2.745387453874539e-05,
105
+ "loss": 0.3921,
106
+ "step": 5500
107
+ },
108
+ {
109
+ "epoch": 2.4600246002460024,
110
+ "grad_norm": 0.6407959461212158,
111
+ "learning_rate": 2.5403854038540385e-05,
112
+ "loss": 0.3894,
113
+ "step": 6000
114
+ },
115
+ {
116
+ "epoch": 2.6650266502665025,
117
+ "grad_norm": 0.685972273349762,
118
+ "learning_rate": 2.3353833538335385e-05,
119
+ "loss": 0.386,
120
+ "step": 6500
121
+ },
122
+ {
123
+ "epoch": 2.870028700287003,
124
+ "grad_norm": 0.5890597701072693,
125
+ "learning_rate": 2.1303813038130384e-05,
126
+ "loss": 0.3875,
127
+ "step": 7000
128
+ },
129
+ {
130
+ "epoch": 3.0,
131
+ "eval_bleu_eng_Latn-swh_Latn": 74.8639644432928,
132
+ "eval_bleu_swh_Latn-eng_Latn": 61.05959270054286,
133
+ "eval_loss": 0.4528512954711914,
134
+ "eval_runtime": 843.9728,
135
+ "eval_samples_per_second": 16.496,
136
+ "eval_steps_per_second": 2.063,
137
+ "step": 7317
138
  }
139
  ],
140
  "logging_steps": 500,
 
154
  "attributes": {}
155
  }
156
  },
157
+ "total_flos": 3.253311606305587e+16,
158
  "train_batch_size": 8,
159
  "trial_name": null,
160
  "trial_params": null