SystemAdmin123 commited on
Commit
622ec42
·
verified ·
1 Parent(s): 980f6a5

Training in progress, step 80, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:159277feec320a7592284f9e3696651e378c75cb98a9616d5fc9a41efc0eec40
3
  size 183784
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b8537667eb2c1590a1b2a1c34f2de9292e65cc7ace043f57a94bbce298c07cee
3
  size 183784
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3c71a9e87aa3c595eecb7f30d889af53933360e76b6456ae393643c7d90dcacb
3
  size 236760
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1ff737ce4bd3480df082e35a2ceac05f083a433be3eb3fc3fc4b55760a97bcda
3
  size 236760
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:99ae411404197432466fcb959effa7956b7aabac6b0ee3018ce67d44282b87b1
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db3e559c298d4a2aa71fc2be31e00e408588329761236aec5fe912fd29d2384d
3
  size 15024
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0e0c13bbed523a6d7bec142d7a3836e9629b2dc23935ee4a5086689a05f762e6
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a7417d3df19f914d5ff3de50e1ce3e883c2e459b2dba528703683d4a328a4e0f
3
  size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:29316b978407a35ab6f860f3a2bcf442e67f1f9bd92ef1016961e6d3aa0c3d14
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9c3395b638fca0417167bebc767ae35bf6c7bb1a639f0851b624a26e9c08419e
3
  size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:45e9c496f59967ab95befd85ccf8b9fef5104a06d33cdbe2714ed501882c6167
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f14ae5014776de6223329b6dc8f9efc58d39f88f71b06e9dacbef3ff996556e
3
  size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bcaaab9b4e9d97f524192da2cde7b8ea63f0956124955e5031658e7310a4fbcd
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aab4ba69e83f65129bdf14a330e6b14308f6903214fa4aa747256ae7ff2c663e
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.3793103448275863,
5
  "eval_steps": 16,
6
- "global_step": 40,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -59,6 +59,58 @@
59
  "learning_rate": 8.945702546981969e-05,
60
  "loss": 11.9214,
61
  "step": 40
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
  }
63
  ],
64
  "logging_steps": 10,
@@ -78,7 +130,7 @@
78
  "attributes": {}
79
  }
80
  },
81
- "total_flos": 647500922880.0,
82
  "train_batch_size": 7,
83
  "trial_name": null,
84
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 2.7586206896551726,
5
  "eval_steps": 16,
6
+ "global_step": 80,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
59
  "learning_rate": 8.945702546981969e-05,
60
  "loss": 11.9214,
61
  "step": 40
62
+ },
63
+ {
64
+ "epoch": 1.6551724137931034,
65
+ "eval_loss": 11.91930103302002,
66
+ "eval_runtime": 0.8997,
67
+ "eval_samples_per_second": 167.834,
68
+ "eval_steps_per_second": 6.669,
69
+ "step": 48
70
+ },
71
+ {
72
+ "epoch": 1.7241379310344827,
73
+ "grad_norm": 0.024184564128518105,
74
+ "learning_rate": 8.231496189304704e-05,
75
+ "loss": 11.9204,
76
+ "step": 50
77
+ },
78
+ {
79
+ "epoch": 2.0689655172413794,
80
+ "grad_norm": 0.029018325731158257,
81
+ "learning_rate": 7.379736965185368e-05,
82
+ "loss": 11.9198,
83
+ "step": 60
84
+ },
85
+ {
86
+ "epoch": 2.206896551724138,
87
+ "eval_loss": 11.917800903320312,
88
+ "eval_runtime": 0.8209,
89
+ "eval_samples_per_second": 183.936,
90
+ "eval_steps_per_second": 7.309,
91
+ "step": 64
92
+ },
93
+ {
94
+ "epoch": 2.413793103448276,
95
+ "grad_norm": 0.025336025282740593,
96
+ "learning_rate": 6.426681121245527e-05,
97
+ "loss": 11.919,
98
+ "step": 70
99
+ },
100
+ {
101
+ "epoch": 2.7586206896551726,
102
+ "grad_norm": 0.027750149369239807,
103
+ "learning_rate": 5.4128967273616625e-05,
104
+ "loss": 11.9188,
105
+ "step": 80
106
+ },
107
+ {
108
+ "epoch": 2.7586206896551726,
109
+ "eval_loss": 11.916128158569336,
110
+ "eval_runtime": 0.8846,
111
+ "eval_samples_per_second": 170.691,
112
+ "eval_steps_per_second": 6.782,
113
+ "step": 80
114
  }
115
  ],
116
  "logging_steps": 10,
 
130
  "attributes": {}
131
  }
132
  },
133
+ "total_flos": 1295001845760.0,
134
  "train_batch_size": 7,
135
  "trial_name": null,
136
  "trial_params": null