ProgramInNonsense commited on
Commit
10c2de2
·
verified ·
1 Parent(s): cbfa7ae

Training in progress, step 2850, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:34a5bba41906f9f428e6b9882dfd4d856b56c484f938adb5b91c13f807825343
3
  size 319876032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:19ebc916102bd897c5a532ac3f58497235a1148be37ea6a1ea9db19d99ca1db5
3
  size 319876032
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c2fac45a83552e6f54d1d48788ee1ad99cce2a8a3748b5d5279def750c7f2649
3
  size 640010002
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d55e197c8f406a9f22113b0e432bce93d4a21dd8f9d620ac4152d1d3b245d71b
3
  size 640010002
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e7c3255f314f041e3de52c6733809edd66559e9181ef4ef8f2fba5b079b8c283
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:907e1adaa4ec46d2f92c4fd8268f0e5676d486ec2724a1d5fa466592b50eb492
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3e8e814a3ea7a3fec5d7f1032ab27d6ea4d3652fa198b69d7c23c46d7aaf8587
3
  size 1256
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b8777906a662e6dafe00feb5f2bf980ad201a49ea0c6fff31eeab815bd530b6
3
  size 1256
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.2451845407485962,
3
  "best_model_checkpoint": "./output/checkpoint-1950",
4
- "epoch": 3.982300884955752,
5
  "eval_steps": 150,
6
- "global_step": 2700,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2041,6 +2041,119 @@
2041
  "eval_samples_per_second": 6.928,
2042
  "eval_steps_per_second": 6.928,
2043
  "step": 2700
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2044
  }
2045
  ],
2046
  "logging_steps": 10,
@@ -2060,7 +2173,7 @@
2060
  "attributes": {}
2061
  }
2062
  },
2063
- "total_flos": 5.030623090821857e+17,
2064
  "train_batch_size": 4,
2065
  "trial_name": null,
2066
  "trial_params": null
 
1
  {
2
  "best_metric": 0.2451845407485962,
3
  "best_model_checkpoint": "./output/checkpoint-1950",
4
+ "epoch": 4.20353982300885,
5
  "eval_steps": 150,
6
+ "global_step": 2850,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2041
  "eval_samples_per_second": 6.928,
2042
  "eval_steps_per_second": 6.928,
2043
  "step": 2700
2044
+ },
2045
+ {
2046
+ "epoch": 3.9970501474926254,
2047
+ "grad_norm": 0.3948822021484375,
2048
+ "learning_rate": 1.851294233700798e-05,
2049
+ "loss": 0.058,
2050
+ "step": 2710
2051
+ },
2052
+ {
2053
+ "epoch": 4.011799410029498,
2054
+ "grad_norm": 2.1759111881256104,
2055
+ "learning_rate": 1.8381446401780052e-05,
2056
+ "loss": 0.0544,
2057
+ "step": 2720
2058
+ },
2059
+ {
2060
+ "epoch": 4.0265486725663715,
2061
+ "grad_norm": 1.0792875289916992,
2062
+ "learning_rate": 1.825004269025315e-05,
2063
+ "loss": 0.0438,
2064
+ "step": 2730
2065
+ },
2066
+ {
2067
+ "epoch": 4.041297935103245,
2068
+ "grad_norm": 0.6035469770431519,
2069
+ "learning_rate": 1.811873660391918e-05,
2070
+ "loss": 0.068,
2071
+ "step": 2740
2072
+ },
2073
+ {
2074
+ "epoch": 4.056047197640118,
2075
+ "grad_norm": 2.127488613128662,
2076
+ "learning_rate": 1.7987533540257062e-05,
2077
+ "loss": 0.0842,
2078
+ "step": 2750
2079
+ },
2080
+ {
2081
+ "epoch": 4.070796460176991,
2082
+ "grad_norm": 0.9804584980010986,
2083
+ "learning_rate": 1.7856438892510862e-05,
2084
+ "loss": 0.0512,
2085
+ "step": 2760
2086
+ },
2087
+ {
2088
+ "epoch": 4.0855457227138645,
2089
+ "grad_norm": 0.3105282187461853,
2090
+ "learning_rate": 1.772545804946807e-05,
2091
+ "loss": 0.0632,
2092
+ "step": 2770
2093
+ },
2094
+ {
2095
+ "epoch": 4.100294985250738,
2096
+ "grad_norm": 0.9263339638710022,
2097
+ "learning_rate": 1.759459639523813e-05,
2098
+ "loss": 0.0495,
2099
+ "step": 2780
2100
+ },
2101
+ {
2102
+ "epoch": 4.115044247787611,
2103
+ "grad_norm": 0.42594772577285767,
2104
+ "learning_rate": 1.7463859309031106e-05,
2105
+ "loss": 0.0467,
2106
+ "step": 2790
2107
+ },
2108
+ {
2109
+ "epoch": 4.129793510324483,
2110
+ "grad_norm": 1.5321959257125854,
2111
+ "learning_rate": 1.7333252164936557e-05,
2112
+ "loss": 0.0622,
2113
+ "step": 2800
2114
+ },
2115
+ {
2116
+ "epoch": 4.144542772861357,
2117
+ "grad_norm": 0.47825008630752563,
2118
+ "learning_rate": 1.7202780331702608e-05,
2119
+ "loss": 0.0667,
2120
+ "step": 2810
2121
+ },
2122
+ {
2123
+ "epoch": 4.15929203539823,
2124
+ "grad_norm": 2.095520496368408,
2125
+ "learning_rate": 1.70724491725153e-05,
2126
+ "loss": 0.063,
2127
+ "step": 2820
2128
+ },
2129
+ {
2130
+ "epoch": 4.174041297935103,
2131
+ "grad_norm": 0.4359021782875061,
2132
+ "learning_rate": 1.694226404477812e-05,
2133
+ "loss": 0.0429,
2134
+ "step": 2830
2135
+ },
2136
+ {
2137
+ "epoch": 4.188790560471976,
2138
+ "grad_norm": 2.7264065742492676,
2139
+ "learning_rate": 1.681223029989177e-05,
2140
+ "loss": 0.0501,
2141
+ "step": 2840
2142
+ },
2143
+ {
2144
+ "epoch": 4.20353982300885,
2145
+ "grad_norm": 0.8709071278572083,
2146
+ "learning_rate": 1.66823532830342e-05,
2147
+ "loss": 0.0464,
2148
+ "step": 2850
2149
+ },
2150
+ {
2151
+ "epoch": 4.20353982300885,
2152
+ "eval_loss": 0.29095226526260376,
2153
+ "eval_runtime": 43.6544,
2154
+ "eval_samples_per_second": 6.918,
2155
+ "eval_steps_per_second": 6.918,
2156
+ "step": 2850
2157
  }
2158
  ],
2159
  "logging_steps": 10,
 
2173
  "attributes": {}
2174
  }
2175
  },
2176
+ "total_flos": 5.313588061300654e+17,
2177
  "train_batch_size": 4,
2178
  "trial_name": null,
2179
  "trial_params": null