Model save

Browse files

Files changed (4) hide show

README.md +57 -0
all_results.json +9 -0
train_results.json +9 -0
trainer_state.json +1073 -0

README.md ADDED Viewed

	@@ -0,0 +1,57 @@

+---
+base_model: mistralai/Mistral-7B-Instruct-v0.2
+library_name: transformers
+model_name: prometheus-7b-v1.5-beta-1-over1218-pref
+tags:
+- generated_from_trainer
+- trl
+- sft
+license: apache-2.0  # NOTE(review): assumed to match the base model's license; confirm before publishing
+---
+# Model Card for prometheus-7b-v1.5-beta-1-over1218-pref
+This model is a fine-tuned version of [mistralai/Mistral-7B-Instruct-v0.2](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2).
+It has been trained using [TRL](https://github.com/huggingface/trl).
+## Quick start
+```python
+from transformers import pipeline
+question = "If you had a time machine, but could only go to the past or the future once and never return, which would you choose and why?"
+generator = pipeline("text-generation", model="christineyu/prometheus-7b-v1.5-beta-1-over1218-pref", device="cuda")
+output = generator([{"role": "user", "content": question}], max_new_tokens=128, return_full_text=False)[0]
+print(output["generated_text"])
+```
+## Training procedure
+[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/myexp/huggingface/runs/swm45e6g)
+This model was trained with SFT.
+### Framework versions
+- TRL: 0.12.2
+- Transformers: 4.46.3
+- PyTorch: 2.3.0
+- Datasets: 3.2.0
+- Tokenizers: 0.20.3
+## Citations
+Cite TRL as:
+```bibtex
+@misc{vonwerra2022trl,
+	title        = {{TRL: Transformer Reinforcement Learning}},
+	author       = {Leandro von Werra and Younes Belkada and Lewis Tunstall and Edward Beeching and Tristan Thrush and Nathan Lambert and Shengyi Huang and Kashif Rasul and Quentin Gallouédec},
+	year         = 2020,
+	journal      = {GitHub repository},
+	publisher    = {GitHub},
+	howpublished = {\url{https://github.com/huggingface/trl}}
+}
+```

all_results.json ADDED Viewed

	@@ -0,0 +1,9 @@

+{
+    "epoch": 2.0,
+    "total_flos": 1.3020461702683034e+17,
+    "train_loss": 0.037445212856265314,
+    "train_runtime": 3051.2562,
+    "train_samples": 990,
+    "train_samples_per_second": 0.237,
+    "train_steps_per_second": 0.237
+}

train_results.json ADDED Viewed

	@@ -0,0 +1,9 @@

+{
+    "epoch": 2.0,
+    "total_flos": 1.3020461702683034e+17,
+    "train_loss": 0.037445212856265314,
+    "train_runtime": 3051.2562,
+    "train_samples": 990,
+    "train_samples_per_second": 0.237,
+    "train_steps_per_second": 0.237
+}

trainer_state.json ADDED Viewed

	@@ -0,0 +1,1073 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 2.0,
+  "eval_steps": 500,
+  "global_step": 722,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.002770083102493075,
+      "grad_norm": 4.970390796661377,
+      "learning_rate": 1.36986301369863e-07,
+      "loss": 0.7038,
+      "step": 1
+    },
+    {
+      "epoch": 0.013850415512465374,
+      "grad_norm": 5.532266139984131,
+      "learning_rate": 6.849315068493151e-07,
+      "loss": 0.7051,
+      "step": 5
+    },
+    {
+      "epoch": 0.027700831024930747,
+      "grad_norm": 3.3267316818237305,
+      "learning_rate": 1.3698630136986302e-06,
+      "loss": 0.6794,
+      "step": 10
+    },
+    {
+      "epoch": 0.04155124653739612,
+      "grad_norm": 2.5890796184539795,
+      "learning_rate": 2.0547945205479454e-06,
+      "loss": 0.632,
+      "step": 15
+    },
+    {
+      "epoch": 0.055401662049861494,
+      "grad_norm": 2.3750393390655518,
+      "learning_rate": 2.7397260273972604e-06,
+      "loss": 0.5698,
+      "step": 20
+    },
+    {
+      "epoch": 0.06925207756232687,
+      "grad_norm": 2.339930295944214,
+      "learning_rate": 3.4246575342465754e-06,
+      "loss": 0.5216,
+      "step": 25
+    },
+    {
+      "epoch": 0.08310249307479224,
+      "grad_norm": 2.103806734085083,
+      "learning_rate": 4.109589041095891e-06,
+      "loss": 0.4612,
+      "step": 30
+    },
+    {
+      "epoch": 0.09695290858725762,
+      "grad_norm": 2.015624761581421,
+      "learning_rate": 4.7945205479452054e-06,
+      "loss": 0.3838,
+      "step": 35
+    },
+    {
+      "epoch": 0.11080332409972299,
+      "grad_norm": 2.146289587020874,
+      "learning_rate": 5.479452054794521e-06,
+      "loss": 0.2954,
+      "step": 40
+    },
+    {
+      "epoch": 0.12465373961218837,
+      "grad_norm": 2.125903844833374,
+      "learning_rate": 6.164383561643836e-06,
+      "loss": 0.1999,
+      "step": 45
+    },
+    {
+      "epoch": 0.13850415512465375,
+      "grad_norm": 2.214215040206909,
+      "learning_rate": 6.849315068493151e-06,
+      "loss": 0.1135,
+      "step": 50
+    },
+    {
+      "epoch": 0.1523545706371191,
+      "grad_norm": 3.1861069202423096,
+      "learning_rate": 7.534246575342466e-06,
+      "loss": 0.0612,
+      "step": 55
+    },
+    {
+      "epoch": 0.16620498614958448,
+      "grad_norm": 2.2513909339904785,
+      "learning_rate": 8.219178082191782e-06,
+      "loss": 0.0513,
+      "step": 60
+    },
+    {
+      "epoch": 0.18005540166204986,
+      "grad_norm": 1.5095714330673218,
+      "learning_rate": 8.904109589041097e-06,
+      "loss": 0.033,
+      "step": 65
+    },
+    {
+      "epoch": 0.19390581717451524,
+      "grad_norm": 1.3538941144943237,
+      "learning_rate": 9.589041095890411e-06,
+      "loss": 0.0338,
+      "step": 70
+    },
+    {
+      "epoch": 0.2077562326869806,
+      "grad_norm": 1.7012300491333008,
+      "learning_rate": 9.999765681294371e-06,
+      "loss": 0.0231,
+      "step": 75
+    },
+    {
+      "epoch": 0.22160664819944598,
+      "grad_norm": 0.762015700340271,
+      "learning_rate": 9.997129848070563e-06,
+      "loss": 0.0182,
+      "step": 80
+    },
+    {
+      "epoch": 0.23545706371191136,
+      "grad_norm": 1.277585744857788,
+      "learning_rate": 9.991566832387564e-06,
+      "loss": 0.0154,
+      "step": 85
+    },
+    {
+      "epoch": 0.24930747922437674,
+      "grad_norm": 0.8343765735626221,
+      "learning_rate": 9.983079892908332e-06,
+      "loss": 0.0185,
+      "step": 90
+    },
+    {
+      "epoch": 0.2631578947368421,
+      "grad_norm": 1.3323298692703247,
+      "learning_rate": 9.971674001050687e-06,
+      "loss": 0.0158,
+      "step": 95
+    },
+    {
+      "epoch": 0.2770083102493075,
+      "grad_norm": 0.4257211983203888,
+      "learning_rate": 9.957355838075188e-06,
+      "loss": 0.0154,
+      "step": 100
+    },
+    {
+      "epoch": 0.29085872576177285,
+      "grad_norm": 0.6481120586395264,
+      "learning_rate": 9.940133791171445e-06,
+      "loss": 0.0105,
+      "step": 105
+    },
+    {
+      "epoch": 0.3047091412742382,
+      "grad_norm": 0.8009067177772522,
+      "learning_rate": 9.920017948545109e-06,
+      "loss": 0.0111,
+      "step": 110
+    },
+    {
+      "epoch": 0.3185595567867036,
+      "grad_norm": 0.4706541895866394,
+      "learning_rate": 9.897020093508502e-06,
+      "loss": 0.0123,
+      "step": 115
+    },
+    {
+      "epoch": 0.33240997229916897,
+      "grad_norm": 0.7167349457740784,
+      "learning_rate": 9.871153697578254e-06,
+      "loss": 0.0129,
+      "step": 120
+    },
+    {
+      "epoch": 0.3462603878116344,
+      "grad_norm": 0.9122310876846313,
+      "learning_rate": 9.842433912584066e-06,
+      "loss": 0.0118,
+      "step": 125
+    },
+    {
+      "epoch": 0.3601108033240997,
+      "grad_norm": 0.3195926249027252,
+      "learning_rate": 9.810877561793178e-06,
+      "loss": 0.0091,
+      "step": 130
+    },
+    {
+      "epoch": 0.3739612188365651,
+      "grad_norm": 0.5710881352424622,
+      "learning_rate": 9.776503130055758e-06,
+      "loss": 0.0116,
+      "step": 135
+    },
+    {
+      "epoch": 0.3878116343490305,
+      "grad_norm": 0.6485520005226135,
+      "learning_rate": 9.739330752976981e-06,
+      "loss": 0.0129,
+      "step": 140
+    },
+    {
+      "epoch": 0.40166204986149584,
+      "grad_norm": 0.41829606890678406,
+      "learning_rate": 9.699382205122138e-06,
+      "loss": 0.0105,
+      "step": 145
+    },
+    {
+      "epoch": 0.4155124653739612,
+      "grad_norm": 0.4999130070209503,
+      "learning_rate": 9.656680887261693e-06,
+      "loss": 0.0096,
+      "step": 150
+    },
+    {
+      "epoch": 0.4293628808864266,
+      "grad_norm": 0.4883331060409546,
+      "learning_rate": 9.611251812663748e-06,
+      "loss": 0.0085,
+      "step": 155
+    },
+    {
+      "epoch": 0.44321329639889195,
+      "grad_norm": 0.33876070380210876,
+      "learning_rate": 9.563121592441949e-06,
+      "loss": 0.006,
+      "step": 160
+    },
+    {
+      "epoch": 0.45706371191135736,
+      "grad_norm": 0.9331468939781189,
+      "learning_rate": 9.512318419967427e-06,
+      "loss": 0.0062,
+      "step": 165
+    },
+    {
+      "epoch": 0.4709141274238227,
+      "grad_norm": 0.5563158988952637,
+      "learning_rate": 9.458872054353888e-06,
+      "loss": 0.0072,
+      "step": 170
+    },
+    {
+      "epoch": 0.48476454293628807,
+      "grad_norm": 0.7999743223190308,
+      "learning_rate": 9.402813803025526e-06,
+      "loss": 0.0063,
+      "step": 175
+    },
+    {
+      "epoch": 0.4986149584487535,
+      "grad_norm": 0.35912105441093445,
+      "learning_rate": 9.344176503378003e-06,
+      "loss": 0.0069,
+      "step": 180
+    },
+    {
+      "epoch": 0.5124653739612188,
+      "grad_norm": 0.3588772416114807,
+      "learning_rate": 9.282994503543185e-06,
+      "loss": 0.0068,
+      "step": 185
+    },
+    {
+      "epoch": 0.5263157894736842,
+      "grad_norm": 0.5413822531700134,
+      "learning_rate": 9.219303642268953e-06,
+      "loss": 0.0082,
+      "step": 190
+    },
+    {
+      "epoch": 0.5401662049861495,
+      "grad_norm": 0.32814571261405945,
+      "learning_rate": 9.153141227925828e-06,
+      "loss": 0.0057,
+      "step": 195
+    },
+    {
+      "epoch": 0.554016620498615,
+      "grad_norm": 0.48693302273750305,
+      "learning_rate": 9.084546016652758e-06,
+      "loss": 0.0078,
+      "step": 200
+    },
+    {
+      "epoch": 0.5678670360110804,
+      "grad_norm": 0.20453135669231415,
+      "learning_rate": 9.013558189654819e-06,
+      "loss": 0.0081,
+      "step": 205
+    },
+    {
+      "epoch": 0.5817174515235457,
+      "grad_norm": 0.32578709721565247,
+      "learning_rate": 8.940219329666167e-06,
+      "loss": 0.0048,
+      "step": 210
+    },
+    {
+      "epoch": 0.5955678670360111,
+      "grad_norm": 0.32217937707901,
+      "learning_rate": 8.864572396591996e-06,
+      "loss": 0.0053,
+      "step": 215
+    },
+    {
+      "epoch": 0.6094182825484764,
+      "grad_norm": 0.6116990447044373,
+      "learning_rate": 8.786661702343811e-06,
+      "loss": 0.0067,
+      "step": 220
+    },
+    {
+      "epoch": 0.6232686980609419,
+      "grad_norm": 0.6656084060668945,
+      "learning_rate": 8.706532884882704e-06,
+      "loss": 0.0067,
+      "step": 225
+    },
+    {
+      "epoch": 0.6371191135734072,
+      "grad_norm": 0.4221888482570648,
+      "learning_rate": 8.624232881485887e-06,
+      "loss": 0.0069,
+      "step": 230
+    },
+    {
+      "epoch": 0.6509695290858726,
+      "grad_norm": 0.35918885469436646,
+      "learning_rate": 8.539809901252118e-06,
+      "loss": 0.006,
+      "step": 235
+    },
+    {
+      "epoch": 0.6648199445983379,
+      "grad_norm": 0.3014688193798065,
+      "learning_rate": 8.453313396862113e-06,
+      "loss": 0.0052,
+      "step": 240
+    },
+    {
+      "epoch": 0.6786703601108033,
+      "grad_norm": 0.32176536321640015,
+      "learning_rate": 8.364794035610527e-06,
+      "loss": 0.0061,
+      "step": 245
+    },
+    {
+      "epoch": 0.6925207756232687,
+      "grad_norm": 0.2977988123893738,
+      "learning_rate": 8.274303669726427e-06,
+      "loss": 0.0051,
+      "step": 250
+    },
+    {
+      "epoch": 0.7063711911357341,
+      "grad_norm": 0.50001460313797,
+      "learning_rate": 8.181895305999665e-06,
+      "loss": 0.0072,
+      "step": 255
+    },
+    {
+      "epoch": 0.7202216066481995,
+      "grad_norm": 0.7377979159355164,
+      "learning_rate": 8.08762307473096e-06,
+      "loss": 0.0064,
+      "step": 260
+    },
+    {
+      "epoch": 0.7340720221606648,
+      "grad_norm": 0.3045377731323242,
+      "learning_rate": 7.991542198023827e-06,
+      "loss": 0.0051,
+      "step": 265
+    },
+    {
+      "epoch": 0.7479224376731302,
+      "grad_norm": 0.22468125820159912,
+      "learning_rate": 7.893708957436982e-06,
+      "loss": 0.0056,
+      "step": 270
+    },
+    {
+      "epoch": 0.7617728531855956,
+      "grad_norm": 0.4352708160877228,
+      "learning_rate": 7.794180661016143e-06,
+      "loss": 0.0042,
+      "step": 275
+    },
+    {
+      "epoch": 0.775623268698061,
+      "grad_norm": 0.29045531153678894,
+      "learning_rate": 7.693015609724524e-06,
+      "loss": 0.0065,
+      "step": 280
+    },
+    {
+      "epoch": 0.7894736842105263,
+      "grad_norm": 0.35637104511260986,
+      "learning_rate": 7.5902730632917395e-06,
+      "loss": 0.0047,
+      "step": 285
+    },
+    {
+      "epoch": 0.8033240997229917,
+      "grad_norm": 0.5587311387062073,
+      "learning_rate": 7.486013205501053e-06,
+      "loss": 0.0063,
+      "step": 290
+    },
+    {
+      "epoch": 0.817174515235457,
+      "grad_norm": 0.19924254715442657,
+      "learning_rate": 7.3802971089353696e-06,
+      "loss": 0.0055,
+      "step": 295
+    },
+    {
+      "epoch": 0.8310249307479224,
+      "grad_norm": 0.39876681566238403,
+      "learning_rate": 7.273186699202572e-06,
+      "loss": 0.0067,
+      "step": 300
+    },
+    {
+      "epoch": 0.8448753462603878,
+      "grad_norm": 0.6315239667892456,
+      "learning_rate": 7.164744718661198e-06,
+      "loss": 0.0045,
+      "step": 305
+    },
+    {
+      "epoch": 0.8587257617728532,
+      "grad_norm": 0.31891411542892456,
+      "learning_rate": 7.055034689667661e-06,
+      "loss": 0.0042,
+      "step": 310
+    },
+    {
+      "epoch": 0.8725761772853186,
+      "grad_norm": 0.34718984365463257,
+      "learning_rate": 6.944120877366605e-06,
+      "loss": 0.0045,
+      "step": 315
+    },
+    {
+      "epoch": 0.8864265927977839,
+      "grad_norm": 0.3197481632232666,
+      "learning_rate": 6.832068252046116e-06,
+      "loss": 0.0051,
+      "step": 320
+    },
+    {
+      "epoch": 0.9002770083102493,
+      "grad_norm": 0.298637717962265,
+      "learning_rate": 6.718942451079911e-06,
+      "loss": 0.0055,
+      "step": 325
+    },
+    {
+      "epoch": 0.9141274238227147,
+      "grad_norm": 0.3373955190181732,
+      "learning_rate": 6.604809740478748e-06,
+      "loss": 0.0045,
+      "step": 330
+    },
+    {
+      "epoch": 0.9279778393351801,
+      "grad_norm": 0.5179949402809143,
+      "learning_rate": 6.489736976073603e-06,
+      "loss": 0.0054,
+      "step": 335
+    },
+    {
+      "epoch": 0.9418282548476454,
+      "grad_norm": 0.31394144892692566,
+      "learning_rate": 6.3737915643533484e-06,
+      "loss": 0.0049,
+      "step": 340
+    },
+    {
+      "epoch": 0.9556786703601108,
+      "grad_norm": 0.25130563974380493,
+      "learning_rate": 6.257041422979871e-06,
+      "loss": 0.0039,
+      "step": 345
+    },
+    {
+      "epoch": 0.9695290858725761,
+      "grad_norm": 0.42873042821884155,
+      "learning_rate": 6.139554941003747e-06,
+      "loss": 0.0055,
+      "step": 350
+    },
+    {
+      "epoch": 0.9833795013850416,
+      "grad_norm": 0.2586114704608917,
+      "learning_rate": 6.021400938803813e-06,
+      "loss": 0.0042,
+      "step": 355
+    },
+    {
+      "epoch": 0.997229916897507,
+      "grad_norm": 0.3645874559879303,
+      "learning_rate": 5.902648627774059e-06,
+      "loss": 0.0058,
+      "step": 360
+    },
+    {
+      "epoch": 1.0,
+      "eval_loss": 0.0030423467978835106,
+      "eval_runtime": 3.8207,
+      "eval_samples_per_second": 0.785,
+      "eval_steps_per_second": 0.785,
+      "step": 361
+    },
+    {
+      "epoch": 1.0110803324099722,
+      "grad_norm": 0.524590790271759,
+      "learning_rate": 5.783367569781474e-06,
+      "loss": 0.0053,
+      "step": 365
+    },
+    {
+      "epoch": 1.0249307479224377,
+      "grad_norm": 0.2261294275522232,
+      "learning_rate": 5.663627636418611e-06,
+      "loss": 0.0036,
+      "step": 370
+    },
+    {
+      "epoch": 1.0387811634349031,
+      "grad_norm": 0.16315680742263794,
+      "learning_rate": 5.543498968074704e-06,
+      "loss": 0.003,
+      "step": 375
+    },
+    {
+      "epoch": 1.0526315789473684,
+      "grad_norm": 0.23269863426685333,
+      "learning_rate": 5.423051932849348e-06,
+      "loss": 0.0027,
+      "step": 380
+    },
+    {
+      "epoch": 1.0664819944598338,
+      "grad_norm": 0.29580602049827576,
+      "learning_rate": 5.3023570853327725e-06,
+      "loss": 0.0037,
+      "step": 385
+    },
+    {
+      "epoch": 1.080332409972299,
+      "grad_norm": 0.2091536670923233,
+      "learning_rate": 5.181485125276898e-06,
+      "loss": 0.0023,
+      "step": 390
+    },
+    {
+      "epoch": 1.0941828254847645,
+      "grad_norm": 0.2704831063747406,
+      "learning_rate": 5.060506856181342e-06,
+      "loss": 0.0035,
+      "step": 395
+    },
+    {
+      "epoch": 1.10803324099723,
+      "grad_norm": 0.5069320201873779,
+      "learning_rate": 4.939493143818659e-06,
+      "loss": 0.0028,
+      "step": 400
+    },
+    {
+      "epoch": 1.1218836565096952,
+      "grad_norm": 0.7631614804267883,
+      "learning_rate": 4.818514874723103e-06,
+      "loss": 0.0042,
+      "step": 405
+    },
+    {
+      "epoch": 1.1357340720221607,
+      "grad_norm": 0.26314568519592285,
+      "learning_rate": 4.697642914667229e-06,
+      "loss": 0.0041,
+      "step": 410
+    },
+    {
+      "epoch": 1.149584487534626,
+      "grad_norm": 0.20495057106018066,
+      "learning_rate": 4.576948067150655e-06,
+      "loss": 0.003,
+      "step": 415
+    },
+    {
+      "epoch": 1.1634349030470914,
+      "grad_norm": 0.2521879971027374,
+      "learning_rate": 4.456501031925297e-06,
+      "loss": 0.0037,
+      "step": 420
+    },
+    {
+      "epoch": 1.1772853185595569,
+      "grad_norm": 0.23261719942092896,
+      "learning_rate": 4.336372363581391e-06,
+      "loss": 0.0027,
+      "step": 425
+    },
+    {
+      "epoch": 1.1911357340720221,
+      "grad_norm": 0.19368702173233032,
+      "learning_rate": 4.216632430218528e-06,
+      "loss": 0.0033,
+      "step": 430
+    },
+    {
+      "epoch": 1.2049861495844876,
+      "grad_norm": 0.5213963389396667,
+      "learning_rate": 4.097351372225943e-06,
+      "loss": 0.0028,
+      "step": 435
+    },
+    {
+      "epoch": 1.2188365650969528,
+      "grad_norm": 0.16888286173343658,
+      "learning_rate": 3.978599061196188e-06,
+      "loss": 0.003,
+      "step": 440
+    },
+    {
+      "epoch": 1.2326869806094183,
+      "grad_norm": 0.3124845027923584,
+      "learning_rate": 3.860445058996255e-06,
+      "loss": 0.0034,
+      "step": 445
+    },
+    {
+      "epoch": 1.2465373961218837,
+      "grad_norm": 0.25205326080322266,
+      "learning_rate": 3.7429585770201314e-06,
+      "loss": 0.0028,
+      "step": 450
+    },
+    {
+      "epoch": 1.260387811634349,
+      "grad_norm": 0.27026185393333435,
+      "learning_rate": 3.626208435646652e-06,
+      "loss": 0.0023,
+      "step": 455
+    },
+    {
+      "epoch": 1.2742382271468145,
+      "grad_norm": 0.19147123396396637,
+      "learning_rate": 3.5102630239263986e-06,
+      "loss": 0.0021,
+      "step": 460
+    },
+    {
+      "epoch": 1.2880886426592797,
+      "grad_norm": 0.21509826183319092,
+      "learning_rate": 3.395190259521254e-06,
+      "loss": 0.0039,
+      "step": 465
+    },
+    {
+      "epoch": 1.3019390581717452,
+      "grad_norm": 0.3184572160243988,
+      "learning_rate": 3.281057548920091e-06,
+      "loss": 0.0034,
+      "step": 470
+    },
+    {
+      "epoch": 1.3157894736842106,
+      "grad_norm": 0.3757064640522003,
+      "learning_rate": 3.1679317479538864e-06,
+      "loss": 0.0036,
+      "step": 475
+    },
+    {
+      "epoch": 1.3296398891966759,
+      "grad_norm": 0.23070411384105682,
+      "learning_rate": 3.0558791226333974e-06,
+      "loss": 0.002,
+      "step": 480
+    },
+    {
+      "epoch": 1.3434903047091413,
+      "grad_norm": 0.16266323626041412,
+      "learning_rate": 2.9449653103323405e-06,
+      "loss": 0.003,
+      "step": 485
+    },
+    {
+      "epoch": 1.3573407202216066,
+      "grad_norm": 0.235463485121727,
+      "learning_rate": 2.8352552813388035e-06,
+      "loss": 0.0022,
+      "step": 490
+    },
+    {
+      "epoch": 1.371191135734072,
+      "grad_norm": 0.21921995282173157,
+      "learning_rate": 2.7268133007974284e-06,
+      "loss": 0.0025,
+      "step": 495
+    },
+    {
+      "epoch": 1.3850415512465375,
+      "grad_norm": 0.17426297068595886,
+      "learning_rate": 2.6197028910646304e-06,
+      "loss": 0.0032,
+      "step": 500
+    },
+    {
+      "epoch": 1.3988919667590027,
+      "grad_norm": 0.14913178980350494,
+      "learning_rate": 2.5139867944989483e-06,
+      "loss": 0.0022,
+      "step": 505
+    },
+    {
+      "epoch": 1.4127423822714682,
+      "grad_norm": 0.37892332673072815,
+      "learning_rate": 2.409726936708263e-06,
+      "loss": 0.0024,
+      "step": 510
+    },
+    {
+      "epoch": 1.4265927977839334,
+      "grad_norm": 0.20658189058303833,
+      "learning_rate": 2.3069843902754767e-06,
+      "loss": 0.0034,
+      "step": 515
+    },
+    {
+      "epoch": 1.440443213296399,
+      "grad_norm": 0.29341772198677063,
+      "learning_rate": 2.205819338983859e-06,
+      "loss": 0.0031,
+      "step": 520
+    },
+    {
+      "epoch": 1.4542936288088644,
+      "grad_norm": 0.228977769613266,
+      "learning_rate": 2.106291042563019e-06,
+      "loss": 0.002,
+      "step": 525
+    },
+    {
+      "epoch": 1.4681440443213296,
+      "grad_norm": 0.2889719009399414,
+      "learning_rate": 2.0084578019761738e-06,
+      "loss": 0.0031,
+      "step": 530
+    },
+    {
+      "epoch": 1.481994459833795,
+      "grad_norm": 0.16132836043834686,
+      "learning_rate": 1.912376925269041e-06,
+      "loss": 0.0026,
+      "step": 535
+    },
+    {
+      "epoch": 1.4958448753462603,
+      "grad_norm": 0.3356161117553711,
+      "learning_rate": 1.8181046940003366e-06,
+      "loss": 0.0028,
+      "step": 540
+    },
+    {
+      "epoch": 1.5096952908587258,
+      "grad_norm": 0.4451793432235718,
+      "learning_rate": 1.7256963302735752e-06,
+      "loss": 0.0026,
+      "step": 545
+    },
+    {
+      "epoch": 1.5235457063711912,
+      "grad_norm": 0.30361247062683105,
+      "learning_rate": 1.635205964389474e-06,
+      "loss": 0.0016,
+      "step": 550
+    },
+    {
+      "epoch": 1.5373961218836565,
+      "grad_norm": 0.20500566065311432,
+      "learning_rate": 1.5466866031378874e-06,
+      "loss": 0.0029,
+      "step": 555
+    },
+    {
+      "epoch": 1.5512465373961217,
+      "grad_norm": 0.3303813636302948,
+      "learning_rate": 1.4601900987478834e-06,
+      "loss": 0.003,
+      "step": 560
+    },
+    {
+      "epoch": 1.5650969529085872,
+      "grad_norm": 0.20436729490756989,
+      "learning_rate": 1.3757671185141136e-06,
+      "loss": 0.0017,
+      "step": 565
+    },
+    {
+      "epoch": 1.5789473684210527,
+      "grad_norm": 0.33445170521736145,
+      "learning_rate": 1.2934671151172974e-06,
+      "loss": 0.0031,
+      "step": 570
+    },
+    {
+      "epoch": 1.5927977839335181,
+      "grad_norm": 0.2386503368616104,
+      "learning_rate": 1.213338297656191e-06,
+      "loss": 0.0026,
+      "step": 575
+    },
+    {
+      "epoch": 1.6066481994459834,
+      "grad_norm": 0.45957309007644653,
+      "learning_rate": 1.1354276034080059e-06,
+      "loss": 0.003,
+      "step": 580
+    },
+    {
+      "epoch": 1.6204986149584486,
+      "grad_norm": 0.2124543935060501,
+      "learning_rate": 1.0597806703338354e-06,
+      "loss": 0.0023,
+      "step": 585
+    },
+    {
+      "epoch": 1.634349030470914,
+      "grad_norm": 0.1889660805463791,
+      "learning_rate": 9.86441810345183e-07,
+      "loss": 0.0026,
+      "step": 590
+    },
+    {
+      "epoch": 1.6481994459833795,
+      "grad_norm": 0.3355896472930908,
+      "learning_rate": 9.154539833472442e-07,
+      "loss": 0.0023,
+      "step": 595
+    },
+    {
+      "epoch": 1.662049861495845,
+      "grad_norm": 0.20331183075904846,
+      "learning_rate": 8.468587720741728e-07,
+      "loss": 0.002,
+      "step": 600
+    },
+    {
+      "epoch": 1.6759002770083102,
+      "grad_norm": 0.5584515929222107,
+      "learning_rate": 7.80696357731049e-07,
+      "loss": 0.0027,
+      "step": 605
+    },
+    {
+      "epoch": 1.6897506925207755,
+      "grad_norm": 0.21782828867435455,
+      "learning_rate": 7.170054964568146e-07,
+      "loss": 0.0027,
+      "step": 610
+    },
+    {
+      "epoch": 1.703601108033241,
+      "grad_norm": 0.24219338595867157,
+      "learning_rate": 6.558234966219984e-07,
+      "loss": 0.0022,
+      "step": 615
+    },
+    {
+      "epoch": 1.7174515235457064,
+      "grad_norm": 0.32900330424308777,
+      "learning_rate": 5.971861969744758e-07,
+      "loss": 0.0031,
+      "step": 620
+    },
+    {
+      "epoch": 1.7313019390581719,
+      "grad_norm": 0.20752942562103271,
+      "learning_rate": 5.411279456461133e-07,
+      "loss": 0.0019,
+      "step": 625
+    },
+    {
+      "epoch": 1.745152354570637,
+      "grad_norm": 0.15013189613819122,
+      "learning_rate": 4.87681580032573e-07,
+      "loss": 0.0018,
+      "step": 630
+    },
+    {
+      "epoch": 1.7590027700831024,
+      "grad_norm": 0.2259117215871811,
+      "learning_rate": 4.368784075580512e-07,
+      "loss": 0.0028,
+      "step": 635
+    },
+    {
+      "epoch": 1.7728531855955678,
+      "grad_norm": 0.16545476019382477,
+      "learning_rate": 3.8874818733625363e-07,
+      "loss": 0.0024,
+      "step": 640
+    },
+    {
+      "epoch": 1.7867036011080333,
+      "grad_norm": 0.2805814743041992,
+      "learning_rate": 3.433191127383079e-07,
+      "loss": 0.0025,
+      "step": 645
+    },
+    {
+      "epoch": 1.8005540166204987,
+      "grad_norm": 0.24149565398693085,
+      "learning_rate": 3.0061779487786325e-07,
+      "loss": 0.0026,
+      "step": 650
+    },
+    {
+      "epoch": 1.814404432132964,
+      "grad_norm": 0.37586623430252075,
+      "learning_rate": 2.6066924702302044e-07,
+      "loss": 0.0027,
+      "step": 655
+    },
+    {
+      "epoch": 1.8282548476454292,
+      "grad_norm": 0.28574487566947937,
+      "learning_rate": 2.2349686994424303e-07,
+      "loss": 0.0022,
+      "step": 660
+    },
+    {
+      "epoch": 1.8421052631578947,
+      "grad_norm": 0.26507818698883057,
+      "learning_rate": 1.8912243820682296e-07,
+      "loss": 0.0025,
+      "step": 665
+    },
+    {
+      "epoch": 1.8559556786703602,
+      "grad_norm": 0.29346975684165955,
+      "learning_rate": 1.575660874159346e-07,
+      "loss": 0.0024,
+      "step": 670
+    },
+    {
+      "epoch": 1.8698060941828256,
+      "grad_norm": 0.37801891565322876,
+      "learning_rate": 1.2884630242174734e-07,
+      "loss": 0.003,
+      "step": 675
+    },
+    {
+      "epoch": 1.8836565096952909,
+      "grad_norm": 0.11035842448472977,
+      "learning_rate": 1.029799064914988e-07,
+      "loss": 0.0019,
+      "step": 680
+    },
+    {
+      "epoch": 1.897506925207756,
+      "grad_norm": 0.2686106860637665,
+      "learning_rate": 7.998205145489157e-08,
+      "loss": 0.0026,
+      "step": 685
+    },
+    {
+      "epoch": 1.9113573407202216,
+      "grad_norm": 0.3440103232860565,
+      "learning_rate": 5.986620882855676e-08,
+      "loss": 0.0031,
+      "step": 690
+    },
+    {
+      "epoch": 1.925207756232687,
+      "grad_norm": 0.2664523124694824,
+      "learning_rate": 4.2644161924811353e-08,
+      "loss": 0.0035,
+      "step": 695
+    },
+    {
+      "epoch": 1.9390581717451525,
+      "grad_norm": 0.22529973089694977,
+      "learning_rate": 2.8325998949314536e-08,
+      "loss": 0.0022,
+      "step": 700
+    },
+    {
+      "epoch": 1.9529085872576177,
+      "grad_norm": 0.167397141456604,
+      "learning_rate": 1.6920107091668582e-08,
+      "loss": 0.0021,
+      "step": 705
+    },
+    {
+      "epoch": 1.966759002770083,
+      "grad_norm": 0.3647945523262024,
+      "learning_rate": 8.433167612436066e-09,
+      "loss": 0.0021,
+      "step": 710
+    },
+    {
+      "epoch": 1.9806094182825484,
+      "grad_norm": 0.3356544077396393,
+      "learning_rate": 2.8701519294371815e-09,
+      "loss": 0.0034,
+      "step": 715
+    },
+    {
+      "epoch": 1.994459833795014,
+      "grad_norm": 0.1703202724456787,
+      "learning_rate": 2.3431870562917735e-10,
+      "loss": 0.0019,
+      "step": 720
+    },
+    {
+      "epoch": 2.0,
+      "eval_loss": 0.0028210037853568792,
+      "eval_runtime": 3.8079,
+      "eval_samples_per_second": 0.788,
+      "eval_steps_per_second": 0.788,
+      "step": 722
+    },
+    {
+      "epoch": 2.0,
+      "step": 722,
+      "total_flos": 1.3020461702683034e+17,
+      "train_loss": 0.037445212856265314,
+      "train_runtime": 3051.2562,
+      "train_samples_per_second": 0.237,
+      "train_steps_per_second": 0.237
+    }
+  ],
+  "logging_steps": 5,
+  "max_steps": 722,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 2,
+  "save_steps": 100,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 1.3020461702683034e+17,
+  "train_batch_size": 1,
+  "trial_name": null,
+  "trial_params": null
+}