Upload folder using huggingface_hub

Browse files

Files changed (15) hide show

.gitattributes +1 -0
README.md +273 -0
all_results.json +21 -0
config.json +28 -0
eval_results.json +16 -0
generation_config.json +7 -0
model.safetensors.index.json +261 -0
output-00001-of-00002.safetensors +3 -0
output-00002-of-00002.safetensors +3 -0
special_tokens_map.json +34 -0
thumbnail.png +0 -0
tokenizer.json +3 -0
tokenizer_config.json +70 -0
train_results.json +8 -0
trainer_state.json +211 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+tokenizer.json filter=lfs diff=lfs merge=lfs -text

README.md ADDED Viewed

	@@ -0,0 +1,273 @@

+---
+license: other
+license_name: gemma-terms-of-use
+license_link: https://ai.google.dev/gemma/terms
+base_model: HuggingFaceH4/zephyr-7b-gemma-sft-v0.1
+tags:
+- alignment-handbook
+- trl
+- dpo
+- generated_from_trainer
+datasets:
+- argilla/dpo-mix-7k
+pipeline_tag: text-generation
+model-index:
+- name: zephyr-7b-gemma
+  results:
+  # MT-Bench (taken from model card)
+  - task:
+      type: text-generation
+      name: Text Generation
+    dataset:
+      name: MT-Bench
+      type: unknown
+    metrics:
+       - type: unknown
+         name: score
+         value: 7.81
+    source:
+      url: https://huggingface.co/spaces/lmsys/mt-bench
+---
+<img src="https://huggingface.co/HuggingFaceH4/zephyr-7b-gemma-v0.1/resolve/main/thumbnail.png" alt="Zephyr 7B Gemma Logo" width="800" style="margin-left:auto; margin-right:auto; display:block"/>
+# Model Card for Zephyr 7B Gemma
+Zephyr is a series of language models that are trained to act as helpful assistants. Zephyr 7B Gemma is the third model in the series, and is a fine-tuned version of [`google/gemma-7b`](https://huggingface.co/google/gemma-7b) that was trained on a mix of publicly available, synthetic datasets using Direct Preference Optimization (DPO). You can reproduce the training of this model via the recipe provided in the [Alignment Handbook](https://github.com/huggingface/alignment-handbook).
+## Model description
+- **Model type:** A 7B parameter GPT-like model fine-tuned on a mix of publicly available, synthetic datasets.
+- **Language(s) (NLP):** Primarily English
+- **License:** Gemma Terms of Use
+- **Finetuned from model:** [google/gemma-7b](https://huggingface.co/google/gemma-7b)
+### Model Sources
+<!-- Provide the basic links for the model. -->
+- **Repository:** https://github.com/huggingface/alignment-handbook
+- **Demo:** https://huggingface.co/spaces/HuggingFaceH4/zephyr-7b-gemma-chat
+## Performance
+|                                 Model                                 |MT Bench⬇️|IFEval|
+|-----------------------------------------------------------------------|------:|------:|
+|[zephyr-7b-gemma-v0.1](https://huggingface.co/HuggingFaceH4/zephyr-7b-gemma-v0.1)|  7.81 |  28.76|
+|[zephyr-7b-beta](https://huggingface.co/HuggingFaceH4/zephyr-7b-beta)  |  7.34 |  43.81|
+|[google/gemma-7b-it](https://huggingface.co/google/gemma-7b-it)               |  6.38 |  38.01|
+|                                 Model                                 |AGIEval|GPT4All|TruthfulQA|BigBench|Average ⬇️|
+|-----------------------------------------------------------------------|------:|------:|---------:|-------:|------:|
+|[zephyr-7b-beta](https://huggingface.co/HuggingFaceH4/zephyr-7b-beta)  |  37.52|  71.77|     55.26|   39.77|  51.08|
+|[zephyr-7b-gemma-v0.1](https://huggingface.co/HuggingFaceH4/zephyr-7b-gemma-v0.1)|  34.22|  66.37|     52.19|   37.10|  47.47|
+|[mlabonne/Gemmalpaca-7B](https://huggingface.co/mlabonne/Gemmalpaca-7B)|  21.6 |  40.87|     44.85 |   30.49|  34.45|
+|[google/gemma-7b-it](https://huggingface.co/google/gemma-7b-it)        |  21.33|  40.84|     41.70|   30.25|  33.53|
+<details><summary>Details of AGIEval, GPT4All, TruthfulQA, BigBench </summary>
+### AGIEval
+|             Task             |Version| Metric |Value|   |Stderr|
+|------------------------------|------:|--------|----:|---|-----:|
+|agieval_aqua_rat              |      0|acc     |21.65|±  |  2.59|
+|                              |       |acc_norm|25.20|±  |  2.73|
+|agieval_logiqa_en             |      0|acc     |34.72|±  |  1.87|
+|                              |       |acc_norm|35.94|±  |  1.88|
+|agieval_lsat_ar               |      0|acc     |19.57|±  |  2.62|
+|                              |       |acc_norm|21.74|±  |  2.73|
+|agieval_lsat_lr               |      0|acc     |30.59|±  |  2.04|
+|                              |       |acc_norm|32.55|±  |  2.08|
+|agieval_lsat_rc               |      0|acc     |49.07|±  |  3.05|
+|                              |       |acc_norm|42.75|±  |  3.02|
+|agieval_sat_en                |      0|acc     |54.85|±  |  3.48|
+|                              |       |acc_norm|53.40|±  |  3.48|
+|agieval_sat_en_without_passage|      0|acc     |37.38|±  |  3.38|
+|                              |       |acc_norm|33.98|±  |  3.31|
+|agieval_sat_math              |      0|acc     |30.91|±  |  3.12|
+|                              |       |acc_norm|28.18|±  |  3.04|
+Average: 34.22%
+### GPT4All
+|    Task     |Version| Metric |Value|   |Stderr|
+|-------------|------:|--------|----:|---|-----:|
+|arc_challenge|      0|acc     |49.15|±  |  1.46|
+|             |       |acc_norm|52.47|±  |  1.46|
+|arc_easy     |      0|acc     |77.44|±  |  0.86|
+|             |       |acc_norm|74.75|±  |  0.89|
+|boolq        |      1|acc     |79.69|±  |  0.70|
+|hellaswag    |      0|acc     |60.59|±  |  0.49|
+|             |       |acc_norm|78.00|±  |  0.41|
+|openbookqa   |      0|acc     |29.20|±  |  2.04|
+|             |       |acc_norm|37.80|±  |  2.17|
+|piqa         |      0|acc     |76.82|±  |  0.98|
+|             |       |acc_norm|77.80|±  |  0.97|
+|winogrande   |      0|acc     |64.09|±  |  1.35|
+Average: 66.37%
+### TruthfulQA
+|    Task     |Version|Metric|Value|   |Stderr|
+|-------------|------:|------|----:|---|-----:|
+|truthfulqa_mc|      1|mc1   |35.74|±  |  1.68|
+|             |       |mc2   |52.19|±  |  1.59|
+Average: 52.19%
+### BigBench
+|                      Task                      |Version|       Metric        |Value|   |Stderr|
+|------------------------------------------------|------:|---------------------|----:|---|-----:|
+|bigbench_causal_judgement                       |      0|multiple_choice_grade|53.68|±  |  3.63|
+|bigbench_date_understanding                     |      0|multiple_choice_grade|59.89|±  |  2.55|
+|bigbench_disambiguation_qa                      |      0|multiple_choice_grade|30.23|±  |  2.86|
+|bigbench_geometric_shapes                       |      0|multiple_choice_grade|11.42|±  |  1.68|
+|                                                |       |exact_str_match      | 0.00|±  |  0.00|
+|bigbench_logical_deduction_five_objects         |      0|multiple_choice_grade|28.40|±  |  2.02|
+|bigbench_logical_deduction_seven_objects        |      0|multiple_choice_grade|19.14|±  |  1.49|
+|bigbench_logical_deduction_three_objects        |      0|multiple_choice_grade|44.67|±  |  2.88|
+|bigbench_movie_recommendation                   |      0|multiple_choice_grade|26.80|±  |  1.98|
+|bigbench_navigate                               |      0|multiple_choice_grade|50.00|±  |  1.58|
+|bigbench_reasoning_about_colored_objects        |      0|multiple_choice_grade|52.75|±  |  1.12|
+|bigbench_ruin_names                             |      0|multiple_choice_grade|33.04|±  |  2.22|
+|bigbench_salient_translation_error_detection    |      0|multiple_choice_grade|33.37|±  |  1.49|
+|bigbench_snarks                                 |      0|multiple_choice_grade|48.62|±  |  3.73|
+|bigbench_sports_understanding                   |      0|multiple_choice_grade|58.11|±  |  1.57|
+|bigbench_temporal_sequences                     |      0|multiple_choice_grade|37.20|±  |  1.53|
+|bigbench_tracking_shuffled_objects_five_objects |      0|multiple_choice_grade|20.08|±  |  1.13|
+|bigbench_tracking_shuffled_objects_seven_objects|      0|multiple_choice_grade|15.77|±  |  0.87|
+|bigbench_tracking_shuffled_objects_three_objects|      0|multiple_choice_grade|44.67|±  |  2.88|
+Average: 37.1%
+</details>
+## Intended uses & limitations
+The model was initially fine-tuned on the [DEITA 10K](https://huggingface.co/datasets/HuggingFaceH4/deita-10k-v0-sft)  dataset, which contains a diverse range of synthetic dialogues generated by ChatGPT.
+We then further aligned the model with [🤗 TRL's](https://github.com/huggingface/trl) `DPOTrainer` on the [argilla/dpo-mix-7k](https://huggingface.co/datasets/argilla/dpo-mix-7k) dataset, which contains 7k prompts and model completions that are ranked by GPT-4. As a result, the model can be used for chat and you can check out our [demo](https://huggingface.co/spaces/HuggingFaceH4/zephyr-chat) to test its capabilities.
+Here's how you can run the model using the `pipeline()` function from 🤗 Transformers:
+```python
+# pip install transformers>=4.38.2
+# pip install accelerate
+import torch
+from transformers import pipeline
+pipe = pipeline(
+    "text-generation",
+    model="HuggingFaceH4/zephyr-7b-gemma-v0.1",
+    device_map="auto",
+    torch_dtype=torch.bfloat16,
+)
+messages = [
+    {
+        "role": "system",
+        "content": "",  # Model not yet trained to follow this
+    },
+    {"role": "user", "content": "How many helicopters can a human eat in one sitting?"},
+]
+outputs = pipe(
+    messages,
+    max_new_tokens=128,
+    do_sample=True,
+    temperature=0.7,
+    top_k=50,
+    top_p=0.95,
+    stop_sequence="<|im_end|>",
+)
+print(outputs[0]["generated_text"][-1]["content"])
+# It is not possible for a human to eat a helicopter in one sitting, as a
+# helicopter is a large and inedible machine. Helicopters are made of metal,
+# plastic, and other materials that are not meant to be consumed by humans.
+# Eating a helicopter would be extremely dangerous and would likely cause
+# serious health problems, including choking, suffocation, and poisoning. It is
+# important to only eat food that is safe and intended for human consumption.
+```
+## Bias, Risks, and Limitations
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+Zephyr 7B Gemma has not been aligned to human preferences for safety within the RLHF phase or deployed with in-the-loop filtering of responses like ChatGPT, so the model can produce problematic outputs (especially when prompted to do so). The size and composition of the corpus used to train the base model (`google/gemma-7b`) are also unknown; however, it is likely to have included a mix of Web data and technical sources like books and code. See the [StarCoder2 model card](https://huggingface.co/bigcode/starcoder2-15b) for an example of this.
+## Training and evaluation data
+This model is a fine-tuned version of [HuggingFaceH4/zephyr-7b-gemma-sft-v0.1](https://huggingface.co/HuggingFaceH4/zephyr-7b-gemma-sft-v0.1) on the argilla/dpo-mix-7k dataset.
+It achieves the following results on the evaluation set:
+- Loss: 0.4695
+- Rewards/chosen: -3.3746
+- Rewards/rejected: -4.9715
+- Rewards/accuracies: 0.7188
+- Rewards/margins: 1.5970
+- Logps/rejected: -459.4853
+- Logps/chosen: -429.9115
+- Logits/rejected: 86.4684
+- Logits/chosen: 92.8200
+### Training hyperparameters
+The following hyperparameters were used during training:
+- learning_rate: 5e-07
+- train_batch_size: 2
+- eval_batch_size: 4
+- seed: 42
+- distributed_type: multi-GPU
+- num_devices: 8
+- gradient_accumulation_steps: 8
+- total_train_batch_size: 128
+- total_eval_batch_size: 32
+- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+- lr_scheduler_type: cosine
+- lr_scheduler_warmup_ratio: 0.1
+- num_epochs: 2
+### Training results
+| Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
+|:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
+| 0.1923        | 1.9   | 100  | 0.4736          | -3.4575        | -4.9556          | 0.75               | 1.4980          | -459.1662      | -431.5707    | 86.3863         | 92.7360       |
+### Framework versions
+- Transformers 4.39.0.dev0
+- Pytorch 2.1.2+cu121
+- Datasets 2.14.6
+- Tokenizers 0.15.1
+## Citation Information
+If you find this model useful in your work, please consider citing the Zephyr technical report:
+```
+@misc{tunstall2023zephyr,
+      title={Zephyr: Direct Distillation of LM Alignment},
+      author={Lewis Tunstall and Edward Beeching and Nathan Lambert and Nazneen Rajani and Kashif Rasul and Younes Belkada and Shengyi Huang and Leandro von Werra and Clémentine Fourrier and Nathan Habib and Nathan Sarrazin and Omar Sanseviero and Alexander M. Rush and Thomas Wolf},
+      year={2023},
+      eprint={2310.16944},
+      archivePrefix={arXiv},
+      primaryClass={cs.LG}
+}
+```
+You may also wish to cite the creators of this model:
+```
+@misc{zephyr_7b_gemma,
+  author = {Lewis Tunstall and Philipp Schmid},
+  title = {Zephyr 7B Gemma},
+  year = {2024},
+  publisher = {Hugging Face},
+  journal = {Hugging Face repository},
+  howpublished = {\url{https://huggingface.co/HuggingFaceH4/zephyr-7b-gemma-v0.1}}
+}
+```

all_results.json ADDED Viewed

	@@ -0,0 +1,21 @@

+{
+    "epoch": 1.97,
+    "eval_logits/chosen": 92.81997680664062,
+    "eval_logits/rejected": 86.46841430664062,
+    "eval_logps/chosen": -429.9114685058594,
+    "eval_logps/rejected": -459.4852600097656,
+    "eval_loss": 0.4695254862308502,
+    "eval_rewards/accuracies": 0.71875,
+    "eval_rewards/chosen": -3.3745555877685547,
+    "eval_rewards/margins": 1.5969535112380981,
+    "eval_rewards/rejected": -4.9715094566345215,
+    "eval_runtime": 52.4051,
+    "eval_samples": 750,
+    "eval_samples_per_second": 14.312,
+    "eval_steps_per_second": 0.458,
+    "train_loss": 0.38887147261546207,
+    "train_runtime": 1183.8142,
+    "train_samples": 6750,
+    "train_samples_per_second": 11.404,
+    "train_steps_per_second": 0.088
+}

config.json ADDED Viewed

	@@ -0,0 +1,28 @@

+{
+  "_name_or_path": "lewtun/zephyr-7b-gemma-sft",
+  "architectures": [
+    "GemmaForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "bos_token_id": 2,
+  "eos_token_id": 1,
+  "head_dim": 256,
+  "hidden_act": "gelu",
+  "hidden_size": 3072,
+  "initializer_range": 0.02,
+  "intermediate_size": 24576,
+  "max_position_embeddings": 8192,
+  "model_type": "gemma",
+  "num_attention_heads": 16,
+  "num_hidden_layers": 28,
+  "num_key_value_heads": 16,
+  "pad_token_id": 0,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 10000.0,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.39.0.dev0",
+  "use_cache": true,
+  "vocab_size": 256000
+}

eval_results.json ADDED Viewed

	@@ -0,0 +1,16 @@

+{
+    "epoch": 1.97,
+    "eval_logits/chosen": 92.81997680664062,
+    "eval_logits/rejected": 86.46841430664062,
+    "eval_logps/chosen": -429.9114685058594,
+    "eval_logps/rejected": -459.4852600097656,
+    "eval_loss": 0.4695254862308502,
+    "eval_rewards/accuracies": 0.71875,
+    "eval_rewards/chosen": -3.3745555877685547,
+    "eval_rewards/margins": 1.5969535112380981,
+    "eval_rewards/rejected": -4.9715094566345215,
+    "eval_runtime": 52.4051,
+    "eval_samples": 750,
+    "eval_samples_per_second": 14.312,
+    "eval_steps_per_second": 0.458
+}

generation_config.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "_from_model_config": true,
+  "bos_token_id": 2,
+  "eos_token_id": 1,
+  "pad_token_id": 0,
+  "transformers_version": "4.39.0.dev0"
+}

model.safetensors.index.json ADDED Viewed

	@@ -0,0 +1,261 @@

+{
+  "metadata": {
+    "total_size": 17075361792
+  },
+  "weight_map": {
+    "model.embed_tokens.weight": "model-00001-of-00004.safetensors",
+    "model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors",
+    "model.layers.0.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.0.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
+    "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.1.input_layernorm.weight": "model-00001-of-00004.safetensors",
+    "model.layers.1.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.1.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
+    "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.10.input_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.10.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.10.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.10.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.10.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.10.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.10.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.10.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.11.input_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.11.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.11.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.11.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.11.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.11.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.11.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.11.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.11.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.12.input_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.12.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.12.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.12.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.12.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.12.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.12.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.12.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.12.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.13.input_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.13.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.13.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.13.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.13.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.13.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.13.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.13.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.13.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.14.input_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.14.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.14.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.14.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.14.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.14.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.14.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.14.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.15.input_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.15.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.15.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.15.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.15.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.15.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.15.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.15.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.15.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.16.input_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.16.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.16.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.16.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.16.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.16.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.16.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.16.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.16.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.17.input_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.17.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.17.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.17.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.17.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.17.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.17.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.17.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.17.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.18.input_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.18.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.18.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.18.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.18.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.18.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.18.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.18.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.18.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.19.input_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.19.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.19.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.19.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.19.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.19.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.19.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.19.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.19.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.2.input_layernorm.weight": "model-00001-of-00004.safetensors",
+    "model.layers.2.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.2.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
+    "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.20.input_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.20.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.20.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.20.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.20.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.20.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.20.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.20.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.20.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.21.input_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.21.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.21.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.21.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.21.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.21.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.21.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.21.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.21.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.22.input_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.22.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.22.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.22.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.22.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.22.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.22.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.22.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.22.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.23.input_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.23.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.23.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.23.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.23.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.23.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.24.input_layernorm.weight": "model-00004-of-00004.safetensors",
+    "model.layers.24.mlp.down_proj.weight": "model-00004-of-00004.safetensors",
+    "model.layers.24.mlp.gate_proj.weight": "model-00004-of-00004.safetensors",
+    "model.layers.24.mlp.up_proj.weight": "model-00004-of-00004.safetensors",
+    "model.layers.24.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
+    "model.layers.24.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.24.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.24.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.24.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.25.input_layernorm.weight": "model-00004-of-00004.safetensors",
+    "model.layers.25.mlp.down_proj.weight": "model-00004-of-00004.safetensors",
+    "model.layers.25.mlp.gate_proj.weight": "model-00004-of-00004.safetensors",
+    "model.layers.25.mlp.up_proj.weight": "model-00004-of-00004.safetensors",
+    "model.layers.25.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
+    "model.layers.25.self_attn.k_proj.weight": "model-00004-of-00004.safetensors",
+    "model.layers.25.self_attn.o_proj.weight": "model-00004-of-00004.safetensors",
+    "model.layers.25.self_attn.q_proj.weight": "model-00004-of-00004.safetensors",
+    "model.layers.25.self_attn.v_proj.weight": "model-00004-of-00004.safetensors",
+    "model.layers.26.input_layernorm.weight": "model-00004-of-00004.safetensors",
+    "model.layers.26.mlp.down_proj.weight": "model-00004-of-00004.safetensors",
+    "model.layers.26.mlp.gate_proj.weight": "model-00004-of-00004.safetensors",
+    "model.layers.26.mlp.up_proj.weight": "model-00004-of-00004.safetensors",
+    "model.layers.26.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
+    "model.layers.26.self_attn.k_proj.weight": "model-00004-of-00004.safetensors",
+    "model.layers.26.self_attn.o_proj.weight": "model-00004-of-00004.safetensors",
+    "model.layers.26.self_attn.q_proj.weight": "model-00004-of-00004.safetensors",
+    "model.layers.26.self_attn.v_proj.weight": "model-00004-of-00004.safetensors",
+    "model.layers.27.input_layernorm.weight": "model-00004-of-00004.safetensors",
+    "model.layers.27.mlp.down_proj.weight": "model-00004-of-00004.safetensors",
+    "model.layers.27.mlp.gate_proj.weight": "model-00004-of-00004.safetensors",
+    "model.layers.27.mlp.up_proj.weight": "model-00004-of-00004.safetensors",
+    "model.layers.27.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
+    "model.layers.27.self_attn.k_proj.weight": "model-00004-of-00004.safetensors",
+    "model.layers.27.self_attn.o_proj.weight": "model-00004-of-00004.safetensors",
+    "model.layers.27.self_attn.q_proj.weight": "model-00004-of-00004.safetensors",
+    "model.layers.27.self_attn.v_proj.weight": "model-00004-of-00004.safetensors",
+    "model.layers.3.input_layernorm.weight": "model-00001-of-00004.safetensors",
+    "model.layers.3.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.3.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
+    "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors",
+    "model.layers.4.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.4.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
+    "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.5.input_layernorm.weight": "model-00001-of-00004.safetensors",
+    "model.layers.5.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.5.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
+    "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.6.input_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.6.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.6.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.6.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.6.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.7.input_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.7.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.7.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.7.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.7.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.7.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.7.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.7.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.7.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.8.input_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.8.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.8.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.8.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.8.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.8.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.8.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.8.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.8.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.9.input_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.9.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.9.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.9.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.9.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.9.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.9.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.9.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.9.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
+    "model.norm.weight": "model-00004-of-00004.safetensors"
+  }
+}

output-00001-of-00002.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:bf586e941391b309b9d9cca7ef093ee0a4ce51e506b0ad85f282ed34c4a2fb26
+size 8543943248

output-00002-of-00002.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:25d274309aff742e170afce4876aaa4f42d39e80b235fce0b1ffd67fb6f48c59
+size 1563370576

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,34 @@

+{
+  "additional_special_tokens": [
+    "<|im_start|>",
+    "<|im_end|>"
+  ],
+  "bos_token": {
+    "content": "<bos>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "<eos>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<pad>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

thumbnail.png ADDED Viewed

tokenizer.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:22449cb9ef4bad0db7dd93b46ddff7ab7d6a654dd4f903e130ddb6361eac3af5
+size 17477473

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,70 @@

+{
+  "add_bos_token": false,
+  "add_eos_token": false,
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<pad>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<eos>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "<bos>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "3": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "106": {
+      "content": "<|im_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "107": {
+      "content": "<|im_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "additional_special_tokens": [
+    "<|im_start|>",
+    "<|im_end|>"
+  ],
+  "bos_token": "<bos>",
+  "chat_template": "{% if messages[0]['role'] == 'user' or messages[0]['role'] == 'system' %}{{ bos_token }}{% endif %}{% for message in messages %}{{ '<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n' }}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% elif messages[-1]['role'] == 'assistant' %}{{ eos_token }}{% endif %}",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "<eos>",
+  "legacy": null,
+  "model_max_length": 2048,
+  "pad_token": "<pad>",
+  "sp_model_kwargs": {},
+  "spaces_between_special_tokens": false,
+  "tokenizer_class": "GemmaTokenizer",
+  "unk_token": "<unk>",
+  "use_default_system_prompt": false
+}

train_results.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "epoch": 1.97,
+    "train_loss": 0.38887147261546207,
+    "train_runtime": 1183.8142,
+    "train_samples": 6750,
+    "train_samples_per_second": 11.404,
+    "train_steps_per_second": 0.088
+}

trainer_state.json ADDED Viewed

	@@ -0,0 +1,211 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 1.971563981042654,
+  "eval_steps": 100,
+  "global_step": 104,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.02,
+      "grad_norm": 139.638709617328,
+      "learning_rate": 4.545454545454545e-08,
+      "logits/chosen": 111.16130065917969,
+      "logits/rejected": 86.8372802734375,
+      "logps/chosen": -326.8536071777344,
+      "logps/rejected": -329.15960693359375,
+      "loss": 0.6931,
+      "rewards/accuracies": 0.0,
+      "rewards/chosen": 0.0,
+      "rewards/margins": 0.0,
+      "rewards/rejected": 0.0,
+      "step": 1
+    },
+    {
+      "epoch": 0.19,
+      "grad_norm": 141.5345140695996,
+      "learning_rate": 4.545454545454545e-07,
+      "logits/chosen": 110.37065124511719,
+      "logits/rejected": 133.2639923095703,
+      "logps/chosen": -350.3541259765625,
+      "logps/rejected": -434.3558349609375,
+      "loss": 0.7191,
+      "rewards/accuracies": 0.4722222089767456,
+      "rewards/chosen": 0.13274627923965454,
+      "rewards/margins": 0.07573667168617249,
+      "rewards/rejected": 0.05700961872935295,
+      "step": 10
+    },
+    {
+      "epoch": 0.38,
+      "grad_norm": 123.71909837085582,
+      "learning_rate": 4.885348141000122e-07,
+      "logits/chosen": 117.74342346191406,
+      "logits/rejected": 128.52548217773438,
+      "logps/chosen": -333.21240234375,
+      "logps/rejected": -410.2923889160156,
+      "loss": 0.6097,
+      "rewards/accuracies": 0.7124999761581421,
+      "rewards/chosen": 0.11470325291156769,
+      "rewards/margins": 0.7479402422904968,
+      "rewards/rejected": -0.6332370042800903,
+      "step": 20
+    },
+    {
+      "epoch": 0.57,
+      "grad_norm": 111.89651526533274,
+      "learning_rate": 4.5025027361734613e-07,
+      "logits/chosen": 114.44095611572266,
+      "logits/rejected": 119.11683654785156,
+      "logps/chosen": -399.1412048339844,
+      "logps/rejected": -474.2645568847656,
+      "loss": 0.596,
+      "rewards/accuracies": 0.7250000238418579,
+      "rewards/chosen": -1.7276217937469482,
+      "rewards/margins": 1.0803521871566772,
+      "rewards/rejected": -2.807974100112915,
+      "step": 30
+    },
+    {
+      "epoch": 0.76,
+      "grad_norm": 102.67088507130228,
+      "learning_rate": 3.893311157806091e-07,
+      "logits/chosen": 116.33101654052734,
+      "logits/rejected": 111.0595703125,
+      "logps/chosen": -428.7275390625,
+      "logps/rejected": -464.0934143066406,
+      "loss": 0.5343,
+      "rewards/accuracies": 0.7250000238418579,
+      "rewards/chosen": -2.2770252227783203,
+      "rewards/margins": 0.9522085189819336,
+      "rewards/rejected": -3.229233503341675,
+      "step": 40
+    },
+    {
+      "epoch": 0.95,
+      "grad_norm": 130.9996197198566,
+      "learning_rate": 3.126631330646801e-07,
+      "logits/chosen": 123.2393569946289,
+      "logits/rejected": 124.50789642333984,
+      "logps/chosen": -438.548095703125,
+      "logps/rejected": -474.1234436035156,
+      "loss": 0.5138,
+      "rewards/accuracies": 0.762499988079071,
+      "rewards/chosen": -2.3258581161499023,
+      "rewards/margins": 1.3220884799957275,
+      "rewards/rejected": -3.647946834564209,
+      "step": 50
+    },
+    {
+      "epoch": 1.14,
+      "grad_norm": 56.950942870641875,
+      "learning_rate": 2.2891223348923882e-07,
+      "logits/chosen": 122.619384765625,
+      "logits/rejected": 126.1447525024414,
+      "logps/chosen": -414.3634338378906,
+      "logps/rejected": -468.19586181640625,
+      "loss": 0.2724,
+      "rewards/accuracies": 0.893750011920929,
+      "rewards/chosen": -2.3773388862609863,
+      "rewards/margins": 2.358515501022339,
+      "rewards/rejected": -4.735854148864746,
+      "step": 60
+    },
+    {
+      "epoch": 1.33,
+      "grad_norm": 52.820355390804025,
+      "learning_rate": 1.4754491880085317e-07,
+      "logits/chosen": 117.16709899902344,
+      "logits/rejected": 118.9737319946289,
+      "logps/chosen": -387.70526123046875,
+      "logps/rejected": -511.97503662109375,
+      "loss": 0.1936,
+      "rewards/accuracies": 0.9437500238418579,
+      "rewards/chosen": -2.4186935424804688,
+      "rewards/margins": 2.5914835929870605,
+      "rewards/rejected": -5.010177135467529,
+      "step": 70
+    },
+    {
+      "epoch": 1.52,
+      "grad_norm": 51.657826972971314,
+      "learning_rate": 7.775827023107834e-08,
+      "logits/chosen": 124.15473937988281,
+      "logits/rejected": 125.7086181640625,
+      "logps/chosen": -446.75421142578125,
+      "logps/rejected": -543.6109619140625,
+      "loss": 0.1779,
+      "rewards/accuracies": 0.981249988079071,
+      "rewards/chosen": -2.316882848739624,
+      "rewards/margins": 2.962496757507324,
+      "rewards/rejected": -5.279379844665527,
+      "step": 80
+    },
+    {
+      "epoch": 1.71,
+      "grad_norm": 86.34373603352554,
+      "learning_rate": 2.7440387297912122e-08,
+      "logits/chosen": 107.07579040527344,
+      "logits/rejected": 111.74522399902344,
+      "logps/chosen": -425.4237365722656,
+      "logps/rejected": -509.67718505859375,
+      "loss": 0.1765,
+      "rewards/accuracies": 0.9437500238418579,
+      "rewards/chosen": -2.749206066131592,
+      "rewards/margins": 3.0597147941589355,
+      "rewards/rejected": -5.8089213371276855,
+      "step": 90
+    },
+    {
+      "epoch": 1.9,
+      "grad_norm": 51.66215546933828,
+      "learning_rate": 2.27878296044029e-09,
+      "logits/chosen": 123.38490295410156,
+      "logits/rejected": 113.675537109375,
+      "logps/chosen": -439.7268981933594,
+      "logps/rejected": -550.8162841796875,
+      "loss": 0.1923,
+      "rewards/accuracies": 0.9624999761581421,
+      "rewards/chosen": -2.560769557952881,
+      "rewards/margins": 3.2135703563690186,
+      "rewards/rejected": -5.77433967590332,
+      "step": 100
+    },
+    {
+      "epoch": 1.9,
+      "eval_logits/chosen": 92.73604583740234,
+      "eval_logits/rejected": 86.38631439208984,
+      "eval_logps/chosen": -431.5707092285156,
+      "eval_logps/rejected": -459.1661682128906,
+      "eval_loss": 0.4735770523548126,
+      "eval_rewards/accuracies": 0.75,
+      "eval_rewards/chosen": -3.4575202465057373,
+      "eval_rewards/margins": 1.4980329275131226,
+      "eval_rewards/rejected": -4.9555535316467285,
+      "eval_runtime": 50.3064,
+      "eval_samples_per_second": 14.909,
+      "eval_steps_per_second": 0.477,
+      "step": 100
+    },
+    {
+      "epoch": 1.97,
+      "step": 104,
+      "total_flos": 0.0,
+      "train_loss": 0.38887147261546207,
+      "train_runtime": 1183.8142,
+      "train_samples_per_second": 11.404,
+      "train_steps_per_second": 0.088
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 104,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 2,
+  "save_steps": 500,
+  "total_flos": 0.0,
+  "train_batch_size": 2,
+  "trial_name": null,
+  "trial_params": null
+}