diff --git a/.DS_Store b/.DS_Store
new file mode 100644
index 0000000000000000000000000000000000000000..85d4bc5c44616728f4670ce42badfeb0fc33c712
Binary files /dev/null and b/.DS_Store differ
diff --git a/checkpoint-10/README.md b/checkpoint-10/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..64d55750c9c68c8ef0ccff2f65b1fd32a16578a9
--- /dev/null
+++ b/checkpoint-10/README.md
@@ -0,0 +1,202 @@
+---
+library_name: peft
+base_model: ../pretrained/MiniCPM-2B-dpo-bf16/
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and BibTeX information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.10.0
\ No newline at end of file
diff --git a/checkpoint-10/adapter_config.json b/checkpoint-10/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..d0eb9db7c6b0c827aba0e86e5057f651b0f868c9
--- /dev/null
+++ b/checkpoint-10/adapter_config.json
@@ -0,0 +1,29 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "../pretrained/MiniCPM-2B-dpo-bf16/",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": "gaussian",
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-10/adapter_model.safetensors b/checkpoint-10/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..0c49e0b36b0f59b4667f08fa7d31832a70b0f029
--- /dev/null
+++ b/checkpoint-10/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f4cb31cc373de33f5dd40340ee19f3d56cbe5d31be7553a60542a3e4b63eaac1
+size 5919456
diff --git a/checkpoint-10/optimizer.pt b/checkpoint-10/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e617b3749a7436d0f717db29808639aa61c33606
--- /dev/null
+++ b/checkpoint-10/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:28fa42275a7ad0b9233963bf130a7a38f00549d3464cd05287a011f7283b386e
+size 11930938
diff --git a/checkpoint-10/rng_state_0.pth b/checkpoint-10/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..d07c0d30a3625cf05559328a4f294c03f120ae4e
--- /dev/null
+++ b/checkpoint-10/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:279c231f7db5849b53ea6f61278709c8be27bcc46fc1b36100377bf36c55cfb9
+size 15024
diff --git a/checkpoint-10/rng_state_1.pth b/checkpoint-10/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..75dabdf47ca16589484e7a8764746b8cd6ed5460
--- /dev/null
+++ b/checkpoint-10/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:35738ebb9e53709608b7f4feaf1edbde1a19901d813f15922153ded80ead6540
+size 15024
diff --git a/checkpoint-10/rng_state_2.pth b/checkpoint-10/rng_state_2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..ddccb6f4cdc16ba7966cfc15f73a942d772698dc
--- /dev/null
+++ b/checkpoint-10/rng_state_2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3f411b8c60d90c0733bb03c4955ea2e40ab35464f214cb47cc4d6d0eaa83bc79
+size 15024
diff --git a/checkpoint-10/rng_state_3.pth b/checkpoint-10/rng_state_3.pth
new file mode 100644
index 0000000000000000000000000000000000000000..a3043bb36c63411ce0f5a39028bda1d8676ba04d
--- /dev/null
+++ b/checkpoint-10/rng_state_3.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7224ff493b87486a3e2c3001115ad539913e8fe95cf25f4bcae3236f97e83f41
+size 15024
diff --git a/checkpoint-10/scheduler.pt b/checkpoint-10/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..badaf6fcf3bf6390dc84b12a85523b22f0185197
--- /dev/null
+++ b/checkpoint-10/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:284a8517b2310fb02b51d6b8ddd318d3f6e139475d47fc950976ac1287debd43
+size 1064
diff --git a/checkpoint-10/trainer_state.json b/checkpoint-10/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..a036513474d8fd4cebacedeff30da78dd0315c3a
--- /dev/null
+++ b/checkpoint-10/trainer_state.json
@@ -0,0 +1,28 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.0007256367462448298,
+  "eval_steps": 500,
+  "global_step": 10,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0007256367462448298,
+      "grad_norm": 15.6875,
+      "learning_rate": 5e-06,
+      "loss": 3.2562,
+      "step": 10
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 13781,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 10,
+  "total_flos": 0.0,
+  "train_batch_size": 5,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-10/training_args.bin b/checkpoint-10/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..4c28dcaa99e0497b1c861e1642ce3651d4277bea
--- /dev/null
+++ b/checkpoint-10/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9f4cfc15f45dfcabb458cfe7e31a5e2a32e96eacab3ae733ab7eefd0eca041b5
+size 4984
diff --git a/checkpoint-100/README.md b/checkpoint-100/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..64d55750c9c68c8ef0ccff2f65b1fd32a16578a9
--- /dev/null
+++ b/checkpoint-100/README.md
@@ -0,0 +1,202 @@
+---
+library_name: peft
+base_model: ../pretrained/MiniCPM-2B-dpo-bf16/
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and BibTeX information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.10.0
\ No newline at end of file
diff --git a/checkpoint-100/adapter_config.json b/checkpoint-100/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..d0eb9db7c6b0c827aba0e86e5057f651b0f868c9
--- /dev/null
+++ b/checkpoint-100/adapter_config.json
@@ -0,0 +1,29 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "../pretrained/MiniCPM-2B-dpo-bf16/",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": "gaussian",
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-100/adapter_model.safetensors b/checkpoint-100/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..021fc852faed26d156485766fe877116f60430c6
--- /dev/null
+++ b/checkpoint-100/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e721936252802fd98963ef70a467193ddec06dcd286f36a6862a642d99cbb657
+size 5919456
diff --git a/checkpoint-100/optimizer.pt b/checkpoint-100/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8067fdfe1605f18619726be4c41ce01307c8af1d
--- /dev/null
+++ b/checkpoint-100/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:80a21e2292a229656009d74eddf004f8818323f0e22e5f248423b3fb4a2ce550
+size 11930938
diff --git a/checkpoint-100/rng_state_0.pth b/checkpoint-100/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..c8bbbc393d2f7f6d7f761800ae69c6c68a174255
--- /dev/null
+++ b/checkpoint-100/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:85f43e42ff30186bb51f3d90dcd7d261d6e09960636961fd696f9478303d1331
+size 15024
diff --git a/checkpoint-100/rng_state_1.pth b/checkpoint-100/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..edd22345065182eb183497bc022e0478fcff51b0
--- /dev/null
+++ b/checkpoint-100/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:daf210db9013b20416569b6811b878570fbbf461f867de41a8a69fd07f0d2c8c
+size 15024
diff --git a/checkpoint-100/rng_state_2.pth b/checkpoint-100/rng_state_2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..a0d578b7350bb04a417db4a4c480b09aca69ee1a
--- /dev/null
+++ b/checkpoint-100/rng_state_2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:132e0dad8f05cba3da38386b81951c801df7c5c2c1cf9e06b5d359b7b92422da
+size 15024
diff --git a/checkpoint-100/rng_state_3.pth b/checkpoint-100/rng_state_3.pth
new file mode 100644
index 0000000000000000000000000000000000000000..b148f15da5fe8c4b609ef13484f64240b435570a
--- /dev/null
+++ b/checkpoint-100/rng_state_3.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:441e2aab46e3935d5d49029fda3ebaf07053ac3a8e8a6eb7aca038ab1127bea1
+size 15024
diff --git a/checkpoint-100/scheduler.pt b/checkpoint-100/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..09e282e1ca9a123e2774ffa443cbb8724cb09ae6
--- /dev/null
+++ b/checkpoint-100/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:673ccf272a6ac969319297986ad3a484281eb54814eca9e7ffb558668525c080
+size 1064
diff --git a/checkpoint-100/trainer_state.json b/checkpoint-100/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..917d53134f51d8b9c14b1173b4226166e8fedf00
--- /dev/null
+++ b/checkpoint-100/trainer_state.json
@@ -0,0 +1,91 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.007256367462448298,
+  "eval_steps": 500,
+  "global_step": 100,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0007256367462448298,
+      "grad_norm": 15.6875,
+      "learning_rate": 5e-06,
+      "loss": 3.2562,
+      "step": 10
+    },
+    {
+      "epoch": 0.0014512734924896596,
+      "grad_norm": 17.5,
+      "learning_rate": 1e-05,
+      "loss": 2.9076,
+      "step": 20
+    },
+    {
+      "epoch": 0.0021769102387344894,
+      "grad_norm": 19.125,
+      "learning_rate": 1.5e-05,
+      "loss": 3.0281,
+      "step": 30
+    },
+    {
+      "epoch": 0.0029025469849793192,
+      "grad_norm": 17.875,
+      "learning_rate": 2e-05,
+      "loss": 2.6225,
+      "step": 40
+    },
+    {
+      "epoch": 0.003628183731224149,
+      "grad_norm": 13.9375,
+      "learning_rate": 2.5e-05,
+      "loss": 2.5137,
+      "step": 50
+    },
+    {
+      "epoch": 0.004353820477468979,
+      "grad_norm": 10.6875,
+      "learning_rate": 3e-05,
+      "loss": 1.994,
+      "step": 60
+    },
+    {
+      "epoch": 0.005079457223713809,
+      "grad_norm": 7.53125,
+      "learning_rate": 3.5e-05,
+      "loss": 1.5519,
+      "step": 70
+    },
+    {
+      "epoch": 0.0058050939699586385,
+      "grad_norm": 7.09375,
+      "learning_rate": 4e-05,
+      "loss": 1.2358,
+      "step": 80
+    },
+    {
+      "epoch": 0.006530730716203468,
+      "grad_norm": 8.0625,
+      "learning_rate": 4.5e-05,
+      "loss": 0.953,
+      "step": 90
+    },
+    {
+      "epoch": 0.007256367462448298,
+      "grad_norm": 6.6875,
+      "learning_rate": 5e-05,
+      "loss": 0.6256,
+      "step": 100
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 13781,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 10,
+  "total_flos": 0.0,
+  "train_batch_size": 5,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-100/training_args.bin b/checkpoint-100/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..4c28dcaa99e0497b1c861e1642ce3651d4277bea
--- /dev/null
+++ b/checkpoint-100/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9f4cfc15f45dfcabb458cfe7e31a5e2a32e96eacab3ae733ab7eefd0eca041b5
+size 4984
diff --git a/checkpoint-110/README.md b/checkpoint-110/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..64d55750c9c68c8ef0ccff2f65b1fd32a16578a9
--- /dev/null
+++ b/checkpoint-110/README.md
@@ -0,0 +1,202 @@
+---
+library_name: peft
+base_model: ../pretrained/MiniCPM-2B-dpo-bf16/
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and BibTeX information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.10.0
\ No newline at end of file
diff --git a/checkpoint-110/adapter_config.json b/checkpoint-110/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..d0eb9db7c6b0c827aba0e86e5057f651b0f868c9
--- /dev/null
+++ b/checkpoint-110/adapter_config.json
@@ -0,0 +1,29 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "../pretrained/MiniCPM-2B-dpo-bf16/",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": "gaussian",
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-110/adapter_model.safetensors b/checkpoint-110/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..e8f320c54d69cead030fd4b88806376cd01efd86
--- /dev/null
+++ b/checkpoint-110/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ad6a6cac831f743cfcbb872e7f91d48561921eda330fbec71ebb5da5cda344bc
+size 5919456
diff --git a/checkpoint-110/optimizer.pt b/checkpoint-110/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..51b7a0e6196db34c33cfc8bc6a9293ccecf5ac94
--- /dev/null
+++ b/checkpoint-110/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7c1d05c28b69b29cab4f2f112063e58e2eb30366e9fd3470b771ab44766ba229
+size 11930938
diff --git a/checkpoint-110/rng_state_0.pth b/checkpoint-110/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..ab1b3f8498e8893f53b0e7445652083013543648
--- /dev/null
+++ b/checkpoint-110/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ac95db18ae6a1e414f19563e15335ec1a3d44d5b26a3896a591a42bf53daac57
+size 15024
diff --git a/checkpoint-110/rng_state_1.pth b/checkpoint-110/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..7760ee8749e66a9d62b997c70fef033a376345a8
--- /dev/null
+++ b/checkpoint-110/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9ad2adf8c9d84012d5c08bc34b7d7b7bd8f571238b97deba7b563bc8579f284e
+size 15024
diff --git a/checkpoint-110/rng_state_2.pth b/checkpoint-110/rng_state_2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..94ca491bdda1c6621eb39af13f7cf2922e82a4dd
--- /dev/null
+++ b/checkpoint-110/rng_state_2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d02fc29e95ce367f0b8273bbbf6e41186c317282c9a486968d768ffcb716f8dd
+size 15024
diff --git a/checkpoint-110/rng_state_3.pth b/checkpoint-110/rng_state_3.pth
new file mode 100644
index 0000000000000000000000000000000000000000..3aea3e1a77a34852a8edf99d524471d3e209ba56
--- /dev/null
+++ b/checkpoint-110/rng_state_3.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:540b9cf222feb8019c875aee3fd37ce5b892ea395b93ddd0b75459462687e321
+size 15024
diff --git a/checkpoint-110/scheduler.pt b/checkpoint-110/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d0e3e18d469cb0c6a7af39220bf58584fde05dff
--- /dev/null
+++ b/checkpoint-110/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9f8534b1b0a88b0a2dee78197a1a006b8ead7b92df098f891541715a1dc34a24
+size 1064
diff --git a/checkpoint-110/trainer_state.json b/checkpoint-110/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..2ec4597924fd5e620cd2ed3daa0d59d0e7a01546
--- /dev/null
+++ b/checkpoint-110/trainer_state.json
@@ -0,0 +1,98 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.007982004208693128,
+  "eval_steps": 500,
+  "global_step": 110,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0007256367462448298,
+      "grad_norm": 15.6875,
+      "learning_rate": 5e-06,
+      "loss": 3.2562,
+      "step": 10
+    },
+    {
+      "epoch": 0.0014512734924896596,
+      "grad_norm": 17.5,
+      "learning_rate": 1e-05,
+      "loss": 2.9076,
+      "step": 20
+    },
+    {
+      "epoch": 0.0021769102387344894,
+      "grad_norm": 19.125,
+      "learning_rate": 1.5e-05,
+      "loss": 3.0281,
+      "step": 30
+    },
+    {
+      "epoch": 0.0029025469849793192,
+      "grad_norm": 17.875,
+      "learning_rate": 2e-05,
+      "loss": 2.6225,
+      "step": 40
+    },
+    {
+      "epoch": 0.003628183731224149,
+      "grad_norm": 13.9375,
+      "learning_rate": 2.5e-05,
+      "loss": 2.5137,
+      "step": 50
+    },
+    {
+      "epoch": 0.004353820477468979,
+      "grad_norm": 10.6875,
+      "learning_rate": 3e-05,
+      "loss": 1.994,
+      "step": 60
+    },
+    {
+      "epoch": 0.005079457223713809,
+      "grad_norm": 7.53125,
+      "learning_rate": 3.5e-05,
+      "loss": 1.5519,
+      "step": 70
+    },
+    {
+      "epoch": 0.0058050939699586385,
+      "grad_norm": 7.09375,
+      "learning_rate": 4e-05,
+      "loss": 1.2358,
+      "step": 80
+    },
+    {
+      "epoch": 0.006530730716203468,
+      "grad_norm": 8.0625,
+      "learning_rate": 4.5e-05,
+      "loss": 0.953,
+      "step": 90
+    },
+    {
+      "epoch": 0.007256367462448298,
+      "grad_norm": 6.6875,
+      "learning_rate": 5e-05,
+      "loss": 0.6256,
+      "step": 100
+    },
+    {
+      "epoch": 0.007982004208693128,
+      "grad_norm": 5.03125,
+      "learning_rate": 4.9999934086574596e-05,
+      "loss": 0.6601,
+      "step": 110
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 13781,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 10,
+  "total_flos": 0.0,
+  "train_batch_size": 5,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-110/training_args.bin b/checkpoint-110/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..4c28dcaa99e0497b1c861e1642ce3651d4277bea
--- /dev/null
+++ b/checkpoint-110/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9f4cfc15f45dfcabb458cfe7e31a5e2a32e96eacab3ae733ab7eefd0eca041b5
+size 4984
diff --git a/checkpoint-120/README.md b/checkpoint-120/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..64d55750c9c68c8ef0ccff2f65b1fd32a16578a9
--- /dev/null
+++ b/checkpoint-120/README.md
@@ -0,0 +1,202 @@
+---
+library_name: peft
+base_model: ../pretrained/MiniCPM-2B-dpo-bf16/
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.10.0
\ No newline at end of file
diff --git a/checkpoint-120/adapter_config.json b/checkpoint-120/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..d0eb9db7c6b0c827aba0e86e5057f651b0f868c9
--- /dev/null
+++ b/checkpoint-120/adapter_config.json
@@ -0,0 +1,29 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "../pretrained/MiniCPM-2B-dpo-bf16/",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": "gaussian",
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-120/adapter_model.safetensors b/checkpoint-120/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..7e8497db18de65989ef34a388bb2eeae884c90eb
--- /dev/null
+++ b/checkpoint-120/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d0dc098458d2b538a13af7f923cca9baadd6cbad6a3ff47358620fc1337d4cdf
+size 5919456
diff --git a/checkpoint-120/optimizer.pt b/checkpoint-120/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ab0762232da80d50577e5cdde8216c8012dd01a5
--- /dev/null
+++ b/checkpoint-120/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0b09d6d7d21066366d44b942d7a2b0b49d79b9fb3b2e17af43c09a19b626ed07
+size 11930938
diff --git a/checkpoint-120/rng_state_0.pth b/checkpoint-120/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..3d502a9e5a520de440672e348bb1c204e25bdd4a
--- /dev/null
+++ b/checkpoint-120/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e010d5c5e5ff459e09cee093e035058bce80bd0e562b9008cf49e37a37c4a265
+size 15024
diff --git a/checkpoint-120/rng_state_1.pth b/checkpoint-120/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..97e170d86dde07d3718359b683368b6ef495a27a
--- /dev/null
+++ b/checkpoint-120/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8ceaec8c84867fda1405ca685c206ff5498d51b755970edb435f4777d1649c24
+size 15024
diff --git a/checkpoint-120/rng_state_2.pth b/checkpoint-120/rng_state_2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..1f4509894b817e8040026ae28468826ddeeaec22
--- /dev/null
+++ b/checkpoint-120/rng_state_2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:791eebd4302125380e3da4e87668a4bb1db8af54a2e9f9519cb225a5eefb78b6
+size 15024
diff --git a/checkpoint-120/rng_state_3.pth b/checkpoint-120/rng_state_3.pth
new file mode 100644
index 0000000000000000000000000000000000000000..73faf1586f171185dab1d897d8a662e1288539f7
--- /dev/null
+++ b/checkpoint-120/rng_state_3.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7b5e8577ac55da749e71c292571c66ba7068eaeeac8f69a2d9ecb004c4ea24df
+size 15024
diff --git a/checkpoint-120/scheduler.pt b/checkpoint-120/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6de9e6b38a461c515350b721dea64c4d3af752f5
--- /dev/null
+++ b/checkpoint-120/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ce3f2037fd0e6b38f795755403c77706eafd4be540aa363267475ac986bd25f5
+size 1064
diff --git a/checkpoint-120/trainer_state.json b/checkpoint-120/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..215936037c6e7291c48ed8168457355ad2c857b5
--- /dev/null
+++ b/checkpoint-120/trainer_state.json
@@ -0,0 +1,105 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.008707640954937958,
+  "eval_steps": 500,
+  "global_step": 120,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0007256367462448298,
+      "grad_norm": 15.6875,
+      "learning_rate": 5e-06,
+      "loss": 3.2562,
+      "step": 10
+    },
+    {
+      "epoch": 0.0014512734924896596,
+      "grad_norm": 17.5,
+      "learning_rate": 1e-05,
+      "loss": 2.9076,
+      "step": 20
+    },
+    {
+      "epoch": 0.0021769102387344894,
+      "grad_norm": 19.125,
+      "learning_rate": 1.5e-05,
+      "loss": 3.0281,
+      "step": 30
+    },
+    {
+      "epoch": 0.0029025469849793192,
+      "grad_norm": 17.875,
+      "learning_rate": 2e-05,
+      "loss": 2.6225,
+      "step": 40
+    },
+    {
+      "epoch": 0.003628183731224149,
+      "grad_norm": 13.9375,
+      "learning_rate": 2.5e-05,
+      "loss": 2.5137,
+      "step": 50
+    },
+    {
+      "epoch": 0.004353820477468979,
+      "grad_norm": 10.6875,
+      "learning_rate": 3e-05,
+      "loss": 1.994,
+      "step": 60
+    },
+    {
+      "epoch": 0.005079457223713809,
+      "grad_norm": 7.53125,
+      "learning_rate": 3.5e-05,
+      "loss": 1.5519,
+      "step": 70
+    },
+    {
+      "epoch": 0.0058050939699586385,
+      "grad_norm": 7.09375,
+      "learning_rate": 4e-05,
+      "loss": 1.2358,
+      "step": 80
+    },
+    {
+      "epoch": 0.006530730716203468,
+      "grad_norm": 8.0625,
+      "learning_rate": 4.5e-05,
+      "loss": 0.953,
+      "step": 90
+    },
+    {
+      "epoch": 0.007256367462448298,
+      "grad_norm": 6.6875,
+      "learning_rate": 5e-05,
+      "loss": 0.6256,
+      "step": 100
+    },
+    {
+      "epoch": 0.007982004208693128,
+      "grad_norm": 5.03125,
+      "learning_rate": 4.9999934086574596e-05,
+      "loss": 0.6601,
+      "step": 110
+    },
+    {
+      "epoch": 0.008707640954937958,
+      "grad_norm": 5.59375,
+      "learning_rate": 4.9999736346645943e-05,
+      "loss": 0.659,
+      "step": 120
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 13781,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 10,
+  "total_flos": 0.0,
+  "train_batch_size": 5,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-120/training_args.bin b/checkpoint-120/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..4c28dcaa99e0497b1c861e1642ce3651d4277bea
--- /dev/null
+++ b/checkpoint-120/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9f4cfc15f45dfcabb458cfe7e31a5e2a32e96eacab3ae733ab7eefd0eca041b5
+size 4984
diff --git a/checkpoint-130/README.md b/checkpoint-130/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..64d55750c9c68c8ef0ccff2f65b1fd32a16578a9
--- /dev/null
+++ b/checkpoint-130/README.md
@@ -0,0 +1,202 @@
+---
+library_name: peft
+base_model: ../pretrained/MiniCPM-2B-dpo-bf16/
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.10.0
\ No newline at end of file
diff --git a/checkpoint-130/adapter_config.json b/checkpoint-130/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..d0eb9db7c6b0c827aba0e86e5057f651b0f868c9
--- /dev/null
+++ b/checkpoint-130/adapter_config.json
@@ -0,0 +1,29 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "../pretrained/MiniCPM-2B-dpo-bf16/",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": "gaussian",
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-130/adapter_model.safetensors b/checkpoint-130/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..a139d867c4780a888f7e9f0ecb675b49bc48fc62
--- /dev/null
+++ b/checkpoint-130/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9fe95c4565aeb393b1b396f18af579a69771a674483dbab2d6c186310fb08c74
+size 5919456
diff --git a/checkpoint-130/optimizer.pt b/checkpoint-130/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..cb579f9e05ade0b709a229b5437a9cbf1d366df3
--- /dev/null
+++ b/checkpoint-130/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:30edf8ef192e630551b6f042d64674afb872062ad1f496520768b24e1d3fdb05
+size 11930938
diff --git a/checkpoint-130/rng_state_0.pth b/checkpoint-130/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..8b936af33e9d245baa21fd749c30683b93cd7deb
--- /dev/null
+++ b/checkpoint-130/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7ced0c6ba13f477a0dbd44034592fe000f226e71898cbab5bf87ce59dc6bde36
+size 15024
diff --git a/checkpoint-130/rng_state_1.pth b/checkpoint-130/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..042841fdacad262623786e1a413c9ff2141e72dd
--- /dev/null
+++ b/checkpoint-130/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e97d793be909b79220b59b211d87fda9d35184d2305c00641e9b4531b73b8441
+size 15024
diff --git a/checkpoint-130/rng_state_2.pth b/checkpoint-130/rng_state_2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..a58abcc92adfb7f3e1dd84f85898b9b66ce6fd32
--- /dev/null
+++ b/checkpoint-130/rng_state_2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:60416c656b12aaecd01e32e964532f371c0a6b02a4b9b91ccfdc35d45dce0050
+size 15024
diff --git a/checkpoint-130/rng_state_3.pth b/checkpoint-130/rng_state_3.pth
new file mode 100644
index 0000000000000000000000000000000000000000..3bce0c189cda9aff58d716ffa2eca649bd8a6592
--- /dev/null
+++ b/checkpoint-130/rng_state_3.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:218ddffb5978f25094e6ad3cfbfc85ad7b807a183e3bc9f6f15bd471542d7273
+size 15024
diff --git a/checkpoint-130/scheduler.pt b/checkpoint-130/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e91583d2b5d95ffe3e7eba0911179ce06e571d3b
--- /dev/null
+++ b/checkpoint-130/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c1cfc563a99d275c44661506e6df813c32ae4b04cafb6adaaa4449e91127f43d
+size 1064
diff --git a/checkpoint-130/trainer_state.json b/checkpoint-130/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..d8556de5e5809ca1123eed2bfb3e45471d1c1a50
--- /dev/null
+++ b/checkpoint-130/trainer_state.json
@@ -0,0 +1,112 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.009433277701182788,
+  "eval_steps": 500,
+  "global_step": 130,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0007256367462448298,
+      "grad_norm": 15.6875,
+      "learning_rate": 5e-06,
+      "loss": 3.2562,
+      "step": 10
+    },
+    {
+      "epoch": 0.0014512734924896596,
+      "grad_norm": 17.5,
+      "learning_rate": 1e-05,
+      "loss": 2.9076,
+      "step": 20
+    },
+    {
+      "epoch": 0.0021769102387344894,
+      "grad_norm": 19.125,
+      "learning_rate": 1.5e-05,
+      "loss": 3.0281,
+      "step": 30
+    },
+    {
+      "epoch": 0.0029025469849793192,
+      "grad_norm": 17.875,
+      "learning_rate": 2e-05,
+      "loss": 2.6225,
+      "step": 40
+    },
+    {
+      "epoch": 0.003628183731224149,
+      "grad_norm": 13.9375,
+      "learning_rate": 2.5e-05,
+      "loss": 2.5137,
+      "step": 50
+    },
+    {
+      "epoch": 0.004353820477468979,
+      "grad_norm": 10.6875,
+      "learning_rate": 3e-05,
+      "loss": 1.994,
+      "step": 60
+    },
+    {
+      "epoch": 0.005079457223713809,
+      "grad_norm": 7.53125,
+      "learning_rate": 3.5e-05,
+      "loss": 1.5519,
+      "step": 70
+    },
+    {
+      "epoch": 0.0058050939699586385,
+      "grad_norm": 7.09375,
+      "learning_rate": 4e-05,
+      "loss": 1.2358,
+      "step": 80
+    },
+    {
+      "epoch": 0.006530730716203468,
+      "grad_norm": 8.0625,
+      "learning_rate": 4.5e-05,
+      "loss": 0.953,
+      "step": 90
+    },
+    {
+      "epoch": 0.007256367462448298,
+      "grad_norm": 6.6875,
+      "learning_rate": 5e-05,
+      "loss": 0.6256,
+      "step": 100
+    },
+    {
+      "epoch": 0.007982004208693128,
+      "grad_norm": 5.03125,
+      "learning_rate": 4.9999934086574596e-05,
+      "loss": 0.6601,
+      "step": 110
+    },
+    {
+      "epoch": 0.008707640954937958,
+      "grad_norm": 5.59375,
+      "learning_rate": 4.9999736346645943e-05,
+      "loss": 0.659,
+      "step": 120
+    },
+    {
+      "epoch": 0.009433277701182788,
+      "grad_norm": 3.953125,
+      "learning_rate": 4.999940678125673e-05,
+      "loss": 0.512,
+      "step": 130
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 13781,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 10,
+  "total_flos": 0.0,
+  "train_batch_size": 5,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-130/training_args.bin b/checkpoint-130/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..4c28dcaa99e0497b1c861e1642ce3651d4277bea
--- /dev/null
+++ b/checkpoint-130/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9f4cfc15f45dfcabb458cfe7e31a5e2a32e96eacab3ae733ab7eefd0eca041b5
+size 4984
diff --git a/checkpoint-140/README.md b/checkpoint-140/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..64d55750c9c68c8ef0ccff2f65b1fd32a16578a9
--- /dev/null
+++ b/checkpoint-140/README.md
@@ -0,0 +1,202 @@
+---
+library_name: peft
+base_model: ../pretrained/MiniCPM-2B-dpo-bf16/
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.10.0
\ No newline at end of file
diff --git a/checkpoint-140/adapter_config.json b/checkpoint-140/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..d0eb9db7c6b0c827aba0e86e5057f651b0f868c9
--- /dev/null
+++ b/checkpoint-140/adapter_config.json
@@ -0,0 +1,29 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "../pretrained/MiniCPM-2B-dpo-bf16/",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": "gaussian",
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-140/adapter_model.safetensors b/checkpoint-140/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..7baf961fa37f7b9ac76c4a662a30ccb4f144c643
--- /dev/null
+++ b/checkpoint-140/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:54cdd542717001f813dbafcb576b3f296e4a2f72efc9aeadb5557c8d7d1945fb
+size 5919456
diff --git a/checkpoint-140/optimizer.pt b/checkpoint-140/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2b85b42ac8274fe14e803cad97d8ad3adcca6167
--- /dev/null
+++ b/checkpoint-140/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:be96e1e75721e995205be7310e523fdedd96da16498e62cade920fc940ae2b32
+size 11930938
diff --git a/checkpoint-140/rng_state_0.pth b/checkpoint-140/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..4a45d49b702e38a5a5d428f1c4170f10ba23d86a
--- /dev/null
+++ b/checkpoint-140/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6ad67dbef2a21b26f3117ca45d621957bf72b1116535cf6e524b17661b94b1a9
+size 15024
diff --git a/checkpoint-140/rng_state_1.pth b/checkpoint-140/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..2c6cf166b16a44d3e92adf964c8fc8ac82daa466
--- /dev/null
+++ b/checkpoint-140/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ca3afd4f067268e4c6ff34242266c9e70bce106dd4d7365781bb893119a4033d
+size 15024
diff --git a/checkpoint-140/rng_state_2.pth b/checkpoint-140/rng_state_2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..6bc0e1fa490cb0406141b021536c5be4e11e69e0
--- /dev/null
+++ b/checkpoint-140/rng_state_2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f7a56e9bc058e763d68d477e80d923c2fe559a75d518ac8d5d693397a88304b3
+size 15024
diff --git a/checkpoint-140/rng_state_3.pth b/checkpoint-140/rng_state_3.pth
new file mode 100644
index 0000000000000000000000000000000000000000..19d3b9f2dd05718c2f4579d52dfb0ab3259e1869
--- /dev/null
+++ b/checkpoint-140/rng_state_3.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8aef494a54b19f2a6c92fb251d8acadbfc7c21bcba926f5a7f5fa134981bb678
+size 15024
diff --git a/checkpoint-140/scheduler.pt b/checkpoint-140/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..236a62e03df856086abcbd03a727077d25f0b0f0
--- /dev/null
+++ b/checkpoint-140/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3f37f75b563805241f01236805ecd93e76fa35840939411337a4cd1f0771215b
+size 1064
diff --git a/checkpoint-140/trainer_state.json b/checkpoint-140/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..03ea437367ea672348035a6ecd7da9ce46e85398
--- /dev/null
+++ b/checkpoint-140/trainer_state.json
@@ -0,0 +1,119 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.010158914447427617,
+  "eval_steps": 500,
+  "global_step": 140,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0007256367462448298,
+      "grad_norm": 15.6875,
+      "learning_rate": 5e-06,
+      "loss": 3.2562,
+      "step": 10
+    },
+    {
+      "epoch": 0.0014512734924896596,
+      "grad_norm": 17.5,
+      "learning_rate": 1e-05,
+      "loss": 2.9076,
+      "step": 20
+    },
+    {
+      "epoch": 0.0021769102387344894,
+      "grad_norm": 19.125,
+      "learning_rate": 1.5e-05,
+      "loss": 3.0281,
+      "step": 30
+    },
+    {
+      "epoch": 0.0029025469849793192,
+      "grad_norm": 17.875,
+      "learning_rate": 2e-05,
+      "loss": 2.6225,
+      "step": 40
+    },
+    {
+      "epoch": 0.003628183731224149,
+      "grad_norm": 13.9375,
+      "learning_rate": 2.5e-05,
+      "loss": 2.5137,
+      "step": 50
+    },
+    {
+      "epoch": 0.004353820477468979,
+      "grad_norm": 10.6875,
+      "learning_rate": 3e-05,
+      "loss": 1.994,
+      "step": 60
+    },
+    {
+      "epoch": 0.005079457223713809,
+      "grad_norm": 7.53125,
+      "learning_rate": 3.5e-05,
+      "loss": 1.5519,
+      "step": 70
+    },
+    {
+      "epoch": 0.0058050939699586385,
+      "grad_norm": 7.09375,
+      "learning_rate": 4e-05,
+      "loss": 1.2358,
+      "step": 80
+    },
+    {
+      "epoch": 0.006530730716203468,
+      "grad_norm": 8.0625,
+      "learning_rate": 4.5e-05,
+      "loss": 0.953,
+      "step": 90
+    },
+    {
+      "epoch": 0.007256367462448298,
+      "grad_norm": 6.6875,
+      "learning_rate": 5e-05,
+      "loss": 0.6256,
+      "step": 100
+    },
+    {
+      "epoch": 0.007982004208693128,
+      "grad_norm": 5.03125,
+      "learning_rate": 4.9999934086574596e-05,
+      "loss": 0.6601,
+      "step": 110
+    },
+    {
+      "epoch": 0.008707640954937958,
+      "grad_norm": 5.59375,
+      "learning_rate": 4.9999736346645943e-05,
+      "loss": 0.659,
+      "step": 120
+    },
+    {
+      "epoch": 0.009433277701182788,
+      "grad_norm": 3.953125,
+      "learning_rate": 4.999940678125673e-05,
+      "loss": 0.512,
+      "step": 130
+    },
+    {
+      "epoch": 0.010158914447427617,
+      "grad_norm": 5.46875,
+      "learning_rate": 4.9998945392144796e-05,
+      "loss": 0.5522,
+      "step": 140
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 13781,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 10,
+  "total_flos": 0.0,
+  "train_batch_size": 5,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-140/training_args.bin b/checkpoint-140/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..4c28dcaa99e0497b1c861e1642ce3651d4277bea
--- /dev/null
+++ b/checkpoint-140/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9f4cfc15f45dfcabb458cfe7e31a5e2a32e96eacab3ae733ab7eefd0eca041b5
+size 4984
diff --git a/checkpoint-150/README.md b/checkpoint-150/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..64d55750c9c68c8ef0ccff2f65b1fd32a16578a9
--- /dev/null
+++ b/checkpoint-150/README.md
@@ -0,0 +1,202 @@
+---
+library_name: peft
+base_model: ../pretrained/MiniCPM-2B-dpo-bf16/
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.10.0
\ No newline at end of file
diff --git a/checkpoint-150/adapter_config.json b/checkpoint-150/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..d0eb9db7c6b0c827aba0e86e5057f651b0f868c9
--- /dev/null
+++ b/checkpoint-150/adapter_config.json
@@ -0,0 +1,29 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "../pretrained/MiniCPM-2B-dpo-bf16/",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": "gaussian",
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-150/adapter_model.safetensors b/checkpoint-150/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..9cdc41b41be2c2800a54073ecf4c2689dd43f887
--- /dev/null
+++ b/checkpoint-150/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bb409bd39d5975a7448adb901bac1eb06398ddf51d54cd5291001a7604fb6dc2
+size 5919456
diff --git a/checkpoint-150/optimizer.pt b/checkpoint-150/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..473560083d8ac407f2c3eeb70b1bcbad53dd4f4e
--- /dev/null
+++ b/checkpoint-150/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d9f864f1bca3aefbbc513b4a23725dbafb84a08442854a0d3825a3020cd44e50
+size 11930938
diff --git a/checkpoint-150/rng_state_0.pth b/checkpoint-150/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..106b260c38aee4031442b01a0b98f48c8b303ec3
--- /dev/null
+++ b/checkpoint-150/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c342af2ce35811f7314d04dcf27fe047ef7a2c2c65a53827cf5bfa3bbef9abbb
+size 15024
diff --git a/checkpoint-150/rng_state_1.pth b/checkpoint-150/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..886c97fe5792aca73064ce33a7d132bf9491199f
--- /dev/null
+++ b/checkpoint-150/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:732cda9265841aab840d0742ab54e81d4890cc436da4ad72a7491a2de6e456cd
+size 15024
diff --git a/checkpoint-150/rng_state_2.pth b/checkpoint-150/rng_state_2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..64926e520c41683a9beadc0b8bc88f1985920d14
--- /dev/null
+++ b/checkpoint-150/rng_state_2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b9e93aaf91c3d45dc0a00b2862a0b23147bc87200884e67202507624081ba206
+size 15024
diff --git a/checkpoint-150/rng_state_3.pth b/checkpoint-150/rng_state_3.pth
new file mode 100644
index 0000000000000000000000000000000000000000..3f7bb92cc45f98dcd8c4767f5cedf6b4ddd4068f
--- /dev/null
+++ b/checkpoint-150/rng_state_3.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bde38e9c7ebd4dcc6310f8e51cdb47e2f01b8ae902f2ef5613c6f4a36b2b5231
+size 15024
diff --git a/checkpoint-150/scheduler.pt b/checkpoint-150/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..254fce450e6a776d19a173dbf9ac5138c9d637a5
--- /dev/null
+++ b/checkpoint-150/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dfbaeb5b70ff8c056924a39650a51a7bdb8be3dcfb3678304690b37e1bbf9e25
+size 1064
diff --git a/checkpoint-150/trainer_state.json b/checkpoint-150/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..db15b5125125f336f0ce8b7d2ffe0ae8c795abe5
--- /dev/null
+++ b/checkpoint-150/trainer_state.json
@@ -0,0 +1,126 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.010884551193672447,
+  "eval_steps": 500,
+  "global_step": 150,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0007256367462448298,
+      "grad_norm": 15.6875,
+      "learning_rate": 5e-06,
+      "loss": 3.2562,
+      "step": 10
+    },
+    {
+      "epoch": 0.0014512734924896596,
+      "grad_norm": 17.5,
+      "learning_rate": 1e-05,
+      "loss": 2.9076,
+      "step": 20
+    },
+    {
+      "epoch": 0.0021769102387344894,
+      "grad_norm": 19.125,
+      "learning_rate": 1.5e-05,
+      "loss": 3.0281,
+      "step": 30
+    },
+    {
+      "epoch": 0.0029025469849793192,
+      "grad_norm": 17.875,
+      "learning_rate": 2e-05,
+      "loss": 2.6225,
+      "step": 40
+    },
+    {
+      "epoch": 0.003628183731224149,
+      "grad_norm": 13.9375,
+      "learning_rate": 2.5e-05,
+      "loss": 2.5137,
+      "step": 50
+    },
+    {
+      "epoch": 0.004353820477468979,
+      "grad_norm": 10.6875,
+      "learning_rate": 3e-05,
+      "loss": 1.994,
+      "step": 60
+    },
+    {
+      "epoch": 0.005079457223713809,
+      "grad_norm": 7.53125,
+      "learning_rate": 3.5e-05,
+      "loss": 1.5519,
+      "step": 70
+    },
+    {
+      "epoch": 0.0058050939699586385,
+      "grad_norm": 7.09375,
+      "learning_rate": 4e-05,
+      "loss": 1.2358,
+      "step": 80
+    },
+    {
+      "epoch": 0.006530730716203468,
+      "grad_norm": 8.0625,
+      "learning_rate": 4.5e-05,
+      "loss": 0.953,
+      "step": 90
+    },
+    {
+      "epoch": 0.007256367462448298,
+      "grad_norm": 6.6875,
+      "learning_rate": 5e-05,
+      "loss": 0.6256,
+      "step": 100
+    },
+    {
+      "epoch": 0.007982004208693128,
+      "grad_norm": 5.03125,
+      "learning_rate": 4.9999934086574596e-05,
+      "loss": 0.6601,
+      "step": 110
+    },
+    {
+      "epoch": 0.008707640954937958,
+      "grad_norm": 5.59375,
+      "learning_rate": 4.9999736346645943e-05,
+      "loss": 0.659,
+      "step": 120
+    },
+    {
+      "epoch": 0.009433277701182788,
+      "grad_norm": 3.953125,
+      "learning_rate": 4.999940678125673e-05,
+      "loss": 0.512,
+      "step": 130
+    },
+    {
+      "epoch": 0.010158914447427617,
+      "grad_norm": 5.46875,
+      "learning_rate": 4.9998945392144796e-05,
+      "loss": 0.5522,
+      "step": 140
+    },
+    {
+      "epoch": 0.010884551193672447,
+      "grad_norm": 3.96875,
+      "learning_rate": 4.999835218174307e-05,
+      "loss": 0.5532,
+      "step": 150
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 13781,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 10,
+  "total_flos": 0.0,
+  "train_batch_size": 5,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-150/training_args.bin b/checkpoint-150/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..4c28dcaa99e0497b1c861e1642ce3651d4277bea
--- /dev/null
+++ b/checkpoint-150/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9f4cfc15f45dfcabb458cfe7e31a5e2a32e96eacab3ae733ab7eefd0eca041b5
+size 4984
diff --git a/checkpoint-160/README.md b/checkpoint-160/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..64d55750c9c68c8ef0ccff2f65b1fd32a16578a9
--- /dev/null
+++ b/checkpoint-160/README.md
@@ -0,0 +1,202 @@
+---
+library_name: peft
+base_model: ../pretrained/MiniCPM-2B-dpo-bf16/
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.10.0
\ No newline at end of file
diff --git a/checkpoint-160/adapter_config.json b/checkpoint-160/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..d0eb9db7c6b0c827aba0e86e5057f651b0f868c9
--- /dev/null
+++ b/checkpoint-160/adapter_config.json
@@ -0,0 +1,29 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "../pretrained/MiniCPM-2B-dpo-bf16/",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": "gaussian",
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-160/adapter_model.safetensors b/checkpoint-160/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..b7d25943f23dca7a356718cb563ea97aa7027a90
--- /dev/null
+++ b/checkpoint-160/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3e97a3ae924aa8c38ce0cc6497764c2edc1ad3ffa6dc42744319334261a52e30
+size 5919456
diff --git a/checkpoint-160/optimizer.pt b/checkpoint-160/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..00b254ff075714a313efe79c3522943599ada689
--- /dev/null
+++ b/checkpoint-160/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d47bf63b4d37493b7b937961025ca5642fb56ea1b0c57c6b7181d051550eb98e
+size 11930938
diff --git a/checkpoint-160/rng_state_0.pth b/checkpoint-160/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..1a348a58aeccdd834c1f8384d2c678d70847862b
--- /dev/null
+++ b/checkpoint-160/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6ee31f68abe6b432686fdd272da2eeee5b73b4f8821bb04740f607c935a3aa05
+size 15024
diff --git a/checkpoint-160/rng_state_1.pth b/checkpoint-160/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..d240092b087cdd0689f6c165bac6ef292c75d586
--- /dev/null
+++ b/checkpoint-160/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9ccfaa2295f585cb34fb30444c879af586c3b440d8fe9e6b9aea87172ec7cd8b
+size 15024
diff --git a/checkpoint-160/rng_state_2.pth b/checkpoint-160/rng_state_2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..6775a2dea14ed6e60f977be88fb2499831856136
--- /dev/null
+++ b/checkpoint-160/rng_state_2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e45ead505fead6b579c877849f1ab0e0143b1148c8d7d9edeac5bc999e7e7f49
+size 15024
diff --git a/checkpoint-160/rng_state_3.pth b/checkpoint-160/rng_state_3.pth
new file mode 100644
index 0000000000000000000000000000000000000000..7acca8ad061b6f3e98c8b2ec7ab4b61ba316c53c
--- /dev/null
+++ b/checkpoint-160/rng_state_3.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a58731fe5c8e37c116d083c9cf2ea45cc7f0b235b603682161aea7c0598f46d9
+size 15024
diff --git a/checkpoint-160/scheduler.pt b/checkpoint-160/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8b3822869d5dfb7fca0d39a6bec5f43cc4b070e3
--- /dev/null
+++ b/checkpoint-160/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2f9b45fba4fbf7d75633c769ce8555c60bc65b9ba071da0897e824d206d19758
+size 1064
diff --git a/checkpoint-160/trainer_state.json b/checkpoint-160/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..3c2e05e6ce4b59f901ee74e55e252ef4b9393ec6
--- /dev/null
+++ b/checkpoint-160/trainer_state.json
@@ -0,0 +1,133 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.011610187939917277,
+  "eval_steps": 500,
+  "global_step": 160,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0007256367462448298,
+      "grad_norm": 15.6875,
+      "learning_rate": 5e-06,
+      "loss": 3.2562,
+      "step": 10
+    },
+    {
+      "epoch": 0.0014512734924896596,
+      "grad_norm": 17.5,
+      "learning_rate": 1e-05,
+      "loss": 2.9076,
+      "step": 20
+    },
+    {
+      "epoch": 0.0021769102387344894,
+      "grad_norm": 19.125,
+      "learning_rate": 1.5e-05,
+      "loss": 3.0281,
+      "step": 30
+    },
+    {
+      "epoch": 0.0029025469849793192,
+      "grad_norm": 17.875,
+      "learning_rate": 2e-05,
+      "loss": 2.6225,
+      "step": 40
+    },
+    {
+      "epoch": 0.003628183731224149,
+      "grad_norm": 13.9375,
+      "learning_rate": 2.5e-05,
+      "loss": 2.5137,
+      "step": 50
+    },
+    {
+      "epoch": 0.004353820477468979,
+      "grad_norm": 10.6875,
+      "learning_rate": 3e-05,
+      "loss": 1.994,
+      "step": 60
+    },
+    {
+      "epoch": 0.005079457223713809,
+      "grad_norm": 7.53125,
+      "learning_rate": 3.5e-05,
+      "loss": 1.5519,
+      "step": 70
+    },
+    {
+      "epoch": 0.0058050939699586385,
+      "grad_norm": 7.09375,
+      "learning_rate": 4e-05,
+      "loss": 1.2358,
+      "step": 80
+    },
+    {
+      "epoch": 0.006530730716203468,
+      "grad_norm": 8.0625,
+      "learning_rate": 4.5e-05,
+      "loss": 0.953,
+      "step": 90
+    },
+    {
+      "epoch": 0.007256367462448298,
+      "grad_norm": 6.6875,
+      "learning_rate": 5e-05,
+      "loss": 0.6256,
+      "step": 100
+    },
+    {
+      "epoch": 0.007982004208693128,
+      "grad_norm": 5.03125,
+      "learning_rate": 4.9999934086574596e-05,
+      "loss": 0.6601,
+      "step": 110
+    },
+    {
+      "epoch": 0.008707640954937958,
+      "grad_norm": 5.59375,
+      "learning_rate": 4.9999736346645943e-05,
+      "loss": 0.659,
+      "step": 120
+    },
+    {
+      "epoch": 0.009433277701182788,
+      "grad_norm": 3.953125,
+      "learning_rate": 4.999940678125673e-05,
+      "loss": 0.512,
+      "step": 130
+    },
+    {
+      "epoch": 0.010158914447427617,
+      "grad_norm": 5.46875,
+      "learning_rate": 4.9998945392144796e-05,
+      "loss": 0.5522,
+      "step": 140
+    },
+    {
+      "epoch": 0.010884551193672447,
+      "grad_norm": 3.96875,
+      "learning_rate": 4.999835218174307e-05,
+      "loss": 0.5532,
+      "step": 150
+    },
+    {
+      "epoch": 0.011610187939917277,
+      "grad_norm": 3.234375,
+      "learning_rate": 4.99976271531796e-05,
+      "loss": 0.5353,
+      "step": 160
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 13781,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 10,
+  "total_flos": 0.0,
+  "train_batch_size": 5,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-160/training_args.bin b/checkpoint-160/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..4c28dcaa99e0497b1c861e1642ce3651d4277bea
--- /dev/null
+++ b/checkpoint-160/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9f4cfc15f45dfcabb458cfe7e31a5e2a32e96eacab3ae733ab7eefd0eca041b5
+size 4984
diff --git a/checkpoint-170/README.md b/checkpoint-170/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..64d55750c9c68c8ef0ccff2f65b1fd32a16578a9
--- /dev/null
+++ b/checkpoint-170/README.md
@@ -0,0 +1,202 @@
+---
+library_name: peft
+base_model: ../pretrained/MiniCPM-2B-dpo-bf16/
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.10.0
\ No newline at end of file
diff --git a/checkpoint-170/adapter_config.json b/checkpoint-170/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..d0eb9db7c6b0c827aba0e86e5057f651b0f868c9
--- /dev/null
+++ b/checkpoint-170/adapter_config.json
@@ -0,0 +1,29 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "../pretrained/MiniCPM-2B-dpo-bf16/",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": "gaussian",
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-170/adapter_model.safetensors b/checkpoint-170/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..e9f4e3fb9468c90c58a0e7619a2091f82aa2baae
--- /dev/null
+++ b/checkpoint-170/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d547223d9ed0815d4ed9f8332559f1fa2c474e14a341279196fa2ff838a4b147
+size 5919456
diff --git a/checkpoint-170/optimizer.pt b/checkpoint-170/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1f9ddba50cd1b10fde438e1dd4622afd7293f2d3
--- /dev/null
+++ b/checkpoint-170/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fa9dd4db1fbb91f99fa4a50893386edaa261b6cfe0b5ceab06e410b50121a8cb
+size 11930938
diff --git a/checkpoint-170/rng_state_0.pth b/checkpoint-170/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..0190fba96c029f1f316a97d8b807b161fc35b029
--- /dev/null
+++ b/checkpoint-170/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:04b538b0af287645a40ce0df2860d4d1942ce7e4e6877b83687385b189a6e760
+size 15024
diff --git a/checkpoint-170/rng_state_1.pth b/checkpoint-170/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..b268b03046e21e3683e2e0a4036ab6791a429e18
--- /dev/null
+++ b/checkpoint-170/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:aab51a83d1aba072b3826961dc0d5c41018143d192a7b20b35cb398bb6e6a07d
+size 15024
diff --git a/checkpoint-170/rng_state_2.pth b/checkpoint-170/rng_state_2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..94467028ff324a304799c5d14be05069f7000437
--- /dev/null
+++ b/checkpoint-170/rng_state_2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fa8def4f7d0cade6a61e0faf6026d6fbf8f9a96f925d11de9c3e2566bd370cf6
+size 15024
diff --git a/checkpoint-170/rng_state_3.pth b/checkpoint-170/rng_state_3.pth
new file mode 100644
index 0000000000000000000000000000000000000000..3a9cac704de366bbfd6be2c1775d76cc7a42733e
--- /dev/null
+++ b/checkpoint-170/rng_state_3.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bce69a0041979668f11d6085ed3478968bd2dc3d7aeed480f09e380cf7f58cfc
+size 15024
diff --git a/checkpoint-170/scheduler.pt b/checkpoint-170/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..62b7786310b131cddbfd1961b3fbeab728b44f05
--- /dev/null
+++ b/checkpoint-170/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9abbc2cabeecc11131e7d46a76ca5440716372ee708a9b37e9b3583f86d5fa38
+size 1064
diff --git a/checkpoint-170/trainer_state.json b/checkpoint-170/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..9493f45944947a70ca4db7ce10c8e40463c1c6a2
--- /dev/null
+++ b/checkpoint-170/trainer_state.json
@@ -0,0 +1,140 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.012335824686162107,
+  "eval_steps": 500,
+  "global_step": 170,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0007256367462448298,
+      "grad_norm": 15.6875,
+      "learning_rate": 5e-06,
+      "loss": 3.2562,
+      "step": 10
+    },
+    {
+      "epoch": 0.0014512734924896596,
+      "grad_norm": 17.5,
+      "learning_rate": 1e-05,
+      "loss": 2.9076,
+      "step": 20
+    },
+    {
+      "epoch": 0.0021769102387344894,
+      "grad_norm": 19.125,
+      "learning_rate": 1.5e-05,
+      "loss": 3.0281,
+      "step": 30
+    },
+    {
+      "epoch": 0.0029025469849793192,
+      "grad_norm": 17.875,
+      "learning_rate": 2e-05,
+      "loss": 2.6225,
+      "step": 40
+    },
+    {
+      "epoch": 0.003628183731224149,
+      "grad_norm": 13.9375,
+      "learning_rate": 2.5e-05,
+      "loss": 2.5137,
+      "step": 50
+    },
+    {
+      "epoch": 0.004353820477468979,
+      "grad_norm": 10.6875,
+      "learning_rate": 3e-05,
+      "loss": 1.994,
+      "step": 60
+    },
+    {
+      "epoch": 0.005079457223713809,
+      "grad_norm": 7.53125,
+      "learning_rate": 3.5e-05,
+      "loss": 1.5519,
+      "step": 70
+    },
+    {
+      "epoch": 0.0058050939699586385,
+      "grad_norm": 7.09375,
+      "learning_rate": 4e-05,
+      "loss": 1.2358,
+      "step": 80
+    },
+    {
+      "epoch": 0.006530730716203468,
+      "grad_norm": 8.0625,
+      "learning_rate": 4.5e-05,
+      "loss": 0.953,
+      "step": 90
+    },
+    {
+      "epoch": 0.007256367462448298,
+      "grad_norm": 6.6875,
+      "learning_rate": 5e-05,
+      "loss": 0.6256,
+      "step": 100
+    },
+    {
+      "epoch": 0.007982004208693128,
+      "grad_norm": 5.03125,
+      "learning_rate": 4.9999934086574596e-05,
+      "loss": 0.6601,
+      "step": 110
+    },
+    {
+      "epoch": 0.008707640954937958,
+      "grad_norm": 5.59375,
+      "learning_rate": 4.9999736346645943e-05,
+      "loss": 0.659,
+      "step": 120
+    },
+    {
+      "epoch": 0.009433277701182788,
+      "grad_norm": 3.953125,
+      "learning_rate": 4.999940678125673e-05,
+      "loss": 0.512,
+      "step": 130
+    },
+    {
+      "epoch": 0.010158914447427617,
+      "grad_norm": 5.46875,
+      "learning_rate": 4.9998945392144796e-05,
+      "loss": 0.5522,
+      "step": 140
+    },
+    {
+      "epoch": 0.010884551193672447,
+      "grad_norm": 3.96875,
+      "learning_rate": 4.999835218174307e-05,
+      "loss": 0.5532,
+      "step": 150
+    },
+    {
+      "epoch": 0.011610187939917277,
+      "grad_norm": 3.234375,
+      "learning_rate": 4.99976271531796e-05,
+      "loss": 0.5353,
+      "step": 160
+    },
+    {
+      "epoch": 0.012335824686162107,
+      "grad_norm": 4.5,
+      "learning_rate": 4.9996770310277506e-05,
+      "loss": 0.4439,
+      "step": 170
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 13781,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 10,
+  "total_flos": 0.0,
+  "train_batch_size": 5,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-170/training_args.bin b/checkpoint-170/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..4c28dcaa99e0497b1c861e1642ce3651d4277bea
--- /dev/null
+++ b/checkpoint-170/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9f4cfc15f45dfcabb458cfe7e31a5e2a32e96eacab3ae733ab7eefd0eca041b5
+size 4984
diff --git a/checkpoint-180/README.md b/checkpoint-180/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..64d55750c9c68c8ef0ccff2f65b1fd32a16578a9
--- /dev/null
+++ b/checkpoint-180/README.md
@@ -0,0 +1,202 @@
+---
+library_name: peft
+base_model: ../pretrained/MiniCPM-2B-dpo-bf16/
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.10.0
\ No newline at end of file
diff --git a/checkpoint-180/adapter_config.json b/checkpoint-180/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..d0eb9db7c6b0c827aba0e86e5057f651b0f868c9
--- /dev/null
+++ b/checkpoint-180/adapter_config.json
@@ -0,0 +1,29 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "../pretrained/MiniCPM-2B-dpo-bf16/",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": "gaussian",
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-180/adapter_model.safetensors b/checkpoint-180/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..e64c66506f3a5a3e92f4c9c99006bcdb0765b252
--- /dev/null
+++ b/checkpoint-180/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f126ed500918e15380ff3993858fe2247a8c4990602def9372cf9b165dce93cb
+size 5919456
diff --git a/checkpoint-180/optimizer.pt b/checkpoint-180/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2bb6ecdc40c1d8041eb762dd672dd628c0d616db
--- /dev/null
+++ b/checkpoint-180/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5f8b8a04864c4d61c08d1438ac41ba91c560477a17f3c650d8fa6d0cb0744b1b
+size 11930938
diff --git a/checkpoint-180/rng_state_0.pth b/checkpoint-180/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..a4bf3c82228f9034814d7c954087afe9a1fa5b5f
--- /dev/null
+++ b/checkpoint-180/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:391491e98ac692b7e5835106d7f5cbacd49cdd07a44cc27d7b58742c4dc066be
+size 15024
diff --git a/checkpoint-180/rng_state_1.pth b/checkpoint-180/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..e752b39c1c9894ab1a1d41365fe600feb5bd143c
--- /dev/null
+++ b/checkpoint-180/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d0d1421c313b5149e2c40765ea51d8b558dc49bf8f3196a5026730b9ddc0b1ef
+size 15024
diff --git a/checkpoint-180/rng_state_2.pth b/checkpoint-180/rng_state_2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..882ddf2069a3f27d9a74bdc6aea9b1266bbbb173
--- /dev/null
+++ b/checkpoint-180/rng_state_2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4d9ec9798d1f0478430ddf469073d6b795a3038d3094c7819b37dd37b6e57e60
+size 15024
diff --git a/checkpoint-180/rng_state_3.pth b/checkpoint-180/rng_state_3.pth
new file mode 100644
index 0000000000000000000000000000000000000000..7be2c9933f6912330499d4df75c20c65d8289227
--- /dev/null
+++ b/checkpoint-180/rng_state_3.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a0c2257b0791cfa018dcbff0ed5913d5cd0cf1f42b4364228919a2f3bfbe68d1
+size 15024
diff --git a/checkpoint-180/scheduler.pt b/checkpoint-180/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1e1dfc302893fcb026f2819752de5b88b96f998a
--- /dev/null
+++ b/checkpoint-180/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:50ff74b62d82fd8adec674c6b8842a8d81651ef73957ce1c752e54ee1f46269d
+size 1064
diff --git a/checkpoint-180/trainer_state.json b/checkpoint-180/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..e6782e225320c71867c19736d613ac344450a67f
--- /dev/null
+++ b/checkpoint-180/trainer_state.json
@@ -0,0 +1,147 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.013061461432406937,
+  "eval_steps": 500,
+  "global_step": 180,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0007256367462448298,
+      "grad_norm": 15.6875,
+      "learning_rate": 5e-06,
+      "loss": 3.2562,
+      "step": 10
+    },
+    {
+      "epoch": 0.0014512734924896596,
+      "grad_norm": 17.5,
+      "learning_rate": 1e-05,
+      "loss": 2.9076,
+      "step": 20
+    },
+    {
+      "epoch": 0.0021769102387344894,
+      "grad_norm": 19.125,
+      "learning_rate": 1.5e-05,
+      "loss": 3.0281,
+      "step": 30
+    },
+    {
+      "epoch": 0.0029025469849793192,
+      "grad_norm": 17.875,
+      "learning_rate": 2e-05,
+      "loss": 2.6225,
+      "step": 40
+    },
+    {
+      "epoch": 0.003628183731224149,
+      "grad_norm": 13.9375,
+      "learning_rate": 2.5e-05,
+      "loss": 2.5137,
+      "step": 50
+    },
+    {
+      "epoch": 0.004353820477468979,
+      "grad_norm": 10.6875,
+      "learning_rate": 3e-05,
+      "loss": 1.994,
+      "step": 60
+    },
+    {
+      "epoch": 0.005079457223713809,
+      "grad_norm": 7.53125,
+      "learning_rate": 3.5e-05,
+      "loss": 1.5519,
+      "step": 70
+    },
+    {
+      "epoch": 0.0058050939699586385,
+      "grad_norm": 7.09375,
+      "learning_rate": 4e-05,
+      "loss": 1.2358,
+      "step": 80
+    },
+    {
+      "epoch": 0.006530730716203468,
+      "grad_norm": 8.0625,
+      "learning_rate": 4.5e-05,
+      "loss": 0.953,
+      "step": 90
+    },
+    {
+      "epoch": 0.007256367462448298,
+      "grad_norm": 6.6875,
+      "learning_rate": 5e-05,
+      "loss": 0.6256,
+      "step": 100
+    },
+    {
+      "epoch": 0.007982004208693128,
+      "grad_norm": 5.03125,
+      "learning_rate": 4.9999934086574596e-05,
+      "loss": 0.6601,
+      "step": 110
+    },
+    {
+      "epoch": 0.008707640954937958,
+      "grad_norm": 5.59375,
+      "learning_rate": 4.9999736346645943e-05,
+      "loss": 0.659,
+      "step": 120
+    },
+    {
+      "epoch": 0.009433277701182788,
+      "grad_norm": 3.953125,
+      "learning_rate": 4.999940678125673e-05,
+      "loss": 0.512,
+      "step": 130
+    },
+    {
+      "epoch": 0.010158914447427617,
+      "grad_norm": 5.46875,
+      "learning_rate": 4.9998945392144796e-05,
+      "loss": 0.5522,
+      "step": 140
+    },
+    {
+      "epoch": 0.010884551193672447,
+      "grad_norm": 3.96875,
+      "learning_rate": 4.999835218174307e-05,
+      "loss": 0.5532,
+      "step": 150
+    },
+    {
+      "epoch": 0.011610187939917277,
+      "grad_norm": 3.234375,
+      "learning_rate": 4.99976271531796e-05,
+      "loss": 0.5353,
+      "step": 160
+    },
+    {
+      "epoch": 0.012335824686162107,
+      "grad_norm": 4.5,
+      "learning_rate": 4.9996770310277506e-05,
+      "loss": 0.4439,
+      "step": 170
+    },
+    {
+      "epoch": 0.013061461432406937,
+      "grad_norm": 4.03125,
+      "learning_rate": 4.9995781657555e-05,
+      "loss": 0.4832,
+      "step": 180
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 13781,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 10,
+  "total_flos": 0.0,
+  "train_batch_size": 5,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-180/training_args.bin b/checkpoint-180/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..4c28dcaa99e0497b1c861e1642ce3651d4277bea
--- /dev/null
+++ b/checkpoint-180/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9f4cfc15f45dfcabb458cfe7e31a5e2a32e96eacab3ae733ab7eefd0eca041b5
+size 4984
diff --git a/checkpoint-190/README.md b/checkpoint-190/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..64d55750c9c68c8ef0ccff2f65b1fd32a16578a9
--- /dev/null
+++ b/checkpoint-190/README.md
@@ -0,0 +1,202 @@
+---
+library_name: peft
+base_model: ../pretrained/MiniCPM-2B-dpo-bf16/
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.10.0
\ No newline at end of file
diff --git a/checkpoint-190/adapter_config.json b/checkpoint-190/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..d0eb9db7c6b0c827aba0e86e5057f651b0f868c9
--- /dev/null
+++ b/checkpoint-190/adapter_config.json
@@ -0,0 +1,29 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "../pretrained/MiniCPM-2B-dpo-bf16/",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": "gaussian",
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-190/adapter_model.safetensors b/checkpoint-190/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..9444109f7b9f868cc6e282557dda897f96469e1a
--- /dev/null
+++ b/checkpoint-190/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:164c0a9d17f2ad649c677f450144d4d7a9acecabb4558f157590a62c7c8e8b55
+size 5919456
diff --git a/checkpoint-190/optimizer.pt b/checkpoint-190/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..05a9ff22dfb8b657e3b8210c6e40ff08d5c1832d
--- /dev/null
+++ b/checkpoint-190/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ea35bcd920d0fc3e9070fbcf7b9550826c51eaf3d486884fd8a6a142853287bb
+size 11930938
diff --git a/checkpoint-190/rng_state_0.pth b/checkpoint-190/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..e0c0211e449e0fe4cae423b501f0a6079a134f3f
--- /dev/null
+++ b/checkpoint-190/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:634187bc0806802ba140fc58ef1da9ad47cab37b0a88b924abc908f1a6e48084
+size 15024
diff --git a/checkpoint-190/rng_state_1.pth b/checkpoint-190/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..db22be4a1419388cfcaec36e89d291d8473c2383
--- /dev/null
+++ b/checkpoint-190/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bc7b535a8dc270694543ed47c3a9d756c8e9e91eaeaf7606c23e1f1871e9f459
+size 15024
diff --git a/checkpoint-190/rng_state_2.pth b/checkpoint-190/rng_state_2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..88e4ce07c281508ccd300c390a5fd12b5ea5e41f
--- /dev/null
+++ b/checkpoint-190/rng_state_2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cdefdbea22c530e923aa2c703921cde9f526199183b066c1f92041ae90ee88cd
+size 15024
diff --git a/checkpoint-190/rng_state_3.pth b/checkpoint-190/rng_state_3.pth
new file mode 100644
index 0000000000000000000000000000000000000000..d03fce08df96a573b7d5dcf7a498f6765950b5aa
--- /dev/null
+++ b/checkpoint-190/rng_state_3.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0a5cf3867ec2aff94e354518b57eb2ae50d1a25d4c71ee748970519e8e123f82
+size 15024
diff --git a/checkpoint-190/scheduler.pt b/checkpoint-190/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a94fb1da918c40960bb23a0433ecc5e567eaa5db
--- /dev/null
+++ b/checkpoint-190/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5225217e6c3511691da2e482ae99a4b1234baee31781f7a3759c86c8b538736d
+size 1064
diff --git a/checkpoint-190/trainer_state.json b/checkpoint-190/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..c93f526851e17e642f41478ff690c1ab2eb0c280
--- /dev/null
+++ b/checkpoint-190/trainer_state.json
@@ -0,0 +1,154 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.013787098178651766,
+  "eval_steps": 500,
+  "global_step": 190,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0007256367462448298,
+      "grad_norm": 15.6875,
+      "learning_rate": 5e-06,
+      "loss": 3.2562,
+      "step": 10
+    },
+    {
+      "epoch": 0.0014512734924896596,
+      "grad_norm": 17.5,
+      "learning_rate": 1e-05,
+      "loss": 2.9076,
+      "step": 20
+    },
+    {
+      "epoch": 0.0021769102387344894,
+      "grad_norm": 19.125,
+      "learning_rate": 1.5e-05,
+      "loss": 3.0281,
+      "step": 30
+    },
+    {
+      "epoch": 0.0029025469849793192,
+      "grad_norm": 17.875,
+      "learning_rate": 2e-05,
+      "loss": 2.6225,
+      "step": 40
+    },
+    {
+      "epoch": 0.003628183731224149,
+      "grad_norm": 13.9375,
+      "learning_rate": 2.5e-05,
+      "loss": 2.5137,
+      "step": 50
+    },
+    {
+      "epoch": 0.004353820477468979,
+      "grad_norm": 10.6875,
+      "learning_rate": 3e-05,
+      "loss": 1.994,
+      "step": 60
+    },
+    {
+      "epoch": 0.005079457223713809,
+      "grad_norm": 7.53125,
+      "learning_rate": 3.5e-05,
+      "loss": 1.5519,
+      "step": 70
+    },
+    {
+      "epoch": 0.0058050939699586385,
+      "grad_norm": 7.09375,
+      "learning_rate": 4e-05,
+      "loss": 1.2358,
+      "step": 80
+    },
+    {
+      "epoch": 0.006530730716203468,
+      "grad_norm": 8.0625,
+      "learning_rate": 4.5e-05,
+      "loss": 0.953,
+      "step": 90
+    },
+    {
+      "epoch": 0.007256367462448298,
+      "grad_norm": 6.6875,
+      "learning_rate": 5e-05,
+      "loss": 0.6256,
+      "step": 100
+    },
+    {
+      "epoch": 0.007982004208693128,
+      "grad_norm": 5.03125,
+      "learning_rate": 4.9999934086574596e-05,
+      "loss": 0.6601,
+      "step": 110
+    },
+    {
+      "epoch": 0.008707640954937958,
+      "grad_norm": 5.59375,
+      "learning_rate": 4.9999736346645943e-05,
+      "loss": 0.659,
+      "step": 120
+    },
+    {
+      "epoch": 0.009433277701182788,
+      "grad_norm": 3.953125,
+      "learning_rate": 4.999940678125673e-05,
+      "loss": 0.512,
+      "step": 130
+    },
+    {
+      "epoch": 0.010158914447427617,
+      "grad_norm": 5.46875,
+      "learning_rate": 4.9998945392144796e-05,
+      "loss": 0.5522,
+      "step": 140
+    },
+    {
+      "epoch": 0.010884551193672447,
+      "grad_norm": 3.96875,
+      "learning_rate": 4.999835218174307e-05,
+      "loss": 0.5532,
+      "step": 150
+    },
+    {
+      "epoch": 0.011610187939917277,
+      "grad_norm": 3.234375,
+      "learning_rate": 4.99976271531796e-05,
+      "loss": 0.5353,
+      "step": 160
+    },
+    {
+      "epoch": 0.012335824686162107,
+      "grad_norm": 4.5,
+      "learning_rate": 4.9996770310277506e-05,
+      "loss": 0.4439,
+      "step": 170
+    },
+    {
+      "epoch": 0.013061461432406937,
+      "grad_norm": 4.03125,
+      "learning_rate": 4.9995781657555e-05,
+      "loss": 0.4832,
+      "step": 180
+    },
+    {
+      "epoch": 0.013787098178651766,
+      "grad_norm": 4.71875,
+      "learning_rate": 4.99946612002253e-05,
+      "loss": 0.5325,
+      "step": 190
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 13781,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 10,
+  "total_flos": 0.0,
+  "train_batch_size": 5,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-190/training_args.bin b/checkpoint-190/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..4c28dcaa99e0497b1c861e1642ce3651d4277bea
--- /dev/null
+++ b/checkpoint-190/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9f4cfc15f45dfcabb458cfe7e31a5e2a32e96eacab3ae733ab7eefd0eca041b5
+size 4984
diff --git a/checkpoint-20/README.md b/checkpoint-20/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..64d55750c9c68c8ef0ccff2f65b1fd32a16578a9
--- /dev/null
+++ b/checkpoint-20/README.md
@@ -0,0 +1,202 @@
+---
+library_name: peft
+base_model: ../pretrained/MiniCPM-2B-dpo-bf16/
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and BibTeX information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.10.0
\ No newline at end of file
diff --git a/checkpoint-20/adapter_config.json b/checkpoint-20/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..d0eb9db7c6b0c827aba0e86e5057f651b0f868c9
--- /dev/null
+++ b/checkpoint-20/adapter_config.json
@@ -0,0 +1,29 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "../pretrained/MiniCPM-2B-dpo-bf16/",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": "gaussian",
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-20/adapter_model.safetensors b/checkpoint-20/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..9c653a5704507410bd12154dea5dadb0819f9c8f
--- /dev/null
+++ b/checkpoint-20/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a2a312bbd3fe9af1d70dd7fd68ec1803a69d657e4bc863e1b337b50c402b163e
+size 5919456
diff --git a/checkpoint-20/optimizer.pt b/checkpoint-20/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..82929ffcd1464972fa87948e53cded4fd6491331
--- /dev/null
+++ b/checkpoint-20/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:93b4e0538791301edba6f0256f5a933b7cf9e710601259a7e0e4fcd357f36836
+size 11930938
diff --git a/checkpoint-20/rng_state_0.pth b/checkpoint-20/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..27c26966ba11b1537e03fb58b5745c13bda8e07c
--- /dev/null
+++ b/checkpoint-20/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3f1260d1909498eca402d05c2a419952d47a5ad70175dcbdd8b00c91a5fc45fa
+size 15024
diff --git a/checkpoint-20/rng_state_1.pth b/checkpoint-20/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..db68c71e26cb6f3a051cebb674e1c59e4a372b54
--- /dev/null
+++ b/checkpoint-20/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:db4473e907d1644f0438ef7f0c7704d02103e85b9719510e499dcd424dd7a698
+size 15024
diff --git a/checkpoint-20/rng_state_2.pth b/checkpoint-20/rng_state_2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..4793fad96abae27f6b655664bfc74d07201da74e
--- /dev/null
+++ b/checkpoint-20/rng_state_2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bc5c885ac98cfdee09d2cbd67013c6d68e4008188e8818b09c31ef431a1d7eb8
+size 15024
diff --git a/checkpoint-20/rng_state_3.pth b/checkpoint-20/rng_state_3.pth
new file mode 100644
index 0000000000000000000000000000000000000000..989dad2ada4a51ebd64cb87b3e27f27217ee5d56
--- /dev/null
+++ b/checkpoint-20/rng_state_3.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f1e02830b4a3aa3e8be30cc4b5cdb6d781ffb6ba2f9b7fc7de429e8f14ff9eb2
+size 15024
diff --git a/checkpoint-20/scheduler.pt b/checkpoint-20/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..573b44d468343889e6e4d7d464515e2ffa148efb
--- /dev/null
+++ b/checkpoint-20/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e1a773b7755c6ffc7eac6dc01576c119f78c4b92bb8a89b09636623d2fa83367
+size 1064
diff --git a/checkpoint-20/trainer_state.json b/checkpoint-20/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..8885836fae9a151d6c4fb6974ba720485dda87c4
--- /dev/null
+++ b/checkpoint-20/trainer_state.json
@@ -0,0 +1,35 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.0014512734924896596,
+  "eval_steps": 500,
+  "global_step": 20,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0007256367462448298,
+      "grad_norm": 15.6875,
+      "learning_rate": 5e-06,
+      "loss": 3.2562,
+      "step": 10
+    },
+    {
+      "epoch": 0.0014512734924896596,
+      "grad_norm": 17.5,
+      "learning_rate": 1e-05,
+      "loss": 2.9076,
+      "step": 20
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 13781,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 10,
+  "total_flos": 0.0,
+  "train_batch_size": 5,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-20/training_args.bin b/checkpoint-20/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..4c28dcaa99e0497b1c861e1642ce3651d4277bea
--- /dev/null
+++ b/checkpoint-20/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9f4cfc15f45dfcabb458cfe7e31a5e2a32e96eacab3ae733ab7eefd0eca041b5
+size 4984
diff --git a/checkpoint-200/README.md b/checkpoint-200/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..64d55750c9c68c8ef0ccff2f65b1fd32a16578a9
--- /dev/null
+++ b/checkpoint-200/README.md
@@ -0,0 +1,202 @@
+---
+library_name: peft
+base_model: ../pretrained/MiniCPM-2B-dpo-bf16/
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and BibTeX information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.10.0
\ No newline at end of file
diff --git a/checkpoint-200/adapter_config.json b/checkpoint-200/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..d0eb9db7c6b0c827aba0e86e5057f651b0f868c9
--- /dev/null
+++ b/checkpoint-200/adapter_config.json
@@ -0,0 +1,29 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "../pretrained/MiniCPM-2B-dpo-bf16/",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": "gaussian",
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-200/adapter_model.safetensors b/checkpoint-200/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..1f63d1861ffd21bff7fd4fe40bec06d1b8e6dc85
--- /dev/null
+++ b/checkpoint-200/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3b9de44f76b2131043feb775d5008ccb1f63c563cf6dbb01f156f5836d93ff30
+size 5919456
diff --git a/checkpoint-200/optimizer.pt b/checkpoint-200/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..30b8b128508a22c9c90cd8d5b451898619a67b89
--- /dev/null
+++ b/checkpoint-200/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a74bd85064948532f802492a6fd85a5a840068403feb98bf3a5572c6f37a7b0c
+size 11930938
diff --git a/checkpoint-200/rng_state_0.pth b/checkpoint-200/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..cbe6ed9bd7015ba53a00eac5dbf6c207bee426f9
--- /dev/null
+++ b/checkpoint-200/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9f1cba52d741c919af5485dbd6f2f7f1d3e0a85d6360bd235ada5b0b422cbb85
+size 15024
diff --git a/checkpoint-200/rng_state_1.pth b/checkpoint-200/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..5e368466a71fee96b42b881f21bf712a4da6d76f
--- /dev/null
+++ b/checkpoint-200/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e6df0bf21b793eda3c40de2d2c78534d169305cdd2f29bdcd9cb73fac0665821
+size 15024
diff --git a/checkpoint-200/rng_state_2.pth b/checkpoint-200/rng_state_2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..5efcda4ce23425b42660c0142199f06e24c4ecb6
--- /dev/null
+++ b/checkpoint-200/rng_state_2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6248867138fb350cb0f44cdac9e1a95bb8b6784149c2399be64db251dc3dd906
+size 15024
diff --git a/checkpoint-200/rng_state_3.pth b/checkpoint-200/rng_state_3.pth
new file mode 100644
index 0000000000000000000000000000000000000000..e92c71a16815cfe7ecbfb3568fc623dee858bc62
--- /dev/null
+++ b/checkpoint-200/rng_state_3.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:eef7858543a2b266a803e1226a5565f91deb128b83aa5d1d3a7d6881fecffcd3
+size 15024
diff --git a/checkpoint-200/scheduler.pt b/checkpoint-200/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a855910d83131677e185f1c8c256645bc68eb740
--- /dev/null
+++ b/checkpoint-200/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:17f6dba7a5c52bace05cdf4294c902ba3c6d8e1e3b980580b59c6c81ef0a7893
+size 1064
diff --git a/checkpoint-200/trainer_state.json b/checkpoint-200/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..b787bf8140763f7532232d0a273d6c90bbac5782
--- /dev/null
+++ b/checkpoint-200/trainer_state.json
@@ -0,0 +1,161 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.014512734924896596,
+  "eval_steps": 500,
+  "global_step": 200,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0007256367462448298,
+      "grad_norm": 15.6875,
+      "learning_rate": 5e-06,
+      "loss": 3.2562,
+      "step": 10
+    },
+    {
+      "epoch": 0.0014512734924896596,
+      "grad_norm": 17.5,
+      "learning_rate": 1e-05,
+      "loss": 2.9076,
+      "step": 20
+    },
+    {
+      "epoch": 0.0021769102387344894,
+      "grad_norm": 19.125,
+      "learning_rate": 1.5e-05,
+      "loss": 3.0281,
+      "step": 30
+    },
+    {
+      "epoch": 0.0029025469849793192,
+      "grad_norm": 17.875,
+      "learning_rate": 2e-05,
+      "loss": 2.6225,
+      "step": 40
+    },
+    {
+      "epoch": 0.003628183731224149,
+      "grad_norm": 13.9375,
+      "learning_rate": 2.5e-05,
+      "loss": 2.5137,
+      "step": 50
+    },
+    {
+      "epoch": 0.004353820477468979,
+      "grad_norm": 10.6875,
+      "learning_rate": 3e-05,
+      "loss": 1.994,
+      "step": 60
+    },
+    {
+      "epoch": 0.005079457223713809,
+      "grad_norm": 7.53125,
+      "learning_rate": 3.5e-05,
+      "loss": 1.5519,
+      "step": 70
+    },
+    {
+      "epoch": 0.0058050939699586385,
+      "grad_norm": 7.09375,
+      "learning_rate": 4e-05,
+      "loss": 1.2358,
+      "step": 80
+    },
+    {
+      "epoch": 0.006530730716203468,
+      "grad_norm": 8.0625,
+      "learning_rate": 4.5e-05,
+      "loss": 0.953,
+      "step": 90
+    },
+    {
+      "epoch": 0.007256367462448298,
+      "grad_norm": 6.6875,
+      "learning_rate": 5e-05,
+      "loss": 0.6256,
+      "step": 100
+    },
+    {
+      "epoch": 0.007982004208693128,
+      "grad_norm": 5.03125,
+      "learning_rate": 4.9999934086574596e-05,
+      "loss": 0.6601,
+      "step": 110
+    },
+    {
+      "epoch": 0.008707640954937958,
+      "grad_norm": 5.59375,
+      "learning_rate": 4.9999736346645943e-05,
+      "loss": 0.659,
+      "step": 120
+    },
+    {
+      "epoch": 0.009433277701182788,
+      "grad_norm": 3.953125,
+      "learning_rate": 4.999940678125673e-05,
+      "loss": 0.512,
+      "step": 130
+    },
+    {
+      "epoch": 0.010158914447427617,
+      "grad_norm": 5.46875,
+      "learning_rate": 4.9998945392144796e-05,
+      "loss": 0.5522,
+      "step": 140
+    },
+    {
+      "epoch": 0.010884551193672447,
+      "grad_norm": 3.96875,
+      "learning_rate": 4.999835218174307e-05,
+      "loss": 0.5532,
+      "step": 150
+    },
+    {
+      "epoch": 0.011610187939917277,
+      "grad_norm": 3.234375,
+      "learning_rate": 4.99976271531796e-05,
+      "loss": 0.5353,
+      "step": 160
+    },
+    {
+      "epoch": 0.012335824686162107,
+      "grad_norm": 4.5,
+      "learning_rate": 4.9996770310277506e-05,
+      "loss": 0.4439,
+      "step": 170
+    },
+    {
+      "epoch": 0.013061461432406937,
+      "grad_norm": 4.03125,
+      "learning_rate": 4.9995781657555e-05,
+      "loss": 0.4832,
+      "step": 180
+    },
+    {
+      "epoch": 0.013787098178651766,
+      "grad_norm": 4.71875,
+      "learning_rate": 4.99946612002253e-05,
+      "loss": 0.5325,
+      "step": 190
+    },
+    {
+      "epoch": 0.014512734924896596,
+      "grad_norm": 2.390625,
+      "learning_rate": 4.9993408944196676e-05,
+      "loss": 0.3253,
+      "step": 200
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 13781,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 10,
+  "total_flos": 0.0,
+  "train_batch_size": 5,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-200/training_args.bin b/checkpoint-200/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..4c28dcaa99e0497b1c861e1642ce3651d4277bea
--- /dev/null
+++ b/checkpoint-200/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9f4cfc15f45dfcabb458cfe7e31a5e2a32e96eacab3ae733ab7eefd0eca041b5
+size 4984
diff --git a/checkpoint-210/README.md b/checkpoint-210/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..64d55750c9c68c8ef0ccff2f65b1fd32a16578a9
--- /dev/null
+++ b/checkpoint-210/README.md
@@ -0,0 +1,202 @@
+---
+library_name: peft
+base_model: ../pretrained/MiniCPM-2B-dpo-bf16/
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.10.0
\ No newline at end of file
diff --git a/checkpoint-210/adapter_config.json b/checkpoint-210/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..d0eb9db7c6b0c827aba0e86e5057f651b0f868c9
--- /dev/null
+++ b/checkpoint-210/adapter_config.json
@@ -0,0 +1,29 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "../pretrained/MiniCPM-2B-dpo-bf16/",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": "gaussian",
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-210/adapter_model.safetensors b/checkpoint-210/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..ee2f4b00881d2682b683982d7ee56b51d260f71f
--- /dev/null
+++ b/checkpoint-210/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a96454df288b2084f7fc5d2ce332391335ddeb63992d67d8ea89de2eced2314b
+size 5919456
diff --git a/checkpoint-210/optimizer.pt b/checkpoint-210/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7ac7484aff3c36f879148406eabe92f67e489b07
--- /dev/null
+++ b/checkpoint-210/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a3d8c93f96a5b7a98d07c9b2ffaf8b40461b301ebab4c3c29a13841d3a2411a9
+size 11930938
diff --git a/checkpoint-210/rng_state_0.pth b/checkpoint-210/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..b50684fe0b3ee395929d563c6d176ea95f130e03
--- /dev/null
+++ b/checkpoint-210/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e0758b52eaf66a9d3841ff67a6781c30823a456df25f8d8dce667c6e30632205
+size 15024
diff --git a/checkpoint-210/rng_state_1.pth b/checkpoint-210/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..a59ad732531e10e93f77e6abe03bfa81c396d95c
--- /dev/null
+++ b/checkpoint-210/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2e2a0a6d685c08e2c32c6dcdc27b6a2ecc5d4df518a6f8cb4363bb120173c519
+size 15024
diff --git a/checkpoint-210/rng_state_2.pth b/checkpoint-210/rng_state_2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..431b9592b6ebe9e50e467b14e03f71b9b0348a95
--- /dev/null
+++ b/checkpoint-210/rng_state_2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:063e66491e187afaf0b8ca5021700297f4aff6c6851e81f4c8a2b7755b680287
+size 15024
diff --git a/checkpoint-210/rng_state_3.pth b/checkpoint-210/rng_state_3.pth
new file mode 100644
index 0000000000000000000000000000000000000000..273cf65890be5f6484d1de3986ab4871c1b30e52
--- /dev/null
+++ b/checkpoint-210/rng_state_3.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2b36d1281b90013ae9d40beccbd5b80c807353ef2f41f404c56dcd2be42e1497
+size 15024
diff --git a/checkpoint-210/scheduler.pt b/checkpoint-210/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ad12e7e777d6d0a443259fe585b13e91d455b83e
--- /dev/null
+++ b/checkpoint-210/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:60fa989c061d8ccbc637eb6bd5d82b01df1af4e5040ee093c05f982c41c21f71
+size 1064
diff --git a/checkpoint-210/trainer_state.json b/checkpoint-210/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..7d3f691bf10bde33baf0c23d9005caa6d4438fa2
--- /dev/null
+++ b/checkpoint-210/trainer_state.json
@@ -0,0 +1,168 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.015238371671141426,
+  "eval_steps": 500,
+  "global_step": 210,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0007256367462448298,
+      "grad_norm": 15.6875,
+      "learning_rate": 5e-06,
+      "loss": 3.2562,
+      "step": 10
+    },
+    {
+      "epoch": 0.0014512734924896596,
+      "grad_norm": 17.5,
+      "learning_rate": 1e-05,
+      "loss": 2.9076,
+      "step": 20
+    },
+    {
+      "epoch": 0.0021769102387344894,
+      "grad_norm": 19.125,
+      "learning_rate": 1.5e-05,
+      "loss": 3.0281,
+      "step": 30
+    },
+    {
+      "epoch": 0.0029025469849793192,
+      "grad_norm": 17.875,
+      "learning_rate": 2e-05,
+      "loss": 2.6225,
+      "step": 40
+    },
+    {
+      "epoch": 0.003628183731224149,
+      "grad_norm": 13.9375,
+      "learning_rate": 2.5e-05,
+      "loss": 2.5137,
+      "step": 50
+    },
+    {
+      "epoch": 0.004353820477468979,
+      "grad_norm": 10.6875,
+      "learning_rate": 3e-05,
+      "loss": 1.994,
+      "step": 60
+    },
+    {
+      "epoch": 0.005079457223713809,
+      "grad_norm": 7.53125,
+      "learning_rate": 3.5e-05,
+      "loss": 1.5519,
+      "step": 70
+    },
+    {
+      "epoch": 0.0058050939699586385,
+      "grad_norm": 7.09375,
+      "learning_rate": 4e-05,
+      "loss": 1.2358,
+      "step": 80
+    },
+    {
+      "epoch": 0.006530730716203468,
+      "grad_norm": 8.0625,
+      "learning_rate": 4.5e-05,
+      "loss": 0.953,
+      "step": 90
+    },
+    {
+      "epoch": 0.007256367462448298,
+      "grad_norm": 6.6875,
+      "learning_rate": 5e-05,
+      "loss": 0.6256,
+      "step": 100
+    },
+    {
+      "epoch": 0.007982004208693128,
+      "grad_norm": 5.03125,
+      "learning_rate": 4.9999934086574596e-05,
+      "loss": 0.6601,
+      "step": 110
+    },
+    {
+      "epoch": 0.008707640954937958,
+      "grad_norm": 5.59375,
+      "learning_rate": 4.9999736346645943e-05,
+      "loss": 0.659,
+      "step": 120
+    },
+    {
+      "epoch": 0.009433277701182788,
+      "grad_norm": 3.953125,
+      "learning_rate": 4.999940678125673e-05,
+      "loss": 0.512,
+      "step": 130
+    },
+    {
+      "epoch": 0.010158914447427617,
+      "grad_norm": 5.46875,
+      "learning_rate": 4.9998945392144796e-05,
+      "loss": 0.5522,
+      "step": 140
+    },
+    {
+      "epoch": 0.010884551193672447,
+      "grad_norm": 3.96875,
+      "learning_rate": 4.999835218174307e-05,
+      "loss": 0.5532,
+      "step": 150
+    },
+    {
+      "epoch": 0.011610187939917277,
+      "grad_norm": 3.234375,
+      "learning_rate": 4.99976271531796e-05,
+      "loss": 0.5353,
+      "step": 160
+    },
+    {
+      "epoch": 0.012335824686162107,
+      "grad_norm": 4.5,
+      "learning_rate": 4.9996770310277506e-05,
+      "loss": 0.4439,
+      "step": 170
+    },
+    {
+      "epoch": 0.013061461432406937,
+      "grad_norm": 4.03125,
+      "learning_rate": 4.9995781657555e-05,
+      "loss": 0.4832,
+      "step": 180
+    },
+    {
+      "epoch": 0.013787098178651766,
+      "grad_norm": 4.71875,
+      "learning_rate": 4.99946612002253e-05,
+      "loss": 0.5325,
+      "step": 190
+    },
+    {
+      "epoch": 0.014512734924896596,
+      "grad_norm": 2.390625,
+      "learning_rate": 4.9993408944196676e-05,
+      "loss": 0.3253,
+      "step": 200
+    },
+    {
+      "epoch": 0.015238371671141426,
+      "grad_norm": 4.25,
+      "learning_rate": 4.9992024896072364e-05,
+      "loss": 0.3897,
+      "step": 210
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 13781,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 10,
+  "total_flos": 0.0,
+  "train_batch_size": 5,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-210/training_args.bin b/checkpoint-210/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..4c28dcaa99e0497b1c861e1642ce3651d4277bea
--- /dev/null
+++ b/checkpoint-210/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9f4cfc15f45dfcabb458cfe7e31a5e2a32e96eacab3ae733ab7eefd0eca041b5
+size 4984
diff --git a/checkpoint-220/README.md b/checkpoint-220/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..64d55750c9c68c8ef0ccff2f65b1fd32a16578a9
--- /dev/null
+++ b/checkpoint-220/README.md
@@ -0,0 +1,202 @@
+---
+library_name: peft
+base_model: ../pretrained/MiniCPM-2B-dpo-bf16/
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.10.0
\ No newline at end of file
diff --git a/checkpoint-220/adapter_config.json b/checkpoint-220/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..d0eb9db7c6b0c827aba0e86e5057f651b0f868c9
--- /dev/null
+++ b/checkpoint-220/adapter_config.json
@@ -0,0 +1,29 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "../pretrained/MiniCPM-2B-dpo-bf16/",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": "gaussian",
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-220/adapter_model.safetensors b/checkpoint-220/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..7fe44c622343d53f15ad6fb33f4ac9dce6406f18
--- /dev/null
+++ b/checkpoint-220/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ef73dcafe2294c18c17ca365f999d1f101c5955051d3fce4e457095a76d05dc9
+size 5919456
diff --git a/checkpoint-220/optimizer.pt b/checkpoint-220/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..aa96f16ef0debf92738680e208810450e17ad33a
--- /dev/null
+++ b/checkpoint-220/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fe465d6a103bf094015bbec360592cea1a8e2002b4bde56587a75049ba9811e7
+size 11930938
diff --git a/checkpoint-220/rng_state_0.pth b/checkpoint-220/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..cd86dbca0e6b3468b401c07f26c1eb3fba27b6ba
--- /dev/null
+++ b/checkpoint-220/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:318482ffbb3682b198e21782725194b8e4a47144cd19447822b92c9741381e64
+size 15024
diff --git a/checkpoint-220/rng_state_1.pth b/checkpoint-220/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..98838ae1c3d248b66950bc4d113e7a7fd867389d
--- /dev/null
+++ b/checkpoint-220/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6649fb78ede3c075b0e7d5b7f1a9e91be8ec03fa87f75712323f2711c7a7e883
+size 15024
diff --git a/checkpoint-220/rng_state_2.pth b/checkpoint-220/rng_state_2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..b6864283abb094df0fb052b1924e96aff0a106c8
--- /dev/null
+++ b/checkpoint-220/rng_state_2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6d147c99bfe79a2f8bcaf0eef24750e3d64ada4e06a65d7908b38b89af99b831
+size 15024
diff --git a/checkpoint-220/rng_state_3.pth b/checkpoint-220/rng_state_3.pth
new file mode 100644
index 0000000000000000000000000000000000000000..c634d3a8e7a21b8baf51a0f3da6a906007acd1cc
--- /dev/null
+++ b/checkpoint-220/rng_state_3.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f788cb78149d5b33e0c5eac77d7eea5e6e80a94245857d9ff13fa1685c3dfe3d
+size 15024
diff --git a/checkpoint-220/scheduler.pt b/checkpoint-220/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..da59ec0c4c693d32cb42555609daf4c48c8e0e99
--- /dev/null
+++ b/checkpoint-220/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cc075f58f19172f6b872347b6745ebcecc2c76860fa1d796ca41fb34a2b43c64
+size 1064
diff --git a/checkpoint-220/trainer_state.json b/checkpoint-220/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..c0a4334db51a8e190fae8a5cb17d2d6adabfae9a
--- /dev/null
+++ b/checkpoint-220/trainer_state.json
@@ -0,0 +1,175 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.015964008417386256,
+  "eval_steps": 500,
+  "global_step": 220,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0007256367462448298,
+      "grad_norm": 15.6875,
+      "learning_rate": 5e-06,
+      "loss": 3.2562,
+      "step": 10
+    },
+    {
+      "epoch": 0.0014512734924896596,
+      "grad_norm": 17.5,
+      "learning_rate": 1e-05,
+      "loss": 2.9076,
+      "step": 20
+    },
+    {
+      "epoch": 0.0021769102387344894,
+      "grad_norm": 19.125,
+      "learning_rate": 1.5e-05,
+      "loss": 3.0281,
+      "step": 30
+    },
+    {
+      "epoch": 0.0029025469849793192,
+      "grad_norm": 17.875,
+      "learning_rate": 2e-05,
+      "loss": 2.6225,
+      "step": 40
+    },
+    {
+      "epoch": 0.003628183731224149,
+      "grad_norm": 13.9375,
+      "learning_rate": 2.5e-05,
+      "loss": 2.5137,
+      "step": 50
+    },
+    {
+      "epoch": 0.004353820477468979,
+      "grad_norm": 10.6875,
+      "learning_rate": 3e-05,
+      "loss": 1.994,
+      "step": 60
+    },
+    {
+      "epoch": 0.005079457223713809,
+      "grad_norm": 7.53125,
+      "learning_rate": 3.5e-05,
+      "loss": 1.5519,
+      "step": 70
+    },
+    {
+      "epoch": 0.0058050939699586385,
+      "grad_norm": 7.09375,
+      "learning_rate": 4e-05,
+      "loss": 1.2358,
+      "step": 80
+    },
+    {
+      "epoch": 0.006530730716203468,
+      "grad_norm": 8.0625,
+      "learning_rate": 4.5e-05,
+      "loss": 0.953,
+      "step": 90
+    },
+    {
+      "epoch": 0.007256367462448298,
+      "grad_norm": 6.6875,
+      "learning_rate": 5e-05,
+      "loss": 0.6256,
+      "step": 100
+    },
+    {
+      "epoch": 0.007982004208693128,
+      "grad_norm": 5.03125,
+      "learning_rate": 4.9999934086574596e-05,
+      "loss": 0.6601,
+      "step": 110
+    },
+    {
+      "epoch": 0.008707640954937958,
+      "grad_norm": 5.59375,
+      "learning_rate": 4.9999736346645943e-05,
+      "loss": 0.659,
+      "step": 120
+    },
+    {
+      "epoch": 0.009433277701182788,
+      "grad_norm": 3.953125,
+      "learning_rate": 4.999940678125673e-05,
+      "loss": 0.512,
+      "step": 130
+    },
+    {
+      "epoch": 0.010158914447427617,
+      "grad_norm": 5.46875,
+      "learning_rate": 4.9998945392144796e-05,
+      "loss": 0.5522,
+      "step": 140
+    },
+    {
+      "epoch": 0.010884551193672447,
+      "grad_norm": 3.96875,
+      "learning_rate": 4.999835218174307e-05,
+      "loss": 0.5532,
+      "step": 150
+    },
+    {
+      "epoch": 0.011610187939917277,
+      "grad_norm": 3.234375,
+      "learning_rate": 4.99976271531796e-05,
+      "loss": 0.5353,
+      "step": 160
+    },
+    {
+      "epoch": 0.012335824686162107,
+      "grad_norm": 4.5,
+      "learning_rate": 4.9996770310277506e-05,
+      "loss": 0.4439,
+      "step": 170
+    },
+    {
+      "epoch": 0.013061461432406937,
+      "grad_norm": 4.03125,
+      "learning_rate": 4.9995781657555e-05,
+      "loss": 0.4832,
+      "step": 180
+    },
+    {
+      "epoch": 0.013787098178651766,
+      "grad_norm": 4.71875,
+      "learning_rate": 4.99946612002253e-05,
+      "loss": 0.5325,
+      "step": 190
+    },
+    {
+      "epoch": 0.014512734924896596,
+      "grad_norm": 2.390625,
+      "learning_rate": 4.9993408944196676e-05,
+      "loss": 0.3253,
+      "step": 200
+    },
+    {
+      "epoch": 0.015238371671141426,
+      "grad_norm": 4.25,
+      "learning_rate": 4.9992024896072364e-05,
+      "loss": 0.3897,
+      "step": 210
+    },
+    {
+      "epoch": 0.015964008417386256,
+      "grad_norm": 5.65625,
+      "learning_rate": 4.999050906315055e-05,
+      "loss": 0.3642,
+      "step": 220
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 13781,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 10,
+  "total_flos": 0.0,
+  "train_batch_size": 5,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-220/training_args.bin b/checkpoint-220/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..4c28dcaa99e0497b1c861e1642ce3651d4277bea
--- /dev/null
+++ b/checkpoint-220/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9f4cfc15f45dfcabb458cfe7e31a5e2a32e96eacab3ae733ab7eefd0eca041b5
+size 4984
diff --git a/checkpoint-230/README.md b/checkpoint-230/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..64d55750c9c68c8ef0ccff2f65b1fd32a16578a9
--- /dev/null
+++ b/checkpoint-230/README.md
@@ -0,0 +1,202 @@
+---
+library_name: peft
+base_model: ../pretrained/MiniCPM-2B-dpo-bf16/
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.10.0
\ No newline at end of file
diff --git a/checkpoint-230/adapter_config.json b/checkpoint-230/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..d0eb9db7c6b0c827aba0e86e5057f651b0f868c9
--- /dev/null
+++ b/checkpoint-230/adapter_config.json
@@ -0,0 +1,29 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "../pretrained/MiniCPM-2B-dpo-bf16/",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": "gaussian",
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-230/adapter_model.safetensors b/checkpoint-230/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..2815141785c7cae2ce04c88fe290c2d079680f65
--- /dev/null
+++ b/checkpoint-230/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e5fde1791a20cfa61f94877079cd931eb8daacf86f0f9a5b79fbab1e8123539a
+size 5919456
diff --git a/checkpoint-230/optimizer.pt b/checkpoint-230/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2958b05289ac22e1a87b6d85793b3ff7c8ca2431
--- /dev/null
+++ b/checkpoint-230/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2eca2c863f4825df5f2990952ffb6e3449ca10b95b089041c4f21aaddb31156a
+size 11930938
diff --git a/checkpoint-230/rng_state_0.pth b/checkpoint-230/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..c68750cce886cb775d35c5fccd5db3c3555363d5
--- /dev/null
+++ b/checkpoint-230/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f31b6861ef360b1f8f7ba30f91848c17eae922199d9a925854d74c60b6bcbe39
+size 15024
diff --git a/checkpoint-230/rng_state_1.pth b/checkpoint-230/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..2ea9f284de25615ee3ead2b59b67ea7ad7fa4367
--- /dev/null
+++ b/checkpoint-230/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f1f2affe943ec611905d18783b558f75ebb7f769d0bd8a15a6ac3fa317b59f80
+size 15024
diff --git a/checkpoint-230/rng_state_2.pth b/checkpoint-230/rng_state_2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..50afeddfc9d7a5cf759ddf0111437dcdf87fbc70
--- /dev/null
+++ b/checkpoint-230/rng_state_2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4b9ad4f98473121dbffd26bf93eda8b37788dcf87d0c62ec4896117a4b45e596
+size 15024
diff --git a/checkpoint-230/rng_state_3.pth b/checkpoint-230/rng_state_3.pth
new file mode 100644
index 0000000000000000000000000000000000000000..cb088a7b95cc138773e6992c0e5150ff97b25138
--- /dev/null
+++ b/checkpoint-230/rng_state_3.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9d28066825e942cf92e047e62f797743944810e681a7cf5f6984ee017f90bb42
+size 15024
diff --git a/checkpoint-230/scheduler.pt b/checkpoint-230/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d590789a904a87490213b984f686c42d5584ffe7
--- /dev/null
+++ b/checkpoint-230/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f232375c4704bba2b51160b6530d8d2e92723eef4847e7bc68d51c2f15be4ad2
+size 1064
diff --git a/checkpoint-230/trainer_state.json b/checkpoint-230/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..46d64ff16b1eaca6a5de84b1cde6f7ec60a2e696
--- /dev/null
+++ b/checkpoint-230/trainer_state.json
@@ -0,0 +1,182 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.016689645163631087,
+  "eval_steps": 500,
+  "global_step": 230,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0007256367462448298,
+      "grad_norm": 15.6875,
+      "learning_rate": 5e-06,
+      "loss": 3.2562,
+      "step": 10
+    },
+    {
+      "epoch": 0.0014512734924896596,
+      "grad_norm": 17.5,
+      "learning_rate": 1e-05,
+      "loss": 2.9076,
+      "step": 20
+    },
+    {
+      "epoch": 0.0021769102387344894,
+      "grad_norm": 19.125,
+      "learning_rate": 1.5e-05,
+      "loss": 3.0281,
+      "step": 30
+    },
+    {
+      "epoch": 0.0029025469849793192,
+      "grad_norm": 17.875,
+      "learning_rate": 2e-05,
+      "loss": 2.6225,
+      "step": 40
+    },
+    {
+      "epoch": 0.003628183731224149,
+      "grad_norm": 13.9375,
+      "learning_rate": 2.5e-05,
+      "loss": 2.5137,
+      "step": 50
+    },
+    {
+      "epoch": 0.004353820477468979,
+      "grad_norm": 10.6875,
+      "learning_rate": 3e-05,
+      "loss": 1.994,
+      "step": 60
+    },
+    {
+      "epoch": 0.005079457223713809,
+      "grad_norm": 7.53125,
+      "learning_rate": 3.5e-05,
+      "loss": 1.5519,
+      "step": 70
+    },
+    {
+      "epoch": 0.0058050939699586385,
+      "grad_norm": 7.09375,
+      "learning_rate": 4e-05,
+      "loss": 1.2358,
+      "step": 80
+    },
+    {
+      "epoch": 0.006530730716203468,
+      "grad_norm": 8.0625,
+      "learning_rate": 4.5e-05,
+      "loss": 0.953,
+      "step": 90
+    },
+    {
+      "epoch": 0.007256367462448298,
+      "grad_norm": 6.6875,
+      "learning_rate": 5e-05,
+      "loss": 0.6256,
+      "step": 100
+    },
+    {
+      "epoch": 0.007982004208693128,
+      "grad_norm": 5.03125,
+      "learning_rate": 4.9999934086574596e-05,
+      "loss": 0.6601,
+      "step": 110
+    },
+    {
+      "epoch": 0.008707640954937958,
+      "grad_norm": 5.59375,
+      "learning_rate": 4.9999736346645943e-05,
+      "loss": 0.659,
+      "step": 120
+    },
+    {
+      "epoch": 0.009433277701182788,
+      "grad_norm": 3.953125,
+      "learning_rate": 4.999940678125673e-05,
+      "loss": 0.512,
+      "step": 130
+    },
+    {
+      "epoch": 0.010158914447427617,
+      "grad_norm": 5.46875,
+      "learning_rate": 4.9998945392144796e-05,
+      "loss": 0.5522,
+      "step": 140
+    },
+    {
+      "epoch": 0.010884551193672447,
+      "grad_norm": 3.96875,
+      "learning_rate": 4.999835218174307e-05,
+      "loss": 0.5532,
+      "step": 150
+    },
+    {
+      "epoch": 0.011610187939917277,
+      "grad_norm": 3.234375,
+      "learning_rate": 4.99976271531796e-05,
+      "loss": 0.5353,
+      "step": 160
+    },
+    {
+      "epoch": 0.012335824686162107,
+      "grad_norm": 4.5,
+      "learning_rate": 4.9996770310277506e-05,
+      "loss": 0.4439,
+      "step": 170
+    },
+    {
+      "epoch": 0.013061461432406937,
+      "grad_norm": 4.03125,
+      "learning_rate": 4.9995781657555e-05,
+      "loss": 0.4832,
+      "step": 180
+    },
+    {
+      "epoch": 0.013787098178651766,
+      "grad_norm": 4.71875,
+      "learning_rate": 4.99946612002253e-05,
+      "loss": 0.5325,
+      "step": 190
+    },
+    {
+      "epoch": 0.014512734924896596,
+      "grad_norm": 2.390625,
+      "learning_rate": 4.9993408944196676e-05,
+      "loss": 0.3253,
+      "step": 200
+    },
+    {
+      "epoch": 0.015238371671141426,
+      "grad_norm": 4.25,
+      "learning_rate": 4.9992024896072364e-05,
+      "loss": 0.3897,
+      "step": 210
+    },
+    {
+      "epoch": 0.015964008417386256,
+      "grad_norm": 5.65625,
+      "learning_rate": 4.999050906315055e-05,
+      "loss": 0.3642,
+      "step": 220
+    },
+    {
+      "epoch": 0.016689645163631087,
+      "grad_norm": 1.875,
+      "learning_rate": 4.998886145342434e-05,
+      "loss": 0.4811,
+      "step": 230
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 13781,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 10,
+  "total_flos": 0.0,
+  "train_batch_size": 5,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-230/training_args.bin b/checkpoint-230/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..4c28dcaa99e0497b1c861e1642ce3651d4277bea
--- /dev/null
+++ b/checkpoint-230/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9f4cfc15f45dfcabb458cfe7e31a5e2a32e96eacab3ae733ab7eefd0eca041b5
+size 4984
diff --git a/checkpoint-240/README.md b/checkpoint-240/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..64d55750c9c68c8ef0ccff2f65b1fd32a16578a9
--- /dev/null
+++ b/checkpoint-240/README.md
@@ -0,0 +1,202 @@
+---
+library_name: peft
+base_model: ../pretrained/MiniCPM-2B-dpo-bf16/
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.10.0
\ No newline at end of file
diff --git a/checkpoint-240/adapter_config.json b/checkpoint-240/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..d0eb9db7c6b0c827aba0e86e5057f651b0f868c9
--- /dev/null
+++ b/checkpoint-240/adapter_config.json
@@ -0,0 +1,29 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "../pretrained/MiniCPM-2B-dpo-bf16/",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": "gaussian",
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-240/adapter_model.safetensors b/checkpoint-240/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..d467b6eff29cccda50db3f33bd35086ef8e63391
--- /dev/null
+++ b/checkpoint-240/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:14fa8b1def8708b146136aeb81cd734aa257bf16dad751a3652c102c6c81a66e
+size 5919456
diff --git a/checkpoint-240/optimizer.pt b/checkpoint-240/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..31e7f54a82e146b1a3af0d38629ee166cb8802ed
--- /dev/null
+++ b/checkpoint-240/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:410b0178d740b128a5f32201e51241f9a40401143060f004f1c288f3f8f60535
+size 11930938
diff --git a/checkpoint-240/rng_state_0.pth b/checkpoint-240/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..4d57f2e90384717bcce48b8519a5714de45f7b7b
--- /dev/null
+++ b/checkpoint-240/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:58745d05e18e051e7d277a98eeb9a9590b39e8372075309b4931494def212d10
+size 15024
diff --git a/checkpoint-240/rng_state_1.pth b/checkpoint-240/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..576a70cfb7dbe639a00d7aa56a9e2934152d9cbf
--- /dev/null
+++ b/checkpoint-240/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4c851cc404347ac489037c5246981664e3f033add977dafdd7d13ec0631bdd8a
+size 15024
diff --git a/checkpoint-240/rng_state_2.pth b/checkpoint-240/rng_state_2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..9f2b9f2f88b4f81ef38beea9a7923a15c9673dbb
--- /dev/null
+++ b/checkpoint-240/rng_state_2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:acd66bcb4d4726b8ec9452a28d4fb5372003a38e8426423bf6bd28b672b0fdc1
+size 15024
diff --git a/checkpoint-240/rng_state_3.pth b/checkpoint-240/rng_state_3.pth
new file mode 100644
index 0000000000000000000000000000000000000000..59179dc5ec4127589ae34dd3afb2222941d351f4
--- /dev/null
+++ b/checkpoint-240/rng_state_3.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1e91642e29f2f8b79a095419d1856de6be2673f1d24bbdd61708991800d148b6
+size 15024
diff --git a/checkpoint-240/scheduler.pt b/checkpoint-240/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d249baae1179a6a881db7254f354c211b32523ea
--- /dev/null
+++ b/checkpoint-240/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:efa92c4aa5b6de481ee00999f89f089bfcaa5e31a4b58ec7cf178e255ba38377
+size 1064
diff --git a/checkpoint-240/trainer_state.json b/checkpoint-240/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..d1a4ce7116decefeef79803ae0d83e013a5449d8
--- /dev/null
+++ b/checkpoint-240/trainer_state.json
@@ -0,0 +1,189 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.017415281909875915,
+  "eval_steps": 500,
+  "global_step": 240,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0007256367462448298,
+      "grad_norm": 15.6875,
+      "learning_rate": 5e-06,
+      "loss": 3.2562,
+      "step": 10
+    },
+    {
+      "epoch": 0.0014512734924896596,
+      "grad_norm": 17.5,
+      "learning_rate": 1e-05,
+      "loss": 2.9076,
+      "step": 20
+    },
+    {
+      "epoch": 0.0021769102387344894,
+      "grad_norm": 19.125,
+      "learning_rate": 1.5e-05,
+      "loss": 3.0281,
+      "step": 30
+    },
+    {
+      "epoch": 0.0029025469849793192,
+      "grad_norm": 17.875,
+      "learning_rate": 2e-05,
+      "loss": 2.6225,
+      "step": 40
+    },
+    {
+      "epoch": 0.003628183731224149,
+      "grad_norm": 13.9375,
+      "learning_rate": 2.5e-05,
+      "loss": 2.5137,
+      "step": 50
+    },
+    {
+      "epoch": 0.004353820477468979,
+      "grad_norm": 10.6875,
+      "learning_rate": 3e-05,
+      "loss": 1.994,
+      "step": 60
+    },
+    {
+      "epoch": 0.005079457223713809,
+      "grad_norm": 7.53125,
+      "learning_rate": 3.5e-05,
+      "loss": 1.5519,
+      "step": 70
+    },
+    {
+      "epoch": 0.0058050939699586385,
+      "grad_norm": 7.09375,
+      "learning_rate": 4e-05,
+      "loss": 1.2358,
+      "step": 80
+    },
+    {
+      "epoch": 0.006530730716203468,
+      "grad_norm": 8.0625,
+      "learning_rate": 4.5e-05,
+      "loss": 0.953,
+      "step": 90
+    },
+    {
+      "epoch": 0.007256367462448298,
+      "grad_norm": 6.6875,
+      "learning_rate": 5e-05,
+      "loss": 0.6256,
+      "step": 100
+    },
+    {
+      "epoch": 0.007982004208693128,
+      "grad_norm": 5.03125,
+      "learning_rate": 4.9999934086574596e-05,
+      "loss": 0.6601,
+      "step": 110
+    },
+    {
+      "epoch": 0.008707640954937958,
+      "grad_norm": 5.59375,
+      "learning_rate": 4.9999736346645943e-05,
+      "loss": 0.659,
+      "step": 120
+    },
+    {
+      "epoch": 0.009433277701182788,
+      "grad_norm": 3.953125,
+      "learning_rate": 4.999940678125673e-05,
+      "loss": 0.512,
+      "step": 130
+    },
+    {
+      "epoch": 0.010158914447427617,
+      "grad_norm": 5.46875,
+      "learning_rate": 4.9998945392144796e-05,
+      "loss": 0.5522,
+      "step": 140
+    },
+    {
+      "epoch": 0.010884551193672447,
+      "grad_norm": 3.96875,
+      "learning_rate": 4.999835218174307e-05,
+      "loss": 0.5532,
+      "step": 150
+    },
+    {
+      "epoch": 0.011610187939917277,
+      "grad_norm": 3.234375,
+      "learning_rate": 4.99976271531796e-05,
+      "loss": 0.5353,
+      "step": 160
+    },
+    {
+      "epoch": 0.012335824686162107,
+      "grad_norm": 4.5,
+      "learning_rate": 4.9996770310277506e-05,
+      "loss": 0.4439,
+      "step": 170
+    },
+    {
+      "epoch": 0.013061461432406937,
+      "grad_norm": 4.03125,
+      "learning_rate": 4.9995781657555e-05,
+      "loss": 0.4832,
+      "step": 180
+    },
+    {
+      "epoch": 0.013787098178651766,
+      "grad_norm": 4.71875,
+      "learning_rate": 4.99946612002253e-05,
+      "loss": 0.5325,
+      "step": 190
+    },
+    {
+      "epoch": 0.014512734924896596,
+      "grad_norm": 2.390625,
+      "learning_rate": 4.9993408944196676e-05,
+      "loss": 0.3253,
+      "step": 200
+    },
+    {
+      "epoch": 0.015238371671141426,
+      "grad_norm": 4.25,
+      "learning_rate": 4.9992024896072364e-05,
+      "loss": 0.3897,
+      "step": 210
+    },
+    {
+      "epoch": 0.015964008417386256,
+      "grad_norm": 5.65625,
+      "learning_rate": 4.999050906315055e-05,
+      "loss": 0.3642,
+      "step": 220
+    },
+    {
+      "epoch": 0.016689645163631087,
+      "grad_norm": 1.875,
+      "learning_rate": 4.998886145342434e-05,
+      "loss": 0.4811,
+      "step": 230
+    },
+    {
+      "epoch": 0.017415281909875915,
+      "grad_norm": 3.546875,
+      "learning_rate": 4.9987082075581684e-05,
+      "loss": 0.4725,
+      "step": 240
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 13781,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 10,
+  "total_flos": 0.0,
+  "train_batch_size": 5,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-240/training_args.bin b/checkpoint-240/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..4c28dcaa99e0497b1c861e1642ce3651d4277bea
--- /dev/null
+++ b/checkpoint-240/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9f4cfc15f45dfcabb458cfe7e31a5e2a32e96eacab3ae733ab7eefd0eca041b5
+size 4984
diff --git a/checkpoint-250/README.md b/checkpoint-250/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..64d55750c9c68c8ef0ccff2f65b1fd32a16578a9
--- /dev/null
+++ b/checkpoint-250/README.md
@@ -0,0 +1,202 @@
+---
+library_name: peft
+base_model: ../pretrained/MiniCPM-2B-dpo-bf16/
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.10.0
\ No newline at end of file
diff --git a/checkpoint-250/adapter_config.json b/checkpoint-250/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..d0eb9db7c6b0c827aba0e86e5057f651b0f868c9
--- /dev/null
+++ b/checkpoint-250/adapter_config.json
@@ -0,0 +1,29 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "../pretrained/MiniCPM-2B-dpo-bf16/",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": "gaussian",
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-250/adapter_model.safetensors b/checkpoint-250/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..fcbcc884b3d9c3b4be205daa2163d8a4cf44c585
--- /dev/null
+++ b/checkpoint-250/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:799f1aa39fbf0cc86df1f0d266b1c3d1be74f627d77bf5198235b9a1c00335dd
+size 5919456
diff --git a/checkpoint-250/optimizer.pt b/checkpoint-250/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2766cc10ee370f641c5a89728c4bc988e35a7afc
--- /dev/null
+++ b/checkpoint-250/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ca67f38a42b46b72b3ab6246d1ad418d299cb4b08767c0f1d27428a92c52d208
+size 11930938
diff --git a/checkpoint-250/rng_state_0.pth b/checkpoint-250/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..4713b7423fa71ff9c6811be8f082773be8e24c4b
--- /dev/null
+++ b/checkpoint-250/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:aee86a78259b6e02d1aff507611b7e0efb47c407b206da6ae187c3a6ef6ffb70
+size 15024
diff --git a/checkpoint-250/rng_state_1.pth b/checkpoint-250/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..988502460d1de09dce1295edb30d97fa555b1f39
--- /dev/null
+++ b/checkpoint-250/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f91fe3b0ad6571acecf97acf3b34552653da1cdb437cc92743635b1cb6378718
+size 15024
diff --git a/checkpoint-250/rng_state_2.pth b/checkpoint-250/rng_state_2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..c5792f5fb28e71ad132ce8c5ffaf1303bfa7c21d
--- /dev/null
+++ b/checkpoint-250/rng_state_2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e72b70555625db2b4d0bb696afee6e3868cb327a5a00571b3b276989ebb8be1c
+size 15024
diff --git a/checkpoint-250/rng_state_3.pth b/checkpoint-250/rng_state_3.pth
new file mode 100644
index 0000000000000000000000000000000000000000..927b7132d1f4bccee146819f32c23eb0588cd2ca
--- /dev/null
+++ b/checkpoint-250/rng_state_3.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b05109309e52a227468d27e588e1b56c09dd9cd76c6417ad7e44f3e9f4ab243e
+size 15024
diff --git a/checkpoint-250/scheduler.pt b/checkpoint-250/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1c3bb0c5e22718bda379febc92e5c9b4948a0a5f
--- /dev/null
+++ b/checkpoint-250/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:de19dc94189ebf1e14925005f6796f6a52242e853f3324035f9db061eeaf42e7
+size 1064
diff --git a/checkpoint-250/trainer_state.json b/checkpoint-250/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..03abd2774880a8e18ea1d464dd8e960a51d0dfb9
--- /dev/null
+++ b/checkpoint-250/trainer_state.json
@@ -0,0 +1,196 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.018140918656120747,
+  "eval_steps": 500,
+  "global_step": 250,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0007256367462448298,
+      "grad_norm": 15.6875,
+      "learning_rate": 5e-06,
+      "loss": 3.2562,
+      "step": 10
+    },
+    {
+      "epoch": 0.0014512734924896596,
+      "grad_norm": 17.5,
+      "learning_rate": 1e-05,
+      "loss": 2.9076,
+      "step": 20
+    },
+    {
+      "epoch": 0.0021769102387344894,
+      "grad_norm": 19.125,
+      "learning_rate": 1.5e-05,
+      "loss": 3.0281,
+      "step": 30
+    },
+    {
+      "epoch": 0.0029025469849793192,
+      "grad_norm": 17.875,
+      "learning_rate": 2e-05,
+      "loss": 2.6225,
+      "step": 40
+    },
+    {
+      "epoch": 0.003628183731224149,
+      "grad_norm": 13.9375,
+      "learning_rate": 2.5e-05,
+      "loss": 2.5137,
+      "step": 50
+    },
+    {
+      "epoch": 0.004353820477468979,
+      "grad_norm": 10.6875,
+      "learning_rate": 3e-05,
+      "loss": 1.994,
+      "step": 60
+    },
+    {
+      "epoch": 0.005079457223713809,
+      "grad_norm": 7.53125,
+      "learning_rate": 3.5e-05,
+      "loss": 1.5519,
+      "step": 70
+    },
+    {
+      "epoch": 0.0058050939699586385,
+      "grad_norm": 7.09375,
+      "learning_rate": 4e-05,
+      "loss": 1.2358,
+      "step": 80
+    },
+    {
+      "epoch": 0.006530730716203468,
+      "grad_norm": 8.0625,
+      "learning_rate": 4.5e-05,
+      "loss": 0.953,
+      "step": 90
+    },
+    {
+      "epoch": 0.007256367462448298,
+      "grad_norm": 6.6875,
+      "learning_rate": 5e-05,
+      "loss": 0.6256,
+      "step": 100
+    },
+    {
+      "epoch": 0.007982004208693128,
+      "grad_norm": 5.03125,
+      "learning_rate": 4.9999934086574596e-05,
+      "loss": 0.6601,
+      "step": 110
+    },
+    {
+      "epoch": 0.008707640954937958,
+      "grad_norm": 5.59375,
+      "learning_rate": 4.9999736346645943e-05,
+      "loss": 0.659,
+      "step": 120
+    },
+    {
+      "epoch": 0.009433277701182788,
+      "grad_norm": 3.953125,
+      "learning_rate": 4.999940678125673e-05,
+      "loss": 0.512,
+      "step": 130
+    },
+    {
+      "epoch": 0.010158914447427617,
+      "grad_norm": 5.46875,
+      "learning_rate": 4.9998945392144796e-05,
+      "loss": 0.5522,
+      "step": 140
+    },
+    {
+      "epoch": 0.010884551193672447,
+      "grad_norm": 3.96875,
+      "learning_rate": 4.999835218174307e-05,
+      "loss": 0.5532,
+      "step": 150
+    },
+    {
+      "epoch": 0.011610187939917277,
+      "grad_norm": 3.234375,
+      "learning_rate": 4.99976271531796e-05,
+      "loss": 0.5353,
+      "step": 160
+    },
+    {
+      "epoch": 0.012335824686162107,
+      "grad_norm": 4.5,
+      "learning_rate": 4.9996770310277506e-05,
+      "loss": 0.4439,
+      "step": 170
+    },
+    {
+      "epoch": 0.013061461432406937,
+      "grad_norm": 4.03125,
+      "learning_rate": 4.9995781657555e-05,
+      "loss": 0.4832,
+      "step": 180
+    },
+    {
+      "epoch": 0.013787098178651766,
+      "grad_norm": 4.71875,
+      "learning_rate": 4.99946612002253e-05,
+      "loss": 0.5325,
+      "step": 190
+    },
+    {
+      "epoch": 0.014512734924896596,
+      "grad_norm": 2.390625,
+      "learning_rate": 4.9993408944196676e-05,
+      "loss": 0.3253,
+      "step": 200
+    },
+    {
+      "epoch": 0.015238371671141426,
+      "grad_norm": 4.25,
+      "learning_rate": 4.9992024896072364e-05,
+      "loss": 0.3897,
+      "step": 210
+    },
+    {
+      "epoch": 0.015964008417386256,
+      "grad_norm": 5.65625,
+      "learning_rate": 4.999050906315055e-05,
+      "loss": 0.3642,
+      "step": 220
+    },
+    {
+      "epoch": 0.016689645163631087,
+      "grad_norm": 1.875,
+      "learning_rate": 4.998886145342434e-05,
+      "loss": 0.4811,
+      "step": 230
+    },
+    {
+      "epoch": 0.017415281909875915,
+      "grad_norm": 3.546875,
+      "learning_rate": 4.9987082075581684e-05,
+      "loss": 0.4725,
+      "step": 240
+    },
+    {
+      "epoch": 0.018140918656120747,
+      "grad_norm": 3.046875,
+      "learning_rate": 4.9985170939005386e-05,
+      "loss": 0.3427,
+      "step": 250
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 13781,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 10,
+  "total_flos": 0.0,
+  "train_batch_size": 5,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-250/training_args.bin b/checkpoint-250/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..4c28dcaa99e0497b1c861e1642ce3651d4277bea
--- /dev/null
+++ b/checkpoint-250/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9f4cfc15f45dfcabb458cfe7e31a5e2a32e96eacab3ae733ab7eefd0eca041b5
+size 4984
diff --git a/checkpoint-260/README.md b/checkpoint-260/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..64d55750c9c68c8ef0ccff2f65b1fd32a16578a9
--- /dev/null
+++ b/checkpoint-260/README.md
@@ -0,0 +1,202 @@
+---
+library_name: peft
+base_model: ../pretrained/MiniCPM-2B-dpo-bf16/
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.10.0
\ No newline at end of file
diff --git a/checkpoint-260/adapter_config.json b/checkpoint-260/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..d0eb9db7c6b0c827aba0e86e5057f651b0f868c9
--- /dev/null
+++ b/checkpoint-260/adapter_config.json
@@ -0,0 +1,29 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "../pretrained/MiniCPM-2B-dpo-bf16/",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": "gaussian",
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-260/adapter_model.safetensors b/checkpoint-260/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..ed487979d0cd0259c86230142ca91553d5384869
--- /dev/null
+++ b/checkpoint-260/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c617549d16f4fab54dea79f7d94a53d5a967bbf2c342f218271c26f8cbc21bd2
+size 5919456
diff --git a/checkpoint-260/optimizer.pt b/checkpoint-260/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..02f7b7a2f09f3ee6aa5ac3be38c374e9f7d05022
--- /dev/null
+++ b/checkpoint-260/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1df2a6c5a32b1bccc99ccc08b16dec0a617dfc8925da42428f32a5e37383433e
+size 11930938
diff --git a/checkpoint-260/rng_state_0.pth b/checkpoint-260/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..5986159d9814c60e6e2c7b9845b8a11b6c9eacc4
--- /dev/null
+++ b/checkpoint-260/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:19ab1100726ef208c33b7e659f3340a533b61f069b5fa9eb6355bac9eaa6c876
+size 15024
diff --git a/checkpoint-260/rng_state_1.pth b/checkpoint-260/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..94024e13f19412b7f0f443b53bc1c14c189b7f6f
--- /dev/null
+++ b/checkpoint-260/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a660538aa0ca7e33f80069de79c46bca026a35c420ff5d83cfef38640eadb1c3
+size 15024
diff --git a/checkpoint-260/rng_state_2.pth b/checkpoint-260/rng_state_2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..c2d39895499f444760f7eeb20e7cb96d49db4c69
--- /dev/null
+++ b/checkpoint-260/rng_state_2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ed0d5232504ff0a49b85a6a76bab1542fdb729b1c77d3ec26fe42bde3d7656b4
+size 15024
diff --git a/checkpoint-260/rng_state_3.pth b/checkpoint-260/rng_state_3.pth
new file mode 100644
index 0000000000000000000000000000000000000000..ac757b8dda1aa5532598acd28192377f1f03d79f
--- /dev/null
+++ b/checkpoint-260/rng_state_3.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ae8f2759bd590d484c8441009c0eb397ac428cc143357d9d070f513c126b232b
+size 15024
diff --git a/checkpoint-260/scheduler.pt b/checkpoint-260/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6bd5e7c580d5ceafba8be6b88399d6ad4ae2299e
--- /dev/null
+++ b/checkpoint-260/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1410187d07644597aeaed65f2e838a84b30978b335ea35377c8509d3a8edcc26
+size 1064
diff --git a/checkpoint-260/trainer_state.json b/checkpoint-260/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..96bd54702d6a265eaf396ee8f2f3b3966ba3d692
--- /dev/null
+++ b/checkpoint-260/trainer_state.json
@@ -0,0 +1,203 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.018866555402365575,
+  "eval_steps": 500,
+  "global_step": 260,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0007256367462448298,
+      "grad_norm": 15.6875,
+      "learning_rate": 5e-06,
+      "loss": 3.2562,
+      "step": 10
+    },
+    {
+      "epoch": 0.0014512734924896596,
+      "grad_norm": 17.5,
+      "learning_rate": 1e-05,
+      "loss": 2.9076,
+      "step": 20
+    },
+    {
+      "epoch": 0.0021769102387344894,
+      "grad_norm": 19.125,
+      "learning_rate": 1.5e-05,
+      "loss": 3.0281,
+      "step": 30
+    },
+    {
+      "epoch": 0.0029025469849793192,
+      "grad_norm": 17.875,
+      "learning_rate": 2e-05,
+      "loss": 2.6225,
+      "step": 40
+    },
+    {
+      "epoch": 0.003628183731224149,
+      "grad_norm": 13.9375,
+      "learning_rate": 2.5e-05,
+      "loss": 2.5137,
+      "step": 50
+    },
+    {
+      "epoch": 0.004353820477468979,
+      "grad_norm": 10.6875,
+      "learning_rate": 3e-05,
+      "loss": 1.994,
+      "step": 60
+    },
+    {
+      "epoch": 0.005079457223713809,
+      "grad_norm": 7.53125,
+      "learning_rate": 3.5e-05,
+      "loss": 1.5519,
+      "step": 70
+    },
+    {
+      "epoch": 0.0058050939699586385,
+      "grad_norm": 7.09375,
+      "learning_rate": 4e-05,
+      "loss": 1.2358,
+      "step": 80
+    },
+    {
+      "epoch": 0.006530730716203468,
+      "grad_norm": 8.0625,
+      "learning_rate": 4.5e-05,
+      "loss": 0.953,
+      "step": 90
+    },
+    {
+      "epoch": 0.007256367462448298,
+      "grad_norm": 6.6875,
+      "learning_rate": 5e-05,
+      "loss": 0.6256,
+      "step": 100
+    },
+    {
+      "epoch": 0.007982004208693128,
+      "grad_norm": 5.03125,
+      "learning_rate": 4.9999934086574596e-05,
+      "loss": 0.6601,
+      "step": 110
+    },
+    {
+      "epoch": 0.008707640954937958,
+      "grad_norm": 5.59375,
+      "learning_rate": 4.9999736346645943e-05,
+      "loss": 0.659,
+      "step": 120
+    },
+    {
+      "epoch": 0.009433277701182788,
+      "grad_norm": 3.953125,
+      "learning_rate": 4.999940678125673e-05,
+      "loss": 0.512,
+      "step": 130
+    },
+    {
+      "epoch": 0.010158914447427617,
+      "grad_norm": 5.46875,
+      "learning_rate": 4.9998945392144796e-05,
+      "loss": 0.5522,
+      "step": 140
+    },
+    {
+      "epoch": 0.010884551193672447,
+      "grad_norm": 3.96875,
+      "learning_rate": 4.999835218174307e-05,
+      "loss": 0.5532,
+      "step": 150
+    },
+    {
+      "epoch": 0.011610187939917277,
+      "grad_norm": 3.234375,
+      "learning_rate": 4.99976271531796e-05,
+      "loss": 0.5353,
+      "step": 160
+    },
+    {
+      "epoch": 0.012335824686162107,
+      "grad_norm": 4.5,
+      "learning_rate": 4.9996770310277506e-05,
+      "loss": 0.4439,
+      "step": 170
+    },
+    {
+      "epoch": 0.013061461432406937,
+      "grad_norm": 4.03125,
+      "learning_rate": 4.9995781657555e-05,
+      "loss": 0.4832,
+      "step": 180
+    },
+    {
+      "epoch": 0.013787098178651766,
+      "grad_norm": 4.71875,
+      "learning_rate": 4.99946612002253e-05,
+      "loss": 0.5325,
+      "step": 190
+    },
+    {
+      "epoch": 0.014512734924896596,
+      "grad_norm": 2.390625,
+      "learning_rate": 4.9993408944196676e-05,
+      "loss": 0.3253,
+      "step": 200
+    },
+    {
+      "epoch": 0.015238371671141426,
+      "grad_norm": 4.25,
+      "learning_rate": 4.9992024896072364e-05,
+      "loss": 0.3897,
+      "step": 210
+    },
+    {
+      "epoch": 0.015964008417386256,
+      "grad_norm": 5.65625,
+      "learning_rate": 4.999050906315055e-05,
+      "loss": 0.3642,
+      "step": 220
+    },
+    {
+      "epoch": 0.016689645163631087,
+      "grad_norm": 1.875,
+      "learning_rate": 4.998886145342434e-05,
+      "loss": 0.4811,
+      "step": 230
+    },
+    {
+      "epoch": 0.017415281909875915,
+      "grad_norm": 3.546875,
+      "learning_rate": 4.9987082075581684e-05,
+      "loss": 0.4725,
+      "step": 240
+    },
+    {
+      "epoch": 0.018140918656120747,
+      "grad_norm": 3.046875,
+      "learning_rate": 4.9985170939005386e-05,
+      "loss": 0.3427,
+      "step": 250
+    },
+    {
+      "epoch": 0.018866555402365575,
+      "grad_norm": 4.5625,
+      "learning_rate": 4.998312805377302e-05,
+      "loss": 0.3568,
+      "step": 260
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 13781,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 10,
+  "total_flos": 0.0,
+  "train_batch_size": 5,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-260/training_args.bin b/checkpoint-260/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..4c28dcaa99e0497b1c861e1642ce3651d4277bea
--- /dev/null
+++ b/checkpoint-260/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9f4cfc15f45dfcabb458cfe7e31a5e2a32e96eacab3ae733ab7eefd0eca041b5
+size 4984
diff --git a/checkpoint-270/README.md b/checkpoint-270/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..64d55750c9c68c8ef0ccff2f65b1fd32a16578a9
--- /dev/null
+++ b/checkpoint-270/README.md
@@ -0,0 +1,202 @@
+---
+library_name: peft
+base_model: ../pretrained/MiniCPM-2B-dpo-bf16/
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.10.0
\ No newline at end of file
diff --git a/checkpoint-270/adapter_config.json b/checkpoint-270/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..d0eb9db7c6b0c827aba0e86e5057f651b0f868c9
--- /dev/null
+++ b/checkpoint-270/adapter_config.json
@@ -0,0 +1,29 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "../pretrained/MiniCPM-2B-dpo-bf16/",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": "gaussian",
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-270/adapter_model.safetensors b/checkpoint-270/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..41af1003b9060cb8f8c3acfaac36123ecdde6360
--- /dev/null
+++ b/checkpoint-270/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4c1feecc578003b783238ef4c2f368144490d87631d274bdf39792cf5f5a308f
+size 5919456
diff --git a/checkpoint-270/optimizer.pt b/checkpoint-270/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6406a8973d8dce28b939a833017440417940359d
--- /dev/null
+++ b/checkpoint-270/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d1f8c2404501568062ab5cace82421e2cd5b72ae9bd18926e55cb748f9a8236e
+size 11930938
diff --git a/checkpoint-270/rng_state_0.pth b/checkpoint-270/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..f05233abd9598e87fb57c48529e24e3ea8b9517c
--- /dev/null
+++ b/checkpoint-270/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9030cf84602b044b4abc21ad0343a30164c20e803ab04dd65f33d7b34a84a29a
+size 15024
diff --git a/checkpoint-270/rng_state_1.pth b/checkpoint-270/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..f19cc0e17ace2b704c08fe29a33c539f9465caa0
--- /dev/null
+++ b/checkpoint-270/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9565a4c8b0ca8999336de0db1c94d5bdfeb3570932d1a55ca515f5adc74c30a3
+size 15024
diff --git a/checkpoint-270/rng_state_2.pth b/checkpoint-270/rng_state_2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..705738e753a2818baa6f933831ca78f054c406f7
--- /dev/null
+++ b/checkpoint-270/rng_state_2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dfa790f05b0001741c55759d1b35bd8b4c645222299ae14beb864348afe0f49a
+size 15024
diff --git a/checkpoint-270/rng_state_3.pth b/checkpoint-270/rng_state_3.pth
new file mode 100644
index 0000000000000000000000000000000000000000..7b771036b0768a2aac8a9322e11698a375ac633c
--- /dev/null
+++ b/checkpoint-270/rng_state_3.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5821590bbf0fe5f12ee494d5c97256f4c278acecd59663a42ff89e3cda932497
+size 15024
diff --git a/checkpoint-270/scheduler.pt b/checkpoint-270/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..714882b2b3f15583d0d725b1f4ad456706423d68
--- /dev/null
+++ b/checkpoint-270/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f638c267af3600255f2f6a3a5d895f3062d75f39cfdbfc723a2da32265e6bb3a
+size 1064
diff --git a/checkpoint-270/trainer_state.json b/checkpoint-270/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..8abaa60df8ab9050ce84d17a6f8d399f77a6c835
--- /dev/null
+++ b/checkpoint-270/trainer_state.json
@@ -0,0 +1,210 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.019592192148610407,
+  "eval_steps": 500,
+  "global_step": 270,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0007256367462448298,
+      "grad_norm": 15.6875,
+      "learning_rate": 5e-06,
+      "loss": 3.2562,
+      "step": 10
+    },
+    {
+      "epoch": 0.0014512734924896596,
+      "grad_norm": 17.5,
+      "learning_rate": 1e-05,
+      "loss": 2.9076,
+      "step": 20
+    },
+    {
+      "epoch": 0.0021769102387344894,
+      "grad_norm": 19.125,
+      "learning_rate": 1.5e-05,
+      "loss": 3.0281,
+      "step": 30
+    },
+    {
+      "epoch": 0.0029025469849793192,
+      "grad_norm": 17.875,
+      "learning_rate": 2e-05,
+      "loss": 2.6225,
+      "step": 40
+    },
+    {
+      "epoch": 0.003628183731224149,
+      "grad_norm": 13.9375,
+      "learning_rate": 2.5e-05,
+      "loss": 2.5137,
+      "step": 50
+    },
+    {
+      "epoch": 0.004353820477468979,
+      "grad_norm": 10.6875,
+      "learning_rate": 3e-05,
+      "loss": 1.994,
+      "step": 60
+    },
+    {
+      "epoch": 0.005079457223713809,
+      "grad_norm": 7.53125,
+      "learning_rate": 3.5e-05,
+      "loss": 1.5519,
+      "step": 70
+    },
+    {
+      "epoch": 0.0058050939699586385,
+      "grad_norm": 7.09375,
+      "learning_rate": 4e-05,
+      "loss": 1.2358,
+      "step": 80
+    },
+    {
+      "epoch": 0.006530730716203468,
+      "grad_norm": 8.0625,
+      "learning_rate": 4.5e-05,
+      "loss": 0.953,
+      "step": 90
+    },
+    {
+      "epoch": 0.007256367462448298,
+      "grad_norm": 6.6875,
+      "learning_rate": 5e-05,
+      "loss": 0.6256,
+      "step": 100
+    },
+    {
+      "epoch": 0.007982004208693128,
+      "grad_norm": 5.03125,
+      "learning_rate": 4.9999934086574596e-05,
+      "loss": 0.6601,
+      "step": 110
+    },
+    {
+      "epoch": 0.008707640954937958,
+      "grad_norm": 5.59375,
+      "learning_rate": 4.9999736346645943e-05,
+      "loss": 0.659,
+      "step": 120
+    },
+    {
+      "epoch": 0.009433277701182788,
+      "grad_norm": 3.953125,
+      "learning_rate": 4.999940678125673e-05,
+      "loss": 0.512,
+      "step": 130
+    },
+    {
+      "epoch": 0.010158914447427617,
+      "grad_norm": 5.46875,
+      "learning_rate": 4.9998945392144796e-05,
+      "loss": 0.5522,
+      "step": 140
+    },
+    {
+      "epoch": 0.010884551193672447,
+      "grad_norm": 3.96875,
+      "learning_rate": 4.999835218174307e-05,
+      "loss": 0.5532,
+      "step": 150
+    },
+    {
+      "epoch": 0.011610187939917277,
+      "grad_norm": 3.234375,
+      "learning_rate": 4.99976271531796e-05,
+      "loss": 0.5353,
+      "step": 160
+    },
+    {
+      "epoch": 0.012335824686162107,
+      "grad_norm": 4.5,
+      "learning_rate": 4.9996770310277506e-05,
+      "loss": 0.4439,
+      "step": 170
+    },
+    {
+      "epoch": 0.013061461432406937,
+      "grad_norm": 4.03125,
+      "learning_rate": 4.9995781657555e-05,
+      "loss": 0.4832,
+      "step": 180
+    },
+    {
+      "epoch": 0.013787098178651766,
+      "grad_norm": 4.71875,
+      "learning_rate": 4.99946612002253e-05,
+      "loss": 0.5325,
+      "step": 190
+    },
+    {
+      "epoch": 0.014512734924896596,
+      "grad_norm": 2.390625,
+      "learning_rate": 4.9993408944196676e-05,
+      "loss": 0.3253,
+      "step": 200
+    },
+    {
+      "epoch": 0.015238371671141426,
+      "grad_norm": 4.25,
+      "learning_rate": 4.9992024896072364e-05,
+      "loss": 0.3897,
+      "step": 210
+    },
+    {
+      "epoch": 0.015964008417386256,
+      "grad_norm": 5.65625,
+      "learning_rate": 4.999050906315055e-05,
+      "loss": 0.3642,
+      "step": 220
+    },
+    {
+      "epoch": 0.016689645163631087,
+      "grad_norm": 1.875,
+      "learning_rate": 4.998886145342434e-05,
+      "loss": 0.4811,
+      "step": 230
+    },
+    {
+      "epoch": 0.017415281909875915,
+      "grad_norm": 3.546875,
+      "learning_rate": 4.9987082075581684e-05,
+      "loss": 0.4725,
+      "step": 240
+    },
+    {
+      "epoch": 0.018140918656120747,
+      "grad_norm": 3.046875,
+      "learning_rate": 4.9985170939005386e-05,
+      "loss": 0.3427,
+      "step": 250
+    },
+    {
+      "epoch": 0.018866555402365575,
+      "grad_norm": 4.5625,
+      "learning_rate": 4.998312805377302e-05,
+      "loss": 0.3568,
+      "step": 260
+    },
+    {
+      "epoch": 0.019592192148610407,
+      "grad_norm": 4.5,
+      "learning_rate": 4.998095343065685e-05,
+      "loss": 0.3803,
+      "step": 270
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 13781,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 10,
+  "total_flos": 0.0,
+  "train_batch_size": 5,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-270/training_args.bin b/checkpoint-270/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..4c28dcaa99e0497b1c861e1642ce3651d4277bea
--- /dev/null
+++ b/checkpoint-270/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9f4cfc15f45dfcabb458cfe7e31a5e2a32e96eacab3ae733ab7eefd0eca041b5
+size 4984
diff --git a/checkpoint-280/README.md b/checkpoint-280/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..64d55750c9c68c8ef0ccff2f65b1fd32a16578a9
--- /dev/null
+++ b/checkpoint-280/README.md
@@ -0,0 +1,202 @@
+---
+library_name: peft
+base_model: ../pretrained/MiniCPM-2B-dpo-bf16/
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.10.0
\ No newline at end of file
diff --git a/checkpoint-280/adapter_config.json b/checkpoint-280/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..d0eb9db7c6b0c827aba0e86e5057f651b0f868c9
--- /dev/null
+++ b/checkpoint-280/adapter_config.json
@@ -0,0 +1,29 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "../pretrained/MiniCPM-2B-dpo-bf16/",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": "gaussian",
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-280/adapter_model.safetensors b/checkpoint-280/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..fb2098def7ce1de896c9dfc072cb084e7b75000b
--- /dev/null
+++ b/checkpoint-280/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3436fa4e49d48e766e9033d6d1947b3a66d7112f5bdc4b403e6d10b784824a71
+size 5919456
diff --git a/checkpoint-280/optimizer.pt b/checkpoint-280/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..091884b9df666eb09ef2298bbaa4d9be20b7270e
--- /dev/null
+++ b/checkpoint-280/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:69cfb947bf0e9831efe4ba6ccc4cd48e5f8ba00ca48c0861798ff52e7dea56e2
+size 11930938
diff --git a/checkpoint-280/rng_state_0.pth b/checkpoint-280/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..cd1fce629acf11a75ada7411064808a959e6d14e
--- /dev/null
+++ b/checkpoint-280/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f6446078e83bac279eab0ee7d6a22ca16b5aad48614d84b0ceabb5981efe81b1
+size 15024
diff --git a/checkpoint-280/rng_state_1.pth b/checkpoint-280/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..1cf7cf5a283eef502d048c98433bcf2585d24816
--- /dev/null
+++ b/checkpoint-280/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b9887930987b82bd1aba09b72af6b46e3fd0bd6b94b09e189c9a8b690282cb47
+size 15024
diff --git a/checkpoint-280/rng_state_2.pth b/checkpoint-280/rng_state_2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..4f4bdd0be477a59a0b1d5aa3dc566258061c76b1
--- /dev/null
+++ b/checkpoint-280/rng_state_2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5f32c141fb3bd2e9dd486ef3b92d1447f6e803722470c82bae776bfe7efaeaf2
+size 15024
diff --git a/checkpoint-280/rng_state_3.pth b/checkpoint-280/rng_state_3.pth
new file mode 100644
index 0000000000000000000000000000000000000000..d182f550c0ef271622c08a54484dacbc3d5aaef2
--- /dev/null
+++ b/checkpoint-280/rng_state_3.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9ce98d0a1ee786c8ef3bf8704ad72999394c76b8ab6b488bf171176bdcba6805
+size 15024
diff --git a/checkpoint-280/scheduler.pt b/checkpoint-280/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..bda56e87af2567efe228564f832d426c868d085f
--- /dev/null
+++ b/checkpoint-280/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:82863359e29a0bd506d47946a3c7404612b6e13c53e2a87354669f7e6339251d
+size 1064
diff --git a/checkpoint-280/trainer_state.json b/checkpoint-280/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..9a61732f37d91fd4cc0396c373116d75384a1f8d
--- /dev/null
+++ b/checkpoint-280/trainer_state.json
@@ -0,0 +1,217 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.020317828894855235,
+  "eval_steps": 500,
+  "global_step": 280,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0007256367462448298,
+      "grad_norm": 15.6875,
+      "learning_rate": 5e-06,
+      "loss": 3.2562,
+      "step": 10
+    },
+    {
+      "epoch": 0.0014512734924896596,
+      "grad_norm": 17.5,
+      "learning_rate": 1e-05,
+      "loss": 2.9076,
+      "step": 20
+    },
+    {
+      "epoch": 0.0021769102387344894,
+      "grad_norm": 19.125,
+      "learning_rate": 1.5e-05,
+      "loss": 3.0281,
+      "step": 30
+    },
+    {
+      "epoch": 0.0029025469849793192,
+      "grad_norm": 17.875,
+      "learning_rate": 2e-05,
+      "loss": 2.6225,
+      "step": 40
+    },
+    {
+      "epoch": 0.003628183731224149,
+      "grad_norm": 13.9375,
+      "learning_rate": 2.5e-05,
+      "loss": 2.5137,
+      "step": 50
+    },
+    {
+      "epoch": 0.004353820477468979,
+      "grad_norm": 10.6875,
+      "learning_rate": 3e-05,
+      "loss": 1.994,
+      "step": 60
+    },
+    {
+      "epoch": 0.005079457223713809,
+      "grad_norm": 7.53125,
+      "learning_rate": 3.5e-05,
+      "loss": 1.5519,
+      "step": 70
+    },
+    {
+      "epoch": 0.0058050939699586385,
+      "grad_norm": 7.09375,
+      "learning_rate": 4e-05,
+      "loss": 1.2358,
+      "step": 80
+    },
+    {
+      "epoch": 0.006530730716203468,
+      "grad_norm": 8.0625,
+      "learning_rate": 4.5e-05,
+      "loss": 0.953,
+      "step": 90
+    },
+    {
+      "epoch": 0.007256367462448298,
+      "grad_norm": 6.6875,
+      "learning_rate": 5e-05,
+      "loss": 0.6256,
+      "step": 100
+    },
+    {
+      "epoch": 0.007982004208693128,
+      "grad_norm": 5.03125,
+      "learning_rate": 4.9999934086574596e-05,
+      "loss": 0.6601,
+      "step": 110
+    },
+    {
+      "epoch": 0.008707640954937958,
+      "grad_norm": 5.59375,
+      "learning_rate": 4.9999736346645943e-05,
+      "loss": 0.659,
+      "step": 120
+    },
+    {
+      "epoch": 0.009433277701182788,
+      "grad_norm": 3.953125,
+      "learning_rate": 4.999940678125673e-05,
+      "loss": 0.512,
+      "step": 130
+    },
+    {
+      "epoch": 0.010158914447427617,
+      "grad_norm": 5.46875,
+      "learning_rate": 4.9998945392144796e-05,
+      "loss": 0.5522,
+      "step": 140
+    },
+    {
+      "epoch": 0.010884551193672447,
+      "grad_norm": 3.96875,
+      "learning_rate": 4.999835218174307e-05,
+      "loss": 0.5532,
+      "step": 150
+    },
+    {
+      "epoch": 0.011610187939917277,
+      "grad_norm": 3.234375,
+      "learning_rate": 4.99976271531796e-05,
+      "loss": 0.5353,
+      "step": 160
+    },
+    {
+      "epoch": 0.012335824686162107,
+      "grad_norm": 4.5,
+      "learning_rate": 4.9996770310277506e-05,
+      "loss": 0.4439,
+      "step": 170
+    },
+    {
+      "epoch": 0.013061461432406937,
+      "grad_norm": 4.03125,
+      "learning_rate": 4.9995781657555e-05,
+      "loss": 0.4832,
+      "step": 180
+    },
+    {
+      "epoch": 0.013787098178651766,
+      "grad_norm": 4.71875,
+      "learning_rate": 4.99946612002253e-05,
+      "loss": 0.5325,
+      "step": 190
+    },
+    {
+      "epoch": 0.014512734924896596,
+      "grad_norm": 2.390625,
+      "learning_rate": 4.9993408944196676e-05,
+      "loss": 0.3253,
+      "step": 200
+    },
+    {
+      "epoch": 0.015238371671141426,
+      "grad_norm": 4.25,
+      "learning_rate": 4.9992024896072364e-05,
+      "loss": 0.3897,
+      "step": 210
+    },
+    {
+      "epoch": 0.015964008417386256,
+      "grad_norm": 5.65625,
+      "learning_rate": 4.999050906315055e-05,
+      "loss": 0.3642,
+      "step": 220
+    },
+    {
+      "epoch": 0.016689645163631087,
+      "grad_norm": 1.875,
+      "learning_rate": 4.998886145342434e-05,
+      "loss": 0.4811,
+      "step": 230
+    },
+    {
+      "epoch": 0.017415281909875915,
+      "grad_norm": 3.546875,
+      "learning_rate": 4.9987082075581684e-05,
+      "loss": 0.4725,
+      "step": 240
+    },
+    {
+      "epoch": 0.018140918656120747,
+      "grad_norm": 3.046875,
+      "learning_rate": 4.9985170939005386e-05,
+      "loss": 0.3427,
+      "step": 250
+    },
+    {
+      "epoch": 0.018866555402365575,
+      "grad_norm": 4.5625,
+      "learning_rate": 4.998312805377302e-05,
+      "loss": 0.3568,
+      "step": 260
+    },
+    {
+      "epoch": 0.019592192148610407,
+      "grad_norm": 4.5,
+      "learning_rate": 4.998095343065685e-05,
+      "loss": 0.3803,
+      "step": 270
+    },
+    {
+      "epoch": 0.020317828894855235,
+      "grad_norm": 3.984375,
+      "learning_rate": 4.997864708112384e-05,
+      "loss": 0.3002,
+      "step": 280
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 13781,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 10,
+  "total_flos": 0.0,
+  "train_batch_size": 5,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-280/training_args.bin b/checkpoint-280/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..4c28dcaa99e0497b1c861e1642ce3651d4277bea
--- /dev/null
+++ b/checkpoint-280/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9f4cfc15f45dfcabb458cfe7e31a5e2a32e96eacab3ae733ab7eefd0eca041b5
+size 4984
diff --git a/checkpoint-290/README.md b/checkpoint-290/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..64d55750c9c68c8ef0ccff2f65b1fd32a16578a9
--- /dev/null
+++ b/checkpoint-290/README.md
@@ -0,0 +1,202 @@
+---
+library_name: peft
+base_model: ../pretrained/MiniCPM-2B-dpo-bf16/
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.10.0
\ No newline at end of file
diff --git a/checkpoint-290/adapter_config.json b/checkpoint-290/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..d0eb9db7c6b0c827aba0e86e5057f651b0f868c9
--- /dev/null
+++ b/checkpoint-290/adapter_config.json
@@ -0,0 +1,29 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "../pretrained/MiniCPM-2B-dpo-bf16/",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": "gaussian",
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-290/adapter_model.safetensors b/checkpoint-290/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..3bc7d401370fea205e68706dfdc46ca7638045fd
--- /dev/null
+++ b/checkpoint-290/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ab10a2c94eb29640029a2d4da2b5724c612fb5e0a9db066be8722d0fb04c01fe
+size 5919456
diff --git a/checkpoint-290/optimizer.pt b/checkpoint-290/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2dd608443d4f1f2a17482110a068f28b5a39c1c6
--- /dev/null
+++ b/checkpoint-290/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1e82d680068368eb15e1e391a5e5907aaa075a3908e3166c6b1f6752d0983a82
+size 11930938
diff --git a/checkpoint-290/rng_state_0.pth b/checkpoint-290/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..6142001eee84d6c48f80ffd3eedba90655428239
--- /dev/null
+++ b/checkpoint-290/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:10ecf202489cdffda7040c277e81be6d180addb568115e94b3291f496d26b1c2
+size 15024
diff --git a/checkpoint-290/rng_state_1.pth b/checkpoint-290/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..21df1ceab9a496591f3ec6c06290e5f1bcd0a695
--- /dev/null
+++ b/checkpoint-290/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3ad7fd7bfb4cb1cf9da691ddec7b2f8dd580d0d8eba960dee03b17c3c70b5966
+size 15024
diff --git a/checkpoint-290/rng_state_2.pth b/checkpoint-290/rng_state_2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..4fa86dc44761d2e9858557cf0ffea31e134836fc
--- /dev/null
+++ b/checkpoint-290/rng_state_2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ea8c68c0f18cc1ad18cc3e64329385de34fffb30cc23949ad29050aa152ddd21
+size 15024
diff --git a/checkpoint-290/rng_state_3.pth b/checkpoint-290/rng_state_3.pth
new file mode 100644
index 0000000000000000000000000000000000000000..d4bb0dbace4cb08643e074b55baac30dbac47009
--- /dev/null
+++ b/checkpoint-290/rng_state_3.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ee60dc50d220aef980fc46b6da6b6d934b450850569607e58ca21bc4e13b47e5
+size 15024
diff --git a/checkpoint-290/scheduler.pt b/checkpoint-290/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..fddb75ed4c7a9b880a08abdd58f66fd0a347af17
--- /dev/null
+++ b/checkpoint-290/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3e834562b4432e6854c373ffc7ace1c3577893bc70957933db7bbb539db0d4da
+size 1064
diff --git a/checkpoint-290/trainer_state.json b/checkpoint-290/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..7adab4cd5fff6ca62c0e522ac1331582ba35071b
--- /dev/null
+++ b/checkpoint-290/trainer_state.json
@@ -0,0 +1,224 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.021043465641100066,
+  "eval_steps": 500,
+  "global_step": 290,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0007256367462448298,
+      "grad_norm": 15.6875,
+      "learning_rate": 5e-06,
+      "loss": 3.2562,
+      "step": 10
+    },
+    {
+      "epoch": 0.0014512734924896596,
+      "grad_norm": 17.5,
+      "learning_rate": 1e-05,
+      "loss": 2.9076,
+      "step": 20
+    },
+    {
+      "epoch": 0.0021769102387344894,
+      "grad_norm": 19.125,
+      "learning_rate": 1.5e-05,
+      "loss": 3.0281,
+      "step": 30
+    },
+    {
+      "epoch": 0.0029025469849793192,
+      "grad_norm": 17.875,
+      "learning_rate": 2e-05,
+      "loss": 2.6225,
+      "step": 40
+    },
+    {
+      "epoch": 0.003628183731224149,
+      "grad_norm": 13.9375,
+      "learning_rate": 2.5e-05,
+      "loss": 2.5137,
+      "step": 50
+    },
+    {
+      "epoch": 0.004353820477468979,
+      "grad_norm": 10.6875,
+      "learning_rate": 3e-05,
+      "loss": 1.994,
+      "step": 60
+    },
+    {
+      "epoch": 0.005079457223713809,
+      "grad_norm": 7.53125,
+      "learning_rate": 3.5e-05,
+      "loss": 1.5519,
+      "step": 70
+    },
+    {
+      "epoch": 0.0058050939699586385,
+      "grad_norm": 7.09375,
+      "learning_rate": 4e-05,
+      "loss": 1.2358,
+      "step": 80
+    },
+    {
+      "epoch": 0.006530730716203468,
+      "grad_norm": 8.0625,
+      "learning_rate": 4.5e-05,
+      "loss": 0.953,
+      "step": 90
+    },
+    {
+      "epoch": 0.007256367462448298,
+      "grad_norm": 6.6875,
+      "learning_rate": 5e-05,
+      "loss": 0.6256,
+      "step": 100
+    },
+    {
+      "epoch": 0.007982004208693128,
+      "grad_norm": 5.03125,
+      "learning_rate": 4.9999934086574596e-05,
+      "loss": 0.6601,
+      "step": 110
+    },
+    {
+      "epoch": 0.008707640954937958,
+      "grad_norm": 5.59375,
+      "learning_rate": 4.9999736346645943e-05,
+      "loss": 0.659,
+      "step": 120
+    },
+    {
+      "epoch": 0.009433277701182788,
+      "grad_norm": 3.953125,
+      "learning_rate": 4.999940678125673e-05,
+      "loss": 0.512,
+      "step": 130
+    },
+    {
+      "epoch": 0.010158914447427617,
+      "grad_norm": 5.46875,
+      "learning_rate": 4.9998945392144796e-05,
+      "loss": 0.5522,
+      "step": 140
+    },
+    {
+      "epoch": 0.010884551193672447,
+      "grad_norm": 3.96875,
+      "learning_rate": 4.999835218174307e-05,
+      "loss": 0.5532,
+      "step": 150
+    },
+    {
+      "epoch": 0.011610187939917277,
+      "grad_norm": 3.234375,
+      "learning_rate": 4.99976271531796e-05,
+      "loss": 0.5353,
+      "step": 160
+    },
+    {
+      "epoch": 0.012335824686162107,
+      "grad_norm": 4.5,
+      "learning_rate": 4.9996770310277506e-05,
+      "loss": 0.4439,
+      "step": 170
+    },
+    {
+      "epoch": 0.013061461432406937,
+      "grad_norm": 4.03125,
+      "learning_rate": 4.9995781657555e-05,
+      "loss": 0.4832,
+      "step": 180
+    },
+    {
+      "epoch": 0.013787098178651766,
+      "grad_norm": 4.71875,
+      "learning_rate": 4.99946612002253e-05,
+      "loss": 0.5325,
+      "step": 190
+    },
+    {
+      "epoch": 0.014512734924896596,
+      "grad_norm": 2.390625,
+      "learning_rate": 4.9993408944196676e-05,
+      "loss": 0.3253,
+      "step": 200
+    },
+    {
+      "epoch": 0.015238371671141426,
+      "grad_norm": 4.25,
+      "learning_rate": 4.9992024896072364e-05,
+      "loss": 0.3897,
+      "step": 210
+    },
+    {
+      "epoch": 0.015964008417386256,
+      "grad_norm": 5.65625,
+      "learning_rate": 4.999050906315055e-05,
+      "loss": 0.3642,
+      "step": 220
+    },
+    {
+      "epoch": 0.016689645163631087,
+      "grad_norm": 1.875,
+      "learning_rate": 4.998886145342434e-05,
+      "loss": 0.4811,
+      "step": 230
+    },
+    {
+      "epoch": 0.017415281909875915,
+      "grad_norm": 3.546875,
+      "learning_rate": 4.9987082075581684e-05,
+      "loss": 0.4725,
+      "step": 240
+    },
+    {
+      "epoch": 0.018140918656120747,
+      "grad_norm": 3.046875,
+      "learning_rate": 4.9985170939005386e-05,
+      "loss": 0.3427,
+      "step": 250
+    },
+    {
+      "epoch": 0.018866555402365575,
+      "grad_norm": 4.5625,
+      "learning_rate": 4.998312805377302e-05,
+      "loss": 0.3568,
+      "step": 260
+    },
+    {
+      "epoch": 0.019592192148610407,
+      "grad_norm": 4.5,
+      "learning_rate": 4.998095343065685e-05,
+      "loss": 0.3803,
+      "step": 270
+    },
+    {
+      "epoch": 0.020317828894855235,
+      "grad_norm": 3.984375,
+      "learning_rate": 4.997864708112384e-05,
+      "loss": 0.3002,
+      "step": 280
+    },
+    {
+      "epoch": 0.021043465641100066,
+      "grad_norm": 6.0625,
+      "learning_rate": 4.997620901733554e-05,
+      "loss": 0.3354,
+      "step": 290
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 13781,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 10,
+  "total_flos": 0.0,
+  "train_batch_size": 5,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-290/training_args.bin b/checkpoint-290/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..4c28dcaa99e0497b1c861e1642ce3651d4277bea
--- /dev/null
+++ b/checkpoint-290/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9f4cfc15f45dfcabb458cfe7e31a5e2a32e96eacab3ae733ab7eefd0eca041b5
+size 4984
diff --git a/checkpoint-30/README.md b/checkpoint-30/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..64d55750c9c68c8ef0ccff2f65b1fd32a16578a9
--- /dev/null
+++ b/checkpoint-30/README.md
@@ -0,0 +1,202 @@
+---
+library_name: peft
+base_model: ../pretrained/MiniCPM-2B-dpo-bf16/
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.10.0
\ No newline at end of file
diff --git a/checkpoint-30/adapter_config.json b/checkpoint-30/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..d0eb9db7c6b0c827aba0e86e5057f651b0f868c9
--- /dev/null
+++ b/checkpoint-30/adapter_config.json
@@ -0,0 +1,29 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "../pretrained/MiniCPM-2B-dpo-bf16/",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": "gaussian",
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-30/adapter_model.safetensors b/checkpoint-30/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..bdca01083ee6fc6b0c47fdafc128a9fa3c11991f
--- /dev/null
+++ b/checkpoint-30/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6271038a20c3c9bc064ca11a24f30904d4c86bae2759840757b7e4dfb05780eb
+size 5919456
diff --git a/checkpoint-30/optimizer.pt b/checkpoint-30/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..15642dd88e6cf54d75aedc612daa675baebf80e4
--- /dev/null
+++ b/checkpoint-30/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a5ece0decc4408343f89ce2138b9c7fee48c639ba3ec10c693dc19553a0af47e
+size 11930938
diff --git a/checkpoint-30/rng_state_0.pth b/checkpoint-30/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..cbb00ec3c77a5ef34c808105a14561e4b3fb4574
--- /dev/null
+++ b/checkpoint-30/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:80c2f27d97a10c31a8638a58d5088a128efbcbf227dc925bea21ae2142096923
+size 15024
diff --git a/checkpoint-30/rng_state_1.pth b/checkpoint-30/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..1a367dd971913b0c76a7b19d3948876472b05ca3
--- /dev/null
+++ b/checkpoint-30/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e608d3782d947f3f883a4b59c9c0fd4233eb58985d44365aa874198c44057567
+size 15024
diff --git a/checkpoint-30/rng_state_2.pth b/checkpoint-30/rng_state_2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..4a6fa526b9a44edd2692eda125ed70f3eceb200a
--- /dev/null
+++ b/checkpoint-30/rng_state_2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:18fcde1cc7e91d886643e2dc65d40208b9dd4186781b64e6fb5cd3cc4748be1c
+size 15024
diff --git a/checkpoint-30/rng_state_3.pth b/checkpoint-30/rng_state_3.pth
new file mode 100644
index 0000000000000000000000000000000000000000..e875f1ba6b679566fe9122b9653f396cf58dba01
--- /dev/null
+++ b/checkpoint-30/rng_state_3.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1a65a67b1e90f3a9e2bf7adf3069c8e8844c3681189f60cdf31ba20176ffe8ab
+size 15024
diff --git a/checkpoint-30/scheduler.pt b/checkpoint-30/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c528ef1f795f4cbc220a7254003a8899501419dc
--- /dev/null
+++ b/checkpoint-30/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3b3eae29eb3e78ae1aa1696d1473adbe4f79b7c0d903fb12a093a820c6e37528
+size 1064
diff --git a/checkpoint-30/trainer_state.json b/checkpoint-30/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..e6e63e9d3d4fbf0bee4c0a02126ebf214aa15fd1
--- /dev/null
+++ b/checkpoint-30/trainer_state.json
@@ -0,0 +1,42 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.0021769102387344894,
+  "eval_steps": 500,
+  "global_step": 30,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0007256367462448298,
+      "grad_norm": 15.6875,
+      "learning_rate": 5e-06,
+      "loss": 3.2562,
+      "step": 10
+    },
+    {
+      "epoch": 0.0014512734924896596,
+      "grad_norm": 17.5,
+      "learning_rate": 1e-05,
+      "loss": 2.9076,
+      "step": 20
+    },
+    {
+      "epoch": 0.0021769102387344894,
+      "grad_norm": 19.125,
+      "learning_rate": 1.5e-05,
+      "loss": 3.0281,
+      "step": 30
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 13781,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 10,
+  "total_flos": 0.0,
+  "train_batch_size": 5,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-30/training_args.bin b/checkpoint-30/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..4c28dcaa99e0497b1c861e1642ce3651d4277bea
--- /dev/null
+++ b/checkpoint-30/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9f4cfc15f45dfcabb458cfe7e31a5e2a32e96eacab3ae733ab7eefd0eca041b5
+size 4984
diff --git a/checkpoint-300/README.md b/checkpoint-300/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..64d55750c9c68c8ef0ccff2f65b1fd32a16578a9
--- /dev/null
+++ b/checkpoint-300/README.md
@@ -0,0 +1,202 @@
+---
+library_name: peft
+base_model: ../pretrained/MiniCPM-2B-dpo-bf16/
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.10.0
\ No newline at end of file
diff --git a/checkpoint-300/adapter_config.json b/checkpoint-300/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..d0eb9db7c6b0c827aba0e86e5057f651b0f868c9
--- /dev/null
+++ b/checkpoint-300/adapter_config.json
@@ -0,0 +1,29 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "../pretrained/MiniCPM-2B-dpo-bf16/",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": "gaussian",
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-300/adapter_model.safetensors b/checkpoint-300/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..7ce613b6609312e62c0218d9790a452009a127de
--- /dev/null
+++ b/checkpoint-300/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4117b1f588cff3ad49326224d7e74c67523c54035596ab6cddf64648834dc5c2
+size 5919456
diff --git a/checkpoint-300/optimizer.pt b/checkpoint-300/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2df34aacc2329d3fe47406ee13bbea9777457218
--- /dev/null
+++ b/checkpoint-300/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:881bee80abeaf5f6ffbc474e1e519be1cf3e762ff72695815348470640712771
+size 11930938
diff --git a/checkpoint-300/rng_state_0.pth b/checkpoint-300/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..83dd428249d32d9afc5469ec82c2173ef48cdd36
--- /dev/null
+++ b/checkpoint-300/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3ffddd2869d266e566aedf316e46ee62978bba1fb4be250207ab9de7330f5477
+size 15024
diff --git a/checkpoint-300/rng_state_1.pth b/checkpoint-300/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..4001b60d8cac6c2628f9204935b6e232aef09474
--- /dev/null
+++ b/checkpoint-300/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f9e1971b3b34ac302cb20e4a3bde6f2cef6b53c750487560ab2e417d30d34742
+size 15024
diff --git a/checkpoint-300/rng_state_2.pth b/checkpoint-300/rng_state_2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..59b9c92f8e994188209d7409c22bfa335e3d1655
--- /dev/null
+++ b/checkpoint-300/rng_state_2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e886fc43e3ae110a3b6320792c406afdc247f52e086c52eb1a507bcce48c90e1
+size 15024
diff --git a/checkpoint-300/rng_state_3.pth b/checkpoint-300/rng_state_3.pth
new file mode 100644
index 0000000000000000000000000000000000000000..f45acd502d6345e61bfae08006c2812d8c79c716
--- /dev/null
+++ b/checkpoint-300/rng_state_3.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6d01f302fec204113c70aea51b32f139f52071a27440a434f15b42069147de2f
+size 15024
diff --git a/checkpoint-300/scheduler.pt b/checkpoint-300/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ee797801dfc28b41fd5cd657d6e3c443c8bd3c28
--- /dev/null
+++ b/checkpoint-300/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3eaba3c988582e6075bdeca9da5532aa56292b6ce5ab4d5bab7465a77c018e70
+size 1064
diff --git a/checkpoint-300/trainer_state.json b/checkpoint-300/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..5d7526e17e13c96d31419119998958f521a234fb
--- /dev/null
+++ b/checkpoint-300/trainer_state.json
@@ -0,0 +1,231 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.021769102387344894,
+  "eval_steps": 500,
+  "global_step": 300,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0007256367462448298,
+      "grad_norm": 15.6875,
+      "learning_rate": 5e-06,
+      "loss": 3.2562,
+      "step": 10
+    },
+    {
+      "epoch": 0.0014512734924896596,
+      "grad_norm": 17.5,
+      "learning_rate": 1e-05,
+      "loss": 2.9076,
+      "step": 20
+    },
+    {
+      "epoch": 0.0021769102387344894,
+      "grad_norm": 19.125,
+      "learning_rate": 1.5e-05,
+      "loss": 3.0281,
+      "step": 30
+    },
+    {
+      "epoch": 0.0029025469849793192,
+      "grad_norm": 17.875,
+      "learning_rate": 2e-05,
+      "loss": 2.6225,
+      "step": 40
+    },
+    {
+      "epoch": 0.003628183731224149,
+      "grad_norm": 13.9375,
+      "learning_rate": 2.5e-05,
+      "loss": 2.5137,
+      "step": 50
+    },
+    {
+      "epoch": 0.004353820477468979,
+      "grad_norm": 10.6875,
+      "learning_rate": 3e-05,
+      "loss": 1.994,
+      "step": 60
+    },
+    {
+      "epoch": 0.005079457223713809,
+      "grad_norm": 7.53125,
+      "learning_rate": 3.5e-05,
+      "loss": 1.5519,
+      "step": 70
+    },
+    {
+      "epoch": 0.0058050939699586385,
+      "grad_norm": 7.09375,
+      "learning_rate": 4e-05,
+      "loss": 1.2358,
+      "step": 80
+    },
+    {
+      "epoch": 0.006530730716203468,
+      "grad_norm": 8.0625,
+      "learning_rate": 4.5e-05,
+      "loss": 0.953,
+      "step": 90
+    },
+    {
+      "epoch": 0.007256367462448298,
+      "grad_norm": 6.6875,
+      "learning_rate": 5e-05,
+      "loss": 0.6256,
+      "step": 100
+    },
+    {
+      "epoch": 0.007982004208693128,
+      "grad_norm": 5.03125,
+      "learning_rate": 4.9999934086574596e-05,
+      "loss": 0.6601,
+      "step": 110
+    },
+    {
+      "epoch": 0.008707640954937958,
+      "grad_norm": 5.59375,
+      "learning_rate": 4.9999736346645943e-05,
+      "loss": 0.659,
+      "step": 120
+    },
+    {
+      "epoch": 0.009433277701182788,
+      "grad_norm": 3.953125,
+      "learning_rate": 4.999940678125673e-05,
+      "loss": 0.512,
+      "step": 130
+    },
+    {
+      "epoch": 0.010158914447427617,
+      "grad_norm": 5.46875,
+      "learning_rate": 4.9998945392144796e-05,
+      "loss": 0.5522,
+      "step": 140
+    },
+    {
+      "epoch": 0.010884551193672447,
+      "grad_norm": 3.96875,
+      "learning_rate": 4.999835218174307e-05,
+      "loss": 0.5532,
+      "step": 150
+    },
+    {
+      "epoch": 0.011610187939917277,
+      "grad_norm": 3.234375,
+      "learning_rate": 4.99976271531796e-05,
+      "loss": 0.5353,
+      "step": 160
+    },
+    {
+      "epoch": 0.012335824686162107,
+      "grad_norm": 4.5,
+      "learning_rate": 4.9996770310277506e-05,
+      "loss": 0.4439,
+      "step": 170
+    },
+    {
+      "epoch": 0.013061461432406937,
+      "grad_norm": 4.03125,
+      "learning_rate": 4.9995781657555e-05,
+      "loss": 0.4832,
+      "step": 180
+    },
+    {
+      "epoch": 0.013787098178651766,
+      "grad_norm": 4.71875,
+      "learning_rate": 4.99946612002253e-05,
+      "loss": 0.5325,
+      "step": 190
+    },
+    {
+      "epoch": 0.014512734924896596,
+      "grad_norm": 2.390625,
+      "learning_rate": 4.9993408944196676e-05,
+      "loss": 0.3253,
+      "step": 200
+    },
+    {
+      "epoch": 0.015238371671141426,
+      "grad_norm": 4.25,
+      "learning_rate": 4.9992024896072364e-05,
+      "loss": 0.3897,
+      "step": 210
+    },
+    {
+      "epoch": 0.015964008417386256,
+      "grad_norm": 5.65625,
+      "learning_rate": 4.999050906315055e-05,
+      "loss": 0.3642,
+      "step": 220
+    },
+    {
+      "epoch": 0.016689645163631087,
+      "grad_norm": 1.875,
+      "learning_rate": 4.998886145342434e-05,
+      "loss": 0.4811,
+      "step": 230
+    },
+    {
+      "epoch": 0.017415281909875915,
+      "grad_norm": 3.546875,
+      "learning_rate": 4.9987082075581684e-05,
+      "loss": 0.4725,
+      "step": 240
+    },
+    {
+      "epoch": 0.018140918656120747,
+      "grad_norm": 3.046875,
+      "learning_rate": 4.9985170939005386e-05,
+      "loss": 0.3427,
+      "step": 250
+    },
+    {
+      "epoch": 0.018866555402365575,
+      "grad_norm": 4.5625,
+      "learning_rate": 4.998312805377302e-05,
+      "loss": 0.3568,
+      "step": 260
+    },
+    {
+      "epoch": 0.019592192148610407,
+      "grad_norm": 4.5,
+      "learning_rate": 4.998095343065685e-05,
+      "loss": 0.3803,
+      "step": 270
+    },
+    {
+      "epoch": 0.020317828894855235,
+      "grad_norm": 3.984375,
+      "learning_rate": 4.997864708112384e-05,
+      "loss": 0.3002,
+      "step": 280
+    },
+    {
+      "epoch": 0.021043465641100066,
+      "grad_norm": 6.0625,
+      "learning_rate": 4.997620901733554e-05,
+      "loss": 0.3354,
+      "step": 290
+    },
+    {
+      "epoch": 0.021769102387344894,
+      "grad_norm": 2.703125,
+      "learning_rate": 4.997363925214803e-05,
+      "loss": 0.3285,
+      "step": 300
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 13781,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 10,
+  "total_flos": 0.0,
+  "train_batch_size": 5,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-300/training_args.bin b/checkpoint-300/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..4c28dcaa99e0497b1c861e1642ce3651d4277bea
--- /dev/null
+++ b/checkpoint-300/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9f4cfc15f45dfcabb458cfe7e31a5e2a32e96eacab3ae733ab7eefd0eca041b5
+size 4984
diff --git a/checkpoint-310/README.md b/checkpoint-310/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..64d55750c9c68c8ef0ccff2f65b1fd32a16578a9
--- /dev/null
+++ b/checkpoint-310/README.md
@@ -0,0 +1,202 @@
+---
+library_name: peft
+base_model: ../pretrained/MiniCPM-2B-dpo-bf16/
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.10.0
\ No newline at end of file
diff --git a/checkpoint-310/adapter_config.json b/checkpoint-310/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..d0eb9db7c6b0c827aba0e86e5057f651b0f868c9
--- /dev/null
+++ b/checkpoint-310/adapter_config.json
@@ -0,0 +1,29 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "../pretrained/MiniCPM-2B-dpo-bf16/",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": "gaussian",
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-310/adapter_model.safetensors b/checkpoint-310/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..ef243b2806a47db51cb909e1206d24c8e15de786
--- /dev/null
+++ b/checkpoint-310/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:af4fbcec87ec46cd6d1cb59085353c580cf70fd01ce8af1a74504ed4d9828cdf
+size 5919456
diff --git a/checkpoint-310/optimizer.pt b/checkpoint-310/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5ad7adf818a194b7a5b7886ae4837841eaf9a2d8
--- /dev/null
+++ b/checkpoint-310/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7e91bf96f8e2131fd219cf4bcf076aaf8b110abc811fcf6f8d89b0ae96e6dc83
+size 11930938
diff --git a/checkpoint-310/rng_state_0.pth b/checkpoint-310/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..88a1a410a9839c59a81b645d8d0e0acd1e61b640
--- /dev/null
+++ b/checkpoint-310/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b0f8d5aaec2381dce03acaa023b7714b9af8d74990fdeb63144149174f500347
+size 15024
diff --git a/checkpoint-310/rng_state_1.pth b/checkpoint-310/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..07c9736cb7d1d7e98a583df40c495d90fe7b986c
--- /dev/null
+++ b/checkpoint-310/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1cf59769c800800834cadfdea32ecd38fd26157f2dc68fd12df2d99fab744ad3
+size 15024
diff --git a/checkpoint-310/rng_state_2.pth b/checkpoint-310/rng_state_2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..4553fe9252e0e1d8c88256e83f57ac07b232b6e4
--- /dev/null
+++ b/checkpoint-310/rng_state_2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:60baa7f85097b313ee9a276c0945ceffced6909abc5d6dd56dcfd8155c025f33
+size 15024
diff --git a/checkpoint-310/rng_state_3.pth b/checkpoint-310/rng_state_3.pth
new file mode 100644
index 0000000000000000000000000000000000000000..9f2e95abd7c126fc81cd55c6bbc98b7e11ac7c6f
--- /dev/null
+++ b/checkpoint-310/rng_state_3.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bce7a9dfe18b09bd1249dd5b9d6baea618d6f6ca19b767ce0763cdd363d19121
+size 15024
diff --git a/checkpoint-310/scheduler.pt b/checkpoint-310/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4d87905a926126fe2c04e8ce831b9d70eeb74d08
--- /dev/null
+++ b/checkpoint-310/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bd4883c96e7691c930a918b480d0ceed9ba239909d04b256832dccc1c2432b75
+size 1064
diff --git a/checkpoint-310/trainer_state.json b/checkpoint-310/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..6a1408f0fa925cb5ec719b422394a31020aa31d5
--- /dev/null
+++ b/checkpoint-310/trainer_state.json
@@ -0,0 +1,238 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.022494739133589726,
+  "eval_steps": 500,
+  "global_step": 310,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0007256367462448298,
+      "grad_norm": 15.6875,
+      "learning_rate": 5e-06,
+      "loss": 3.2562,
+      "step": 10
+    },
+    {
+      "epoch": 0.0014512734924896596,
+      "grad_norm": 17.5,
+      "learning_rate": 1e-05,
+      "loss": 2.9076,
+      "step": 20
+    },
+    {
+      "epoch": 0.0021769102387344894,
+      "grad_norm": 19.125,
+      "learning_rate": 1.5e-05,
+      "loss": 3.0281,
+      "step": 30
+    },
+    {
+      "epoch": 0.0029025469849793192,
+      "grad_norm": 17.875,
+      "learning_rate": 2e-05,
+      "loss": 2.6225,
+      "step": 40
+    },
+    {
+      "epoch": 0.003628183731224149,
+      "grad_norm": 13.9375,
+      "learning_rate": 2.5e-05,
+      "loss": 2.5137,
+      "step": 50
+    },
+    {
+      "epoch": 0.004353820477468979,
+      "grad_norm": 10.6875,
+      "learning_rate": 3e-05,
+      "loss": 1.994,
+      "step": 60
+    },
+    {
+      "epoch": 0.005079457223713809,
+      "grad_norm": 7.53125,
+      "learning_rate": 3.5e-05,
+      "loss": 1.5519,
+      "step": 70
+    },
+    {
+      "epoch": 0.0058050939699586385,
+      "grad_norm": 7.09375,
+      "learning_rate": 4e-05,
+      "loss": 1.2358,
+      "step": 80
+    },
+    {
+      "epoch": 0.006530730716203468,
+      "grad_norm": 8.0625,
+      "learning_rate": 4.5e-05,
+      "loss": 0.953,
+      "step": 90
+    },
+    {
+      "epoch": 0.007256367462448298,
+      "grad_norm": 6.6875,
+      "learning_rate": 5e-05,
+      "loss": 0.6256,
+      "step": 100
+    },
+    {
+      "epoch": 0.007982004208693128,
+      "grad_norm": 5.03125,
+      "learning_rate": 4.9999934086574596e-05,
+      "loss": 0.6601,
+      "step": 110
+    },
+    {
+      "epoch": 0.008707640954937958,
+      "grad_norm": 5.59375,
+      "learning_rate": 4.9999736346645943e-05,
+      "loss": 0.659,
+      "step": 120
+    },
+    {
+      "epoch": 0.009433277701182788,
+      "grad_norm": 3.953125,
+      "learning_rate": 4.999940678125673e-05,
+      "loss": 0.512,
+      "step": 130
+    },
+    {
+      "epoch": 0.010158914447427617,
+      "grad_norm": 5.46875,
+      "learning_rate": 4.9998945392144796e-05,
+      "loss": 0.5522,
+      "step": 140
+    },
+    {
+      "epoch": 0.010884551193672447,
+      "grad_norm": 3.96875,
+      "learning_rate": 4.999835218174307e-05,
+      "loss": 0.5532,
+      "step": 150
+    },
+    {
+      "epoch": 0.011610187939917277,
+      "grad_norm": 3.234375,
+      "learning_rate": 4.99976271531796e-05,
+      "loss": 0.5353,
+      "step": 160
+    },
+    {
+      "epoch": 0.012335824686162107,
+      "grad_norm": 4.5,
+      "learning_rate": 4.9996770310277506e-05,
+      "loss": 0.4439,
+      "step": 170
+    },
+    {
+      "epoch": 0.013061461432406937,
+      "grad_norm": 4.03125,
+      "learning_rate": 4.9995781657555e-05,
+      "loss": 0.4832,
+      "step": 180
+    },
+    {
+      "epoch": 0.013787098178651766,
+      "grad_norm": 4.71875,
+      "learning_rate": 4.99946612002253e-05,
+      "loss": 0.5325,
+      "step": 190
+    },
+    {
+      "epoch": 0.014512734924896596,
+      "grad_norm": 2.390625,
+      "learning_rate": 4.9993408944196676e-05,
+      "loss": 0.3253,
+      "step": 200
+    },
+    {
+      "epoch": 0.015238371671141426,
+      "grad_norm": 4.25,
+      "learning_rate": 4.9992024896072364e-05,
+      "loss": 0.3897,
+      "step": 210
+    },
+    {
+      "epoch": 0.015964008417386256,
+      "grad_norm": 5.65625,
+      "learning_rate": 4.999050906315055e-05,
+      "loss": 0.3642,
+      "step": 220
+    },
+    {
+      "epoch": 0.016689645163631087,
+      "grad_norm": 1.875,
+      "learning_rate": 4.998886145342434e-05,
+      "loss": 0.4811,
+      "step": 230
+    },
+    {
+      "epoch": 0.017415281909875915,
+      "grad_norm": 3.546875,
+      "learning_rate": 4.9987082075581684e-05,
+      "loss": 0.4725,
+      "step": 240
+    },
+    {
+      "epoch": 0.018140918656120747,
+      "grad_norm": 3.046875,
+      "learning_rate": 4.9985170939005386e-05,
+      "loss": 0.3427,
+      "step": 250
+    },
+    {
+      "epoch": 0.018866555402365575,
+      "grad_norm": 4.5625,
+      "learning_rate": 4.998312805377302e-05,
+      "loss": 0.3568,
+      "step": 260
+    },
+    {
+      "epoch": 0.019592192148610407,
+      "grad_norm": 4.5,
+      "learning_rate": 4.998095343065685e-05,
+      "loss": 0.3803,
+      "step": 270
+    },
+    {
+      "epoch": 0.020317828894855235,
+      "grad_norm": 3.984375,
+      "learning_rate": 4.997864708112384e-05,
+      "loss": 0.3002,
+      "step": 280
+    },
+    {
+      "epoch": 0.021043465641100066,
+      "grad_norm": 6.0625,
+      "learning_rate": 4.997620901733554e-05,
+      "loss": 0.3354,
+      "step": 290
+    },
+    {
+      "epoch": 0.021769102387344894,
+      "grad_norm": 2.703125,
+      "learning_rate": 4.997363925214803e-05,
+      "loss": 0.3285,
+      "step": 300
+    },
+    {
+      "epoch": 0.022494739133589726,
+      "grad_norm": 3.171875,
+      "learning_rate": 4.9970937799111896e-05,
+      "loss": 0.2985,
+      "step": 310
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 13781,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 10,
+  "total_flos": 0.0,
+  "train_batch_size": 5,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-310/training_args.bin b/checkpoint-310/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..4c28dcaa99e0497b1c861e1642ce3651d4277bea
--- /dev/null
+++ b/checkpoint-310/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9f4cfc15f45dfcabb458cfe7e31a5e2a32e96eacab3ae733ab7eefd0eca041b5
+size 4984
diff --git a/checkpoint-320/README.md b/checkpoint-320/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..64d55750c9c68c8ef0ccff2f65b1fd32a16578a9
--- /dev/null
+++ b/checkpoint-320/README.md
@@ -0,0 +1,202 @@
+---
+library_name: peft
+base_model: ../pretrained/MiniCPM-2B-dpo-bf16/
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.10.0
\ No newline at end of file
diff --git a/checkpoint-320/adapter_config.json b/checkpoint-320/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..d0eb9db7c6b0c827aba0e86e5057f651b0f868c9
--- /dev/null
+++ b/checkpoint-320/adapter_config.json
@@ -0,0 +1,29 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "../pretrained/MiniCPM-2B-dpo-bf16/",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": "gaussian",
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-320/adapter_model.safetensors b/checkpoint-320/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..0769a6f3703e350730abe3e1a1f666d61cfb681a
--- /dev/null
+++ b/checkpoint-320/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:eec3e3d6617c2a0480824d01d64b77baf3bb787582d12534240d17f540b515fd
+size 5919456
diff --git a/checkpoint-320/optimizer.pt b/checkpoint-320/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..fcd453da5785d3d851205a3c3cf797948fdfe667
--- /dev/null
+++ b/checkpoint-320/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ba3321a33eae49a565def4bc422af4814ca9e898d6adebadf39cb7d8abfe471b
+size 11930938
diff --git a/checkpoint-320/rng_state_0.pth b/checkpoint-320/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..2ffa711cdbf46a981ef6ea27285388e2fe15db49
--- /dev/null
+++ b/checkpoint-320/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e6b3a9c1f3554ec29be8c263a22a1d44c245d9b40f20469de6dc0c695c6649a2
+size 15024
diff --git a/checkpoint-320/rng_state_1.pth b/checkpoint-320/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..ecaf70827b33cabcc9c59f1fb242c81694ebb5a5
--- /dev/null
+++ b/checkpoint-320/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:61e4a523b11efe5f35e45cde3d5d8dcb6dfbfaaf813cd1384fb442a47c85a381
+size 15024
diff --git a/checkpoint-320/rng_state_2.pth b/checkpoint-320/rng_state_2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..d5ffb3346d9302117c04f247348b5f9c6a7e19a9
--- /dev/null
+++ b/checkpoint-320/rng_state_2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:614ebb7f79d70899798c0b6a06deadbcdd2910c4ee4fc68b8389130ffac98ae6
+size 15024
diff --git a/checkpoint-320/rng_state_3.pth b/checkpoint-320/rng_state_3.pth
new file mode 100644
index 0000000000000000000000000000000000000000..9e22ed527fccaef238bb929b2165d11181bb5f6f
--- /dev/null
+++ b/checkpoint-320/rng_state_3.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7b5f52caa8ac046bae234daa865a00e10f03e7d7beda6278af9db5a92b47db8d
+size 15024
diff --git a/checkpoint-320/scheduler.pt b/checkpoint-320/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..735c6237f22fd8e438e6e033d9a1cfecbf298f0d
--- /dev/null
+++ b/checkpoint-320/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4bcbe817767a87c652072564884ed71d790e519f1cf12104643b7968f801b30f
+size 1064
diff --git a/checkpoint-320/trainer_state.json b/checkpoint-320/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..7a8c1a25d2afeffdb1a295891ce125af88699582
--- /dev/null
+++ b/checkpoint-320/trainer_state.json
@@ -0,0 +1,245 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.023220375879834554,
+  "eval_steps": 500,
+  "global_step": 320,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0007256367462448298,
+      "grad_norm": 15.6875,
+      "learning_rate": 5e-06,
+      "loss": 3.2562,
+      "step": 10
+    },
+    {
+      "epoch": 0.0014512734924896596,
+      "grad_norm": 17.5,
+      "learning_rate": 1e-05,
+      "loss": 2.9076,
+      "step": 20
+    },
+    {
+      "epoch": 0.0021769102387344894,
+      "grad_norm": 19.125,
+      "learning_rate": 1.5e-05,
+      "loss": 3.0281,
+      "step": 30
+    },
+    {
+      "epoch": 0.0029025469849793192,
+      "grad_norm": 17.875,
+      "learning_rate": 2e-05,
+      "loss": 2.6225,
+      "step": 40
+    },
+    {
+      "epoch": 0.003628183731224149,
+      "grad_norm": 13.9375,
+      "learning_rate": 2.5e-05,
+      "loss": 2.5137,
+      "step": 50
+    },
+    {
+      "epoch": 0.004353820477468979,
+      "grad_norm": 10.6875,
+      "learning_rate": 3e-05,
+      "loss": 1.994,
+      "step": 60
+    },
+    {
+      "epoch": 0.005079457223713809,
+      "grad_norm": 7.53125,
+      "learning_rate": 3.5e-05,
+      "loss": 1.5519,
+      "step": 70
+    },
+    {
+      "epoch": 0.0058050939699586385,
+      "grad_norm": 7.09375,
+      "learning_rate": 4e-05,
+      "loss": 1.2358,
+      "step": 80
+    },
+    {
+      "epoch": 0.006530730716203468,
+      "grad_norm": 8.0625,
+      "learning_rate": 4.5e-05,
+      "loss": 0.953,
+      "step": 90
+    },
+    {
+      "epoch": 0.007256367462448298,
+      "grad_norm": 6.6875,
+      "learning_rate": 5e-05,
+      "loss": 0.6256,
+      "step": 100
+    },
+    {
+      "epoch": 0.007982004208693128,
+      "grad_norm": 5.03125,
+      "learning_rate": 4.9999934086574596e-05,
+      "loss": 0.6601,
+      "step": 110
+    },
+    {
+      "epoch": 0.008707640954937958,
+      "grad_norm": 5.59375,
+      "learning_rate": 4.9999736346645943e-05,
+      "loss": 0.659,
+      "step": 120
+    },
+    {
+      "epoch": 0.009433277701182788,
+      "grad_norm": 3.953125,
+      "learning_rate": 4.999940678125673e-05,
+      "loss": 0.512,
+      "step": 130
+    },
+    {
+      "epoch": 0.010158914447427617,
+      "grad_norm": 5.46875,
+      "learning_rate": 4.9998945392144796e-05,
+      "loss": 0.5522,
+      "step": 140
+    },
+    {
+      "epoch": 0.010884551193672447,
+      "grad_norm": 3.96875,
+      "learning_rate": 4.999835218174307e-05,
+      "loss": 0.5532,
+      "step": 150
+    },
+    {
+      "epoch": 0.011610187939917277,
+      "grad_norm": 3.234375,
+      "learning_rate": 4.99976271531796e-05,
+      "loss": 0.5353,
+      "step": 160
+    },
+    {
+      "epoch": 0.012335824686162107,
+      "grad_norm": 4.5,
+      "learning_rate": 4.9996770310277506e-05,
+      "loss": 0.4439,
+      "step": 170
+    },
+    {
+      "epoch": 0.013061461432406937,
+      "grad_norm": 4.03125,
+      "learning_rate": 4.9995781657555e-05,
+      "loss": 0.4832,
+      "step": 180
+    },
+    {
+      "epoch": 0.013787098178651766,
+      "grad_norm": 4.71875,
+      "learning_rate": 4.99946612002253e-05,
+      "loss": 0.5325,
+      "step": 190
+    },
+    {
+      "epoch": 0.014512734924896596,
+      "grad_norm": 2.390625,
+      "learning_rate": 4.9993408944196676e-05,
+      "loss": 0.3253,
+      "step": 200
+    },
+    {
+      "epoch": 0.015238371671141426,
+      "grad_norm": 4.25,
+      "learning_rate": 4.9992024896072364e-05,
+      "loss": 0.3897,
+      "step": 210
+    },
+    {
+      "epoch": 0.015964008417386256,
+      "grad_norm": 5.65625,
+      "learning_rate": 4.999050906315055e-05,
+      "loss": 0.3642,
+      "step": 220
+    },
+    {
+      "epoch": 0.016689645163631087,
+      "grad_norm": 1.875,
+      "learning_rate": 4.998886145342434e-05,
+      "loss": 0.4811,
+      "step": 230
+    },
+    {
+      "epoch": 0.017415281909875915,
+      "grad_norm": 3.546875,
+      "learning_rate": 4.9987082075581684e-05,
+      "loss": 0.4725,
+      "step": 240
+    },
+    {
+      "epoch": 0.018140918656120747,
+      "grad_norm": 3.046875,
+      "learning_rate": 4.9985170939005386e-05,
+      "loss": 0.3427,
+      "step": 250
+    },
+    {
+      "epoch": 0.018866555402365575,
+      "grad_norm": 4.5625,
+      "learning_rate": 4.998312805377302e-05,
+      "loss": 0.3568,
+      "step": 260
+    },
+    {
+      "epoch": 0.019592192148610407,
+      "grad_norm": 4.5,
+      "learning_rate": 4.998095343065685e-05,
+      "loss": 0.3803,
+      "step": 270
+    },
+    {
+      "epoch": 0.020317828894855235,
+      "grad_norm": 3.984375,
+      "learning_rate": 4.997864708112384e-05,
+      "loss": 0.3002,
+      "step": 280
+    },
+    {
+      "epoch": 0.021043465641100066,
+      "grad_norm": 6.0625,
+      "learning_rate": 4.997620901733554e-05,
+      "loss": 0.3354,
+      "step": 290
+    },
+    {
+      "epoch": 0.021769102387344894,
+      "grad_norm": 2.703125,
+      "learning_rate": 4.997363925214803e-05,
+      "loss": 0.3285,
+      "step": 300
+    },
+    {
+      "epoch": 0.022494739133589726,
+      "grad_norm": 3.171875,
+      "learning_rate": 4.9970937799111896e-05,
+      "loss": 0.2985,
+      "step": 310
+    },
+    {
+      "epoch": 0.023220375879834554,
+      "grad_norm": 6.15625,
+      "learning_rate": 4.996810467247207e-05,
+      "loss": 0.3869,
+      "step": 320
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 13781,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 10,
+  "total_flos": 0.0,
+  "train_batch_size": 5,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-320/training_args.bin b/checkpoint-320/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..4c28dcaa99e0497b1c861e1642ce3651d4277bea
--- /dev/null
+++ b/checkpoint-320/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9f4cfc15f45dfcabb458cfe7e31a5e2a32e96eacab3ae733ab7eefd0eca041b5
+size 4984
diff --git a/checkpoint-330/README.md b/checkpoint-330/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..64d55750c9c68c8ef0ccff2f65b1fd32a16578a9
--- /dev/null
+++ b/checkpoint-330/README.md
@@ -0,0 +1,202 @@
+---
+library_name: peft
+base_model: ../pretrained/MiniCPM-2B-dpo-bf16/
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.10.0
\ No newline at end of file
diff --git a/checkpoint-330/adapter_config.json b/checkpoint-330/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..d0eb9db7c6b0c827aba0e86e5057f651b0f868c9
--- /dev/null
+++ b/checkpoint-330/adapter_config.json
@@ -0,0 +1,29 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "../pretrained/MiniCPM-2B-dpo-bf16/",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": "gaussian",
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-330/adapter_model.safetensors b/checkpoint-330/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..725d1a3380321bf062ec70186eead609afaab974
--- /dev/null
+++ b/checkpoint-330/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d5ac71533d60926bcf54a5af88cfe15237cdd7660fba88b3423226dbcf0ad0aa
+size 5919456
diff --git a/checkpoint-330/optimizer.pt b/checkpoint-330/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4eb93c89799e8792737d421db3226a45ab3e2ea0
--- /dev/null
+++ b/checkpoint-330/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fd361f1e5901a267e11f0b81821ec0c63434d335a301f5639c8cc3a1e401b3a4
+size 11930938
diff --git a/checkpoint-330/rng_state_0.pth b/checkpoint-330/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..1a4494144df2c2b033835bfdae880b9bd5fbe543
--- /dev/null
+++ b/checkpoint-330/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1fe1229961077eeb711058a791ec8b58ee9fec26f7d7c11fdbbcc280ec7c903e
+size 15024
diff --git a/checkpoint-330/rng_state_1.pth b/checkpoint-330/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..df60d6b20d25ac640d38b4eb061e24f626baf578
--- /dev/null
+++ b/checkpoint-330/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d4667fbba6fad5df1de226ac6cf5d3cfc1816528968c1ff259ddaa5db6ab723d
+size 15024
diff --git a/checkpoint-330/rng_state_2.pth b/checkpoint-330/rng_state_2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..422da36741950ba5633688a85dbba07997ffa96d
--- /dev/null
+++ b/checkpoint-330/rng_state_2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:645b737c6ccdce91522bc55c30a2afbad0340faa70101f8ff8b95ffac340f2d3
+size 15024
diff --git a/checkpoint-330/rng_state_3.pth b/checkpoint-330/rng_state_3.pth
new file mode 100644
index 0000000000000000000000000000000000000000..33bee041b83116b398d03986f8068b1f5aca145e
--- /dev/null
+++ b/checkpoint-330/rng_state_3.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c835b6854556a7935eaeab73c709db1e4176fe90c9cfd79a6e187768b1973f7e
+size 15024
diff --git a/checkpoint-330/scheduler.pt b/checkpoint-330/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3fccf0b3eac8c0606c93b540da32c4f62b64505e
--- /dev/null
+++ b/checkpoint-330/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:44ed9724bf2765d5d9e749d4bd517a8e5ab17ef0b551b435986fad85652c4e31
+size 1064
diff --git a/checkpoint-330/trainer_state.json b/checkpoint-330/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..d03e7ac3c9a8943133a1d1ca4a7fedd3fc502ccd
--- /dev/null
+++ b/checkpoint-330/trainer_state.json
@@ -0,0 +1,252 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.023946012626079385,
+  "eval_steps": 500,
+  "global_step": 330,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0007256367462448298,
+      "grad_norm": 15.6875,
+      "learning_rate": 5e-06,
+      "loss": 3.2562,
+      "step": 10
+    },
+    {
+      "epoch": 0.0014512734924896596,
+      "grad_norm": 17.5,
+      "learning_rate": 1e-05,
+      "loss": 2.9076,
+      "step": 20
+    },
+    {
+      "epoch": 0.0021769102387344894,
+      "grad_norm": 19.125,
+      "learning_rate": 1.5e-05,
+      "loss": 3.0281,
+      "step": 30
+    },
+    {
+      "epoch": 0.0029025469849793192,
+      "grad_norm": 17.875,
+      "learning_rate": 2e-05,
+      "loss": 2.6225,
+      "step": 40
+    },
+    {
+      "epoch": 0.003628183731224149,
+      "grad_norm": 13.9375,
+      "learning_rate": 2.5e-05,
+      "loss": 2.5137,
+      "step": 50
+    },
+    {
+      "epoch": 0.004353820477468979,
+      "grad_norm": 10.6875,
+      "learning_rate": 3e-05,
+      "loss": 1.994,
+      "step": 60
+    },
+    {
+      "epoch": 0.005079457223713809,
+      "grad_norm": 7.53125,
+      "learning_rate": 3.5e-05,
+      "loss": 1.5519,
+      "step": 70
+    },
+    {
+      "epoch": 0.0058050939699586385,
+      "grad_norm": 7.09375,
+      "learning_rate": 4e-05,
+      "loss": 1.2358,
+      "step": 80
+    },
+    {
+      "epoch": 0.006530730716203468,
+      "grad_norm": 8.0625,
+      "learning_rate": 4.5e-05,
+      "loss": 0.953,
+      "step": 90
+    },
+    {
+      "epoch": 0.007256367462448298,
+      "grad_norm": 6.6875,
+      "learning_rate": 5e-05,
+      "loss": 0.6256,
+      "step": 100
+    },
+    {
+      "epoch": 0.007982004208693128,
+      "grad_norm": 5.03125,
+      "learning_rate": 4.9999934086574596e-05,
+      "loss": 0.6601,
+      "step": 110
+    },
+    {
+      "epoch": 0.008707640954937958,
+      "grad_norm": 5.59375,
+      "learning_rate": 4.9999736346645943e-05,
+      "loss": 0.659,
+      "step": 120
+    },
+    {
+      "epoch": 0.009433277701182788,
+      "grad_norm": 3.953125,
+      "learning_rate": 4.999940678125673e-05,
+      "loss": 0.512,
+      "step": 130
+    },
+    {
+      "epoch": 0.010158914447427617,
+      "grad_norm": 5.46875,
+      "learning_rate": 4.9998945392144796e-05,
+      "loss": 0.5522,
+      "step": 140
+    },
+    {
+      "epoch": 0.010884551193672447,
+      "grad_norm": 3.96875,
+      "learning_rate": 4.999835218174307e-05,
+      "loss": 0.5532,
+      "step": 150
+    },
+    {
+      "epoch": 0.011610187939917277,
+      "grad_norm": 3.234375,
+      "learning_rate": 4.99976271531796e-05,
+      "loss": 0.5353,
+      "step": 160
+    },
+    {
+      "epoch": 0.012335824686162107,
+      "grad_norm": 4.5,
+      "learning_rate": 4.9996770310277506e-05,
+      "loss": 0.4439,
+      "step": 170
+    },
+    {
+      "epoch": 0.013061461432406937,
+      "grad_norm": 4.03125,
+      "learning_rate": 4.9995781657555e-05,
+      "loss": 0.4832,
+      "step": 180
+    },
+    {
+      "epoch": 0.013787098178651766,
+      "grad_norm": 4.71875,
+      "learning_rate": 4.99946612002253e-05,
+      "loss": 0.5325,
+      "step": 190
+    },
+    {
+      "epoch": 0.014512734924896596,
+      "grad_norm": 2.390625,
+      "learning_rate": 4.9993408944196676e-05,
+      "loss": 0.3253,
+      "step": 200
+    },
+    {
+      "epoch": 0.015238371671141426,
+      "grad_norm": 4.25,
+      "learning_rate": 4.9992024896072364e-05,
+      "loss": 0.3897,
+      "step": 210
+    },
+    {
+      "epoch": 0.015964008417386256,
+      "grad_norm": 5.65625,
+      "learning_rate": 4.999050906315055e-05,
+      "loss": 0.3642,
+      "step": 220
+    },
+    {
+      "epoch": 0.016689645163631087,
+      "grad_norm": 1.875,
+      "learning_rate": 4.998886145342434e-05,
+      "loss": 0.4811,
+      "step": 230
+    },
+    {
+      "epoch": 0.017415281909875915,
+      "grad_norm": 3.546875,
+      "learning_rate": 4.9987082075581684e-05,
+      "loss": 0.4725,
+      "step": 240
+    },
+    {
+      "epoch": 0.018140918656120747,
+      "grad_norm": 3.046875,
+      "learning_rate": 4.9985170939005386e-05,
+      "loss": 0.3427,
+      "step": 250
+    },
+    {
+      "epoch": 0.018866555402365575,
+      "grad_norm": 4.5625,
+      "learning_rate": 4.998312805377302e-05,
+      "loss": 0.3568,
+      "step": 260
+    },
+    {
+      "epoch": 0.019592192148610407,
+      "grad_norm": 4.5,
+      "learning_rate": 4.998095343065685e-05,
+      "loss": 0.3803,
+      "step": 270
+    },
+    {
+      "epoch": 0.020317828894855235,
+      "grad_norm": 3.984375,
+      "learning_rate": 4.997864708112384e-05,
+      "loss": 0.3002,
+      "step": 280
+    },
+    {
+      "epoch": 0.021043465641100066,
+      "grad_norm": 6.0625,
+      "learning_rate": 4.997620901733554e-05,
+      "loss": 0.3354,
+      "step": 290
+    },
+    {
+      "epoch": 0.021769102387344894,
+      "grad_norm": 2.703125,
+      "learning_rate": 4.997363925214803e-05,
+      "loss": 0.3285,
+      "step": 300
+    },
+    {
+      "epoch": 0.022494739133589726,
+      "grad_norm": 3.171875,
+      "learning_rate": 4.9970937799111896e-05,
+      "loss": 0.2985,
+      "step": 310
+    },
+    {
+      "epoch": 0.023220375879834554,
+      "grad_norm": 6.15625,
+      "learning_rate": 4.996810467247207e-05,
+      "loss": 0.3869,
+      "step": 320
+    },
+    {
+      "epoch": 0.023946012626079385,
+      "grad_norm": 2.03125,
+      "learning_rate": 4.9965139887167856e-05,
+      "loss": 0.2738,
+      "step": 330
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 13781,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 10,
+  "total_flos": 0.0,
+  "train_batch_size": 5,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-330/training_args.bin b/checkpoint-330/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..4c28dcaa99e0497b1c861e1642ce3651d4277bea
--- /dev/null
+++ b/checkpoint-330/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9f4cfc15f45dfcabb458cfe7e31a5e2a32e96eacab3ae733ab7eefd0eca041b5
+size 4984
diff --git a/checkpoint-340/README.md b/checkpoint-340/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..64d55750c9c68c8ef0ccff2f65b1fd32a16578a9
--- /dev/null
+++ b/checkpoint-340/README.md
@@ -0,0 +1,202 @@
+---
+library_name: peft
+base_model: ../pretrained/MiniCPM-2B-dpo-bf16/
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.10.0
\ No newline at end of file
diff --git a/checkpoint-340/adapter_config.json b/checkpoint-340/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..d0eb9db7c6b0c827aba0e86e5057f651b0f868c9
--- /dev/null
+++ b/checkpoint-340/adapter_config.json
@@ -0,0 +1,29 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "../pretrained/MiniCPM-2B-dpo-bf16/",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": "gaussian",
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-340/adapter_model.safetensors b/checkpoint-340/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..48be1c897f9ed34d36962514718649268380502d
--- /dev/null
+++ b/checkpoint-340/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ffa4b514fab6ede88c942be182586302d12ecd50c193c7d8099d78fd3736f1d2
+size 5919456
diff --git a/checkpoint-340/optimizer.pt b/checkpoint-340/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c77beeb0d538131bb5698f305f48c9715c323889
--- /dev/null
+++ b/checkpoint-340/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1bad6d0290a28393f314d441221d8bd28066545d594be811a4c47a6f3a5ce469
+size 11930938
diff --git a/checkpoint-340/rng_state_0.pth b/checkpoint-340/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..852562f5d839efc973e59f76ff158bef4f8e4cd0
--- /dev/null
+++ b/checkpoint-340/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8d1b37c0c39f42b0e731b0ce8381fd1d6e61522adb9c7a52085f3bbaaa137c81
+size 15024
diff --git a/checkpoint-340/rng_state_1.pth b/checkpoint-340/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..e1a1c465a89dc7dd1707da72a449dfca08e4a622
--- /dev/null
+++ b/checkpoint-340/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d442c8c51b6e14b41f3c95d25b451a65ffae4f5ce30d1f84755ff3615eff9d13
+size 15024
diff --git a/checkpoint-340/rng_state_2.pth b/checkpoint-340/rng_state_2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..5336fc7d06fe88de7eb2ea2e60267f838924b3ff
--- /dev/null
+++ b/checkpoint-340/rng_state_2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c48003a40f042cbb527f53d852225776c8a6177157abca81cdaae7b56d7ad370
+size 15024
diff --git a/checkpoint-340/rng_state_3.pth b/checkpoint-340/rng_state_3.pth
new file mode 100644
index 0000000000000000000000000000000000000000..1b7fb2554acc0513bd93021720919a5820929522
--- /dev/null
+++ b/checkpoint-340/rng_state_3.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f7c6384de2f8695f35e1d9074eabb09aa3122a56501ee571f67417e923b30de9
+size 15024
diff --git a/checkpoint-340/scheduler.pt b/checkpoint-340/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..81723370431c5232b593ede7b6194b4b9df82543
--- /dev/null
+++ b/checkpoint-340/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5a3cf1a4dcc6bad570af56fa4d51d9ed193402a0d912bc9bfb7c5fb4d8de68df
+size 1064
diff --git a/checkpoint-340/trainer_state.json b/checkpoint-340/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..d674f9a45f174cb45fe26e047ef3b31a25515be6
--- /dev/null
+++ b/checkpoint-340/trainer_state.json
@@ -0,0 +1,259 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.024671649372324213,
+  "eval_steps": 500,
+  "global_step": 340,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0007256367462448298,
+      "grad_norm": 15.6875,
+      "learning_rate": 5e-06,
+      "loss": 3.2562,
+      "step": 10
+    },
+    {
+      "epoch": 0.0014512734924896596,
+      "grad_norm": 17.5,
+      "learning_rate": 1e-05,
+      "loss": 2.9076,
+      "step": 20
+    },
+    {
+      "epoch": 0.0021769102387344894,
+      "grad_norm": 19.125,
+      "learning_rate": 1.5e-05,
+      "loss": 3.0281,
+      "step": 30
+    },
+    {
+      "epoch": 0.0029025469849793192,
+      "grad_norm": 17.875,
+      "learning_rate": 2e-05,
+      "loss": 2.6225,
+      "step": 40
+    },
+    {
+      "epoch": 0.003628183731224149,
+      "grad_norm": 13.9375,
+      "learning_rate": 2.5e-05,
+      "loss": 2.5137,
+      "step": 50
+    },
+    {
+      "epoch": 0.004353820477468979,
+      "grad_norm": 10.6875,
+      "learning_rate": 3e-05,
+      "loss": 1.994,
+      "step": 60
+    },
+    {
+      "epoch": 0.005079457223713809,
+      "grad_norm": 7.53125,
+      "learning_rate": 3.5e-05,
+      "loss": 1.5519,
+      "step": 70
+    },
+    {
+      "epoch": 0.0058050939699586385,
+      "grad_norm": 7.09375,
+      "learning_rate": 4e-05,
+      "loss": 1.2358,
+      "step": 80
+    },
+    {
+      "epoch": 0.006530730716203468,
+      "grad_norm": 8.0625,
+      "learning_rate": 4.5e-05,
+      "loss": 0.953,
+      "step": 90
+    },
+    {
+      "epoch": 0.007256367462448298,
+      "grad_norm": 6.6875,
+      "learning_rate": 5e-05,
+      "loss": 0.6256,
+      "step": 100
+    },
+    {
+      "epoch": 0.007982004208693128,
+      "grad_norm": 5.03125,
+      "learning_rate": 4.9999934086574596e-05,
+      "loss": 0.6601,
+      "step": 110
+    },
+    {
+      "epoch": 0.008707640954937958,
+      "grad_norm": 5.59375,
+      "learning_rate": 4.9999736346645943e-05,
+      "loss": 0.659,
+      "step": 120
+    },
+    {
+      "epoch": 0.009433277701182788,
+      "grad_norm": 3.953125,
+      "learning_rate": 4.999940678125673e-05,
+      "loss": 0.512,
+      "step": 130
+    },
+    {
+      "epoch": 0.010158914447427617,
+      "grad_norm": 5.46875,
+      "learning_rate": 4.9998945392144796e-05,
+      "loss": 0.5522,
+      "step": 140
+    },
+    {
+      "epoch": 0.010884551193672447,
+      "grad_norm": 3.96875,
+      "learning_rate": 4.999835218174307e-05,
+      "loss": 0.5532,
+      "step": 150
+    },
+    {
+      "epoch": 0.011610187939917277,
+      "grad_norm": 3.234375,
+      "learning_rate": 4.99976271531796e-05,
+      "loss": 0.5353,
+      "step": 160
+    },
+    {
+      "epoch": 0.012335824686162107,
+      "grad_norm": 4.5,
+      "learning_rate": 4.9996770310277506e-05,
+      "loss": 0.4439,
+      "step": 170
+    },
+    {
+      "epoch": 0.013061461432406937,
+      "grad_norm": 4.03125,
+      "learning_rate": 4.9995781657555e-05,
+      "loss": 0.4832,
+      "step": 180
+    },
+    {
+      "epoch": 0.013787098178651766,
+      "grad_norm": 4.71875,
+      "learning_rate": 4.99946612002253e-05,
+      "loss": 0.5325,
+      "step": 190
+    },
+    {
+      "epoch": 0.014512734924896596,
+      "grad_norm": 2.390625,
+      "learning_rate": 4.9993408944196676e-05,
+      "loss": 0.3253,
+      "step": 200
+    },
+    {
+      "epoch": 0.015238371671141426,
+      "grad_norm": 4.25,
+      "learning_rate": 4.9992024896072364e-05,
+      "loss": 0.3897,
+      "step": 210
+    },
+    {
+      "epoch": 0.015964008417386256,
+      "grad_norm": 5.65625,
+      "learning_rate": 4.999050906315055e-05,
+      "loss": 0.3642,
+      "step": 220
+    },
+    {
+      "epoch": 0.016689645163631087,
+      "grad_norm": 1.875,
+      "learning_rate": 4.998886145342434e-05,
+      "loss": 0.4811,
+      "step": 230
+    },
+    {
+      "epoch": 0.017415281909875915,
+      "grad_norm": 3.546875,
+      "learning_rate": 4.9987082075581684e-05,
+      "loss": 0.4725,
+      "step": 240
+    },
+    {
+      "epoch": 0.018140918656120747,
+      "grad_norm": 3.046875,
+      "learning_rate": 4.9985170939005386e-05,
+      "loss": 0.3427,
+      "step": 250
+    },
+    {
+      "epoch": 0.018866555402365575,
+      "grad_norm": 4.5625,
+      "learning_rate": 4.998312805377302e-05,
+      "loss": 0.3568,
+      "step": 260
+    },
+    {
+      "epoch": 0.019592192148610407,
+      "grad_norm": 4.5,
+      "learning_rate": 4.998095343065685e-05,
+      "loss": 0.3803,
+      "step": 270
+    },
+    {
+      "epoch": 0.020317828894855235,
+      "grad_norm": 3.984375,
+      "learning_rate": 4.997864708112384e-05,
+      "loss": 0.3002,
+      "step": 280
+    },
+    {
+      "epoch": 0.021043465641100066,
+      "grad_norm": 6.0625,
+      "learning_rate": 4.997620901733554e-05,
+      "loss": 0.3354,
+      "step": 290
+    },
+    {
+      "epoch": 0.021769102387344894,
+      "grad_norm": 2.703125,
+      "learning_rate": 4.997363925214803e-05,
+      "loss": 0.3285,
+      "step": 300
+    },
+    {
+      "epoch": 0.022494739133589726,
+      "grad_norm": 3.171875,
+      "learning_rate": 4.9970937799111896e-05,
+      "loss": 0.2985,
+      "step": 310
+    },
+    {
+      "epoch": 0.023220375879834554,
+      "grad_norm": 6.15625,
+      "learning_rate": 4.996810467247207e-05,
+      "loss": 0.3869,
+      "step": 320
+    },
+    {
+      "epoch": 0.023946012626079385,
+      "grad_norm": 2.03125,
+      "learning_rate": 4.9965139887167856e-05,
+      "loss": 0.2738,
+      "step": 330
+    },
+    {
+      "epoch": 0.024671649372324213,
+      "grad_norm": 2.9375,
+      "learning_rate": 4.996204345883278e-05,
+      "loss": 0.3396,
+      "step": 340
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 13781,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 10,
+  "total_flos": 0.0,
+  "train_batch_size": 5,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-340/training_args.bin b/checkpoint-340/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..4c28dcaa99e0497b1c861e1642ce3651d4277bea
--- /dev/null
+++ b/checkpoint-340/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9f4cfc15f45dfcabb458cfe7e31a5e2a32e96eacab3ae733ab7eefd0eca041b5
+size 4984
diff --git a/checkpoint-350/README.md b/checkpoint-350/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..64d55750c9c68c8ef0ccff2f65b1fd32a16578a9
--- /dev/null
+++ b/checkpoint-350/README.md
@@ -0,0 +1,202 @@
+---
+library_name: peft
+base_model: ../pretrained/MiniCPM-2B-dpo-bf16/
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.10.0
\ No newline at end of file
diff --git a/checkpoint-350/adapter_config.json b/checkpoint-350/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..d0eb9db7c6b0c827aba0e86e5057f651b0f868c9
--- /dev/null
+++ b/checkpoint-350/adapter_config.json
@@ -0,0 +1,29 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "../pretrained/MiniCPM-2B-dpo-bf16/",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": "gaussian",
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-350/adapter_model.safetensors b/checkpoint-350/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..6b1a2a734eaafd51e213f148a16a111dd03f0d8d
--- /dev/null
+++ b/checkpoint-350/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f904ff51a14addd78f0ac599c5f96f7d9ea9c3265b7c53afba08277bbc6a8ace
+size 5919456
diff --git a/checkpoint-350/optimizer.pt b/checkpoint-350/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..dea931819a13bccff6634666a7a871b54608dc04
--- /dev/null
+++ b/checkpoint-350/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8e09107ce06fd0435ada4eae6cf45eb2da98db2304bb80f2996e9b17e6d5179a
+size 11930938
diff --git a/checkpoint-350/rng_state_0.pth b/checkpoint-350/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..72b6b5ab294d04c9a2a8f70ff379be5b259f3f03
--- /dev/null
+++ b/checkpoint-350/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b1e18052127e2de91acf10f8b47f9cff3595a79025227c9c631027861fcbbb99
+size 15024
diff --git a/checkpoint-350/rng_state_1.pth b/checkpoint-350/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..dee41cae1bc010c0ec23dbd4302402489b178fe5
--- /dev/null
+++ b/checkpoint-350/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6e3614fc6f0007750b432ee70bf5896a5fddfee40f3a86ad592a98f8a3a81125
+size 15024
diff --git a/checkpoint-350/rng_state_2.pth b/checkpoint-350/rng_state_2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..9423e42ffc31c78a8ba0fa93c3d2e1a421197c6b
--- /dev/null
+++ b/checkpoint-350/rng_state_2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5a65dc95baa1e904fdac16e865cb160a5aa7af67a382c615639a573fa8689b78
+size 15024
diff --git a/checkpoint-350/rng_state_3.pth b/checkpoint-350/rng_state_3.pth
new file mode 100644
index 0000000000000000000000000000000000000000..e90ce1d10c4f3201e342fb8d18fbfd37475d6975
--- /dev/null
+++ b/checkpoint-350/rng_state_3.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2aa8ee9a4a09f7b99ca2be28334313f1a38cf621e55ec5862d12e8ea4b952fa7
+size 15024
diff --git a/checkpoint-350/scheduler.pt b/checkpoint-350/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..92c40dc291752db53442ee2c28a2a0dfa8f1ebcb
--- /dev/null
+++ b/checkpoint-350/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e9e6dbdb793d0a5ec47053339626f51324635a0a0d69e91e607146be6107dc1a
+size 1064
diff --git a/checkpoint-350/trainer_state.json b/checkpoint-350/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..86eadc4b4683c44a8cbef12dd4215639d7e7b772
--- /dev/null
+++ b/checkpoint-350/trainer_state.json
@@ -0,0 +1,266 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.025397286118569045,
+  "eval_steps": 500,
+  "global_step": 350,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0007256367462448298,
+      "grad_norm": 15.6875,
+      "learning_rate": 5e-06,
+      "loss": 3.2562,
+      "step": 10
+    },
+    {
+      "epoch": 0.0014512734924896596,
+      "grad_norm": 17.5,
+      "learning_rate": 1e-05,
+      "loss": 2.9076,
+      "step": 20
+    },
+    {
+      "epoch": 0.0021769102387344894,
+      "grad_norm": 19.125,
+      "learning_rate": 1.5e-05,
+      "loss": 3.0281,
+      "step": 30
+    },
+    {
+      "epoch": 0.0029025469849793192,
+      "grad_norm": 17.875,
+      "learning_rate": 2e-05,
+      "loss": 2.6225,
+      "step": 40
+    },
+    {
+      "epoch": 0.003628183731224149,
+      "grad_norm": 13.9375,
+      "learning_rate": 2.5e-05,
+      "loss": 2.5137,
+      "step": 50
+    },
+    {
+      "epoch": 0.004353820477468979,
+      "grad_norm": 10.6875,
+      "learning_rate": 3e-05,
+      "loss": 1.994,
+      "step": 60
+    },
+    {
+      "epoch": 0.005079457223713809,
+      "grad_norm": 7.53125,
+      "learning_rate": 3.5e-05,
+      "loss": 1.5519,
+      "step": 70
+    },
+    {
+      "epoch": 0.0058050939699586385,
+      "grad_norm": 7.09375,
+      "learning_rate": 4e-05,
+      "loss": 1.2358,
+      "step": 80
+    },
+    {
+      "epoch": 0.006530730716203468,
+      "grad_norm": 8.0625,
+      "learning_rate": 4.5e-05,
+      "loss": 0.953,
+      "step": 90
+    },
+    {
+      "epoch": 0.007256367462448298,
+      "grad_norm": 6.6875,
+      "learning_rate": 5e-05,
+      "loss": 0.6256,
+      "step": 100
+    },
+    {
+      "epoch": 0.007982004208693128,
+      "grad_norm": 5.03125,
+      "learning_rate": 4.9999934086574596e-05,
+      "loss": 0.6601,
+      "step": 110
+    },
+    {
+      "epoch": 0.008707640954937958,
+      "grad_norm": 5.59375,
+      "learning_rate": 4.9999736346645943e-05,
+      "loss": 0.659,
+      "step": 120
+    },
+    {
+      "epoch": 0.009433277701182788,
+      "grad_norm": 3.953125,
+      "learning_rate": 4.999940678125673e-05,
+      "loss": 0.512,
+      "step": 130
+    },
+    {
+      "epoch": 0.010158914447427617,
+      "grad_norm": 5.46875,
+      "learning_rate": 4.9998945392144796e-05,
+      "loss": 0.5522,
+      "step": 140
+    },
+    {
+      "epoch": 0.010884551193672447,
+      "grad_norm": 3.96875,
+      "learning_rate": 4.999835218174307e-05,
+      "loss": 0.5532,
+      "step": 150
+    },
+    {
+      "epoch": 0.011610187939917277,
+      "grad_norm": 3.234375,
+      "learning_rate": 4.99976271531796e-05,
+      "loss": 0.5353,
+      "step": 160
+    },
+    {
+      "epoch": 0.012335824686162107,
+      "grad_norm": 4.5,
+      "learning_rate": 4.9996770310277506e-05,
+      "loss": 0.4439,
+      "step": 170
+    },
+    {
+      "epoch": 0.013061461432406937,
+      "grad_norm": 4.03125,
+      "learning_rate": 4.9995781657555e-05,
+      "loss": 0.4832,
+      "step": 180
+    },
+    {
+      "epoch": 0.013787098178651766,
+      "grad_norm": 4.71875,
+      "learning_rate": 4.99946612002253e-05,
+      "loss": 0.5325,
+      "step": 190
+    },
+    {
+      "epoch": 0.014512734924896596,
+      "grad_norm": 2.390625,
+      "learning_rate": 4.9993408944196676e-05,
+      "loss": 0.3253,
+      "step": 200
+    },
+    {
+      "epoch": 0.015238371671141426,
+      "grad_norm": 4.25,
+      "learning_rate": 4.9992024896072364e-05,
+      "loss": 0.3897,
+      "step": 210
+    },
+    {
+      "epoch": 0.015964008417386256,
+      "grad_norm": 5.65625,
+      "learning_rate": 4.999050906315055e-05,
+      "loss": 0.3642,
+      "step": 220
+    },
+    {
+      "epoch": 0.016689645163631087,
+      "grad_norm": 1.875,
+      "learning_rate": 4.998886145342434e-05,
+      "loss": 0.4811,
+      "step": 230
+    },
+    {
+      "epoch": 0.017415281909875915,
+      "grad_norm": 3.546875,
+      "learning_rate": 4.9987082075581684e-05,
+      "loss": 0.4725,
+      "step": 240
+    },
+    {
+      "epoch": 0.018140918656120747,
+      "grad_norm": 3.046875,
+      "learning_rate": 4.9985170939005386e-05,
+      "loss": 0.3427,
+      "step": 250
+    },
+    {
+      "epoch": 0.018866555402365575,
+      "grad_norm": 4.5625,
+      "learning_rate": 4.998312805377302e-05,
+      "loss": 0.3568,
+      "step": 260
+    },
+    {
+      "epoch": 0.019592192148610407,
+      "grad_norm": 4.5,
+      "learning_rate": 4.998095343065685e-05,
+      "loss": 0.3803,
+      "step": 270
+    },
+    {
+      "epoch": 0.020317828894855235,
+      "grad_norm": 3.984375,
+      "learning_rate": 4.997864708112384e-05,
+      "loss": 0.3002,
+      "step": 280
+    },
+    {
+      "epoch": 0.021043465641100066,
+      "grad_norm": 6.0625,
+      "learning_rate": 4.997620901733554e-05,
+      "loss": 0.3354,
+      "step": 290
+    },
+    {
+      "epoch": 0.021769102387344894,
+      "grad_norm": 2.703125,
+      "learning_rate": 4.997363925214803e-05,
+      "loss": 0.3285,
+      "step": 300
+    },
+    {
+      "epoch": 0.022494739133589726,
+      "grad_norm": 3.171875,
+      "learning_rate": 4.9970937799111896e-05,
+      "loss": 0.2985,
+      "step": 310
+    },
+    {
+      "epoch": 0.023220375879834554,
+      "grad_norm": 6.15625,
+      "learning_rate": 4.996810467247207e-05,
+      "loss": 0.3869,
+      "step": 320
+    },
+    {
+      "epoch": 0.023946012626079385,
+      "grad_norm": 2.03125,
+      "learning_rate": 4.9965139887167856e-05,
+      "loss": 0.2738,
+      "step": 330
+    },
+    {
+      "epoch": 0.024671649372324213,
+      "grad_norm": 2.9375,
+      "learning_rate": 4.996204345883278e-05,
+      "loss": 0.3396,
+      "step": 340
+    },
+    {
+      "epoch": 0.025397286118569045,
+      "grad_norm": 2.484375,
+      "learning_rate": 4.9958815403794546e-05,
+      "loss": 0.3736,
+      "step": 350
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 13781,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 10,
+  "total_flos": 0.0,
+  "train_batch_size": 5,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-350/training_args.bin b/checkpoint-350/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..4c28dcaa99e0497b1c861e1642ce3651d4277bea
--- /dev/null
+++ b/checkpoint-350/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9f4cfc15f45dfcabb458cfe7e31a5e2a32e96eacab3ae733ab7eefd0eca041b5
+size 4984
diff --git a/checkpoint-360/README.md b/checkpoint-360/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..64d55750c9c68c8ef0ccff2f65b1fd32a16578a9
--- /dev/null
+++ b/checkpoint-360/README.md
@@ -0,0 +1,202 @@
+---
+library_name: peft
+base_model: ../pretrained/MiniCPM-2B-dpo-bf16/
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.10.0
\ No newline at end of file
diff --git a/checkpoint-360/adapter_config.json b/checkpoint-360/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..d0eb9db7c6b0c827aba0e86e5057f651b0f868c9
--- /dev/null
+++ b/checkpoint-360/adapter_config.json
@@ -0,0 +1,29 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "../pretrained/MiniCPM-2B-dpo-bf16/",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": "gaussian",
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-360/adapter_model.safetensors b/checkpoint-360/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..a395e1ba26045fdffec7ca4fc41bbed04306c9c0
--- /dev/null
+++ b/checkpoint-360/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:57ffe4d0a6ef428186e35f9f34edb9ee11be453d3253e40dab8de7280b70395d
+size 5919456
diff --git a/checkpoint-360/optimizer.pt b/checkpoint-360/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..320f7e3ec071855d408ef07888a6d9ab5f170ec0
--- /dev/null
+++ b/checkpoint-360/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5c329cfc040c9601485e49e3bdd54bbcf997b6a9e9dc09098b1e59c63fe5d4f9
+size 11930938
diff --git a/checkpoint-360/rng_state_0.pth b/checkpoint-360/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..22ef40ec54530f943fe84df8a4eda896e507416b
--- /dev/null
+++ b/checkpoint-360/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6055b6a6600edf972449926a6d9e1f1a247175c461f5f21321d26e27fea2be59
+size 15024
diff --git a/checkpoint-360/rng_state_1.pth b/checkpoint-360/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..3e7fec541c040cfdc3f98022f97c2a818688f6b1
--- /dev/null
+++ b/checkpoint-360/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1692dc7dd5df436d5ce9ab34d0ffb67ccf1ac9f6ad34e4c78261a9f441a6db1b
+size 15024
diff --git a/checkpoint-360/rng_state_2.pth b/checkpoint-360/rng_state_2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..22323d232189aca1f95a821b9cc3d44ed48f473b
--- /dev/null
+++ b/checkpoint-360/rng_state_2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8f2a7b39392c289fa6613bad5680bc4f74a71bbd981f0a7e60593044eecf174e
+size 15024
diff --git a/checkpoint-360/rng_state_3.pth b/checkpoint-360/rng_state_3.pth
new file mode 100644
index 0000000000000000000000000000000000000000..a41b44d06701de70547b4cce762c5049bf8d84c3
--- /dev/null
+++ b/checkpoint-360/rng_state_3.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e4fcf7ef19ff07bd8503e736640eae507d4f24dbe6df23e2390549ba8d610ef4
+size 15024
diff --git a/checkpoint-360/scheduler.pt b/checkpoint-360/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9ec35fcd660d0db5d88ce202c3734bec67bd9909
--- /dev/null
+++ b/checkpoint-360/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:45dbe572d8e55006c5e7a11ea49bec49dd65baec25b22b119f672f046312da9b
+size 1064
diff --git a/checkpoint-360/trainer_state.json b/checkpoint-360/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..cd9ce58406a207a90fb3ee3f5646b8763254d7a9
--- /dev/null
+++ b/checkpoint-360/trainer_state.json
@@ -0,0 +1,273 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.026122922864813873,
+  "eval_steps": 500,
+  "global_step": 360,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0007256367462448298,
+      "grad_norm": 15.6875,
+      "learning_rate": 5e-06,
+      "loss": 3.2562,
+      "step": 10
+    },
+    {
+      "epoch": 0.0014512734924896596,
+      "grad_norm": 17.5,
+      "learning_rate": 1e-05,
+      "loss": 2.9076,
+      "step": 20
+    },
+    {
+      "epoch": 0.0021769102387344894,
+      "grad_norm": 19.125,
+      "learning_rate": 1.5e-05,
+      "loss": 3.0281,
+      "step": 30
+    },
+    {
+      "epoch": 0.0029025469849793192,
+      "grad_norm": 17.875,
+      "learning_rate": 2e-05,
+      "loss": 2.6225,
+      "step": 40
+    },
+    {
+      "epoch": 0.003628183731224149,
+      "grad_norm": 13.9375,
+      "learning_rate": 2.5e-05,
+      "loss": 2.5137,
+      "step": 50
+    },
+    {
+      "epoch": 0.004353820477468979,
+      "grad_norm": 10.6875,
+      "learning_rate": 3e-05,
+      "loss": 1.994,
+      "step": 60
+    },
+    {
+      "epoch": 0.005079457223713809,
+      "grad_norm": 7.53125,
+      "learning_rate": 3.5e-05,
+      "loss": 1.5519,
+      "step": 70
+    },
+    {
+      "epoch": 0.0058050939699586385,
+      "grad_norm": 7.09375,
+      "learning_rate": 4e-05,
+      "loss": 1.2358,
+      "step": 80
+    },
+    {
+      "epoch": 0.006530730716203468,
+      "grad_norm": 8.0625,
+      "learning_rate": 4.5e-05,
+      "loss": 0.953,
+      "step": 90
+    },
+    {
+      "epoch": 0.007256367462448298,
+      "grad_norm": 6.6875,
+      "learning_rate": 5e-05,
+      "loss": 0.6256,
+      "step": 100
+    },
+    {
+      "epoch": 0.007982004208693128,
+      "grad_norm": 5.03125,
+      "learning_rate": 4.9999934086574596e-05,
+      "loss": 0.6601,
+      "step": 110
+    },
+    {
+      "epoch": 0.008707640954937958,
+      "grad_norm": 5.59375,
+      "learning_rate": 4.9999736346645943e-05,
+      "loss": 0.659,
+      "step": 120
+    },
+    {
+      "epoch": 0.009433277701182788,
+      "grad_norm": 3.953125,
+      "learning_rate": 4.999940678125673e-05,
+      "loss": 0.512,
+      "step": 130
+    },
+    {
+      "epoch": 0.010158914447427617,
+      "grad_norm": 5.46875,
+      "learning_rate": 4.9998945392144796e-05,
+      "loss": 0.5522,
+      "step": 140
+    },
+    {
+      "epoch": 0.010884551193672447,
+      "grad_norm": 3.96875,
+      "learning_rate": 4.999835218174307e-05,
+      "loss": 0.5532,
+      "step": 150
+    },
+    {
+      "epoch": 0.011610187939917277,
+      "grad_norm": 3.234375,
+      "learning_rate": 4.99976271531796e-05,
+      "loss": 0.5353,
+      "step": 160
+    },
+    {
+      "epoch": 0.012335824686162107,
+      "grad_norm": 4.5,
+      "learning_rate": 4.9996770310277506e-05,
+      "loss": 0.4439,
+      "step": 170
+    },
+    {
+      "epoch": 0.013061461432406937,
+      "grad_norm": 4.03125,
+      "learning_rate": 4.9995781657555e-05,
+      "loss": 0.4832,
+      "step": 180
+    },
+    {
+      "epoch": 0.013787098178651766,
+      "grad_norm": 4.71875,
+      "learning_rate": 4.99946612002253e-05,
+      "loss": 0.5325,
+      "step": 190
+    },
+    {
+      "epoch": 0.014512734924896596,
+      "grad_norm": 2.390625,
+      "learning_rate": 4.9993408944196676e-05,
+      "loss": 0.3253,
+      "step": 200
+    },
+    {
+      "epoch": 0.015238371671141426,
+      "grad_norm": 4.25,
+      "learning_rate": 4.9992024896072364e-05,
+      "loss": 0.3897,
+      "step": 210
+    },
+    {
+      "epoch": 0.015964008417386256,
+      "grad_norm": 5.65625,
+      "learning_rate": 4.999050906315055e-05,
+      "loss": 0.3642,
+      "step": 220
+    },
+    {
+      "epoch": 0.016689645163631087,
+      "grad_norm": 1.875,
+      "learning_rate": 4.998886145342434e-05,
+      "loss": 0.4811,
+      "step": 230
+    },
+    {
+      "epoch": 0.017415281909875915,
+      "grad_norm": 3.546875,
+      "learning_rate": 4.9987082075581684e-05,
+      "loss": 0.4725,
+      "step": 240
+    },
+    {
+      "epoch": 0.018140918656120747,
+      "grad_norm": 3.046875,
+      "learning_rate": 4.9985170939005386e-05,
+      "loss": 0.3427,
+      "step": 250
+    },
+    {
+      "epoch": 0.018866555402365575,
+      "grad_norm": 4.5625,
+      "learning_rate": 4.998312805377302e-05,
+      "loss": 0.3568,
+      "step": 260
+    },
+    {
+      "epoch": 0.019592192148610407,
+      "grad_norm": 4.5,
+      "learning_rate": 4.998095343065685e-05,
+      "loss": 0.3803,
+      "step": 270
+    },
+    {
+      "epoch": 0.020317828894855235,
+      "grad_norm": 3.984375,
+      "learning_rate": 4.997864708112384e-05,
+      "loss": 0.3002,
+      "step": 280
+    },
+    {
+      "epoch": 0.021043465641100066,
+      "grad_norm": 6.0625,
+      "learning_rate": 4.997620901733554e-05,
+      "loss": 0.3354,
+      "step": 290
+    },
+    {
+      "epoch": 0.021769102387344894,
+      "grad_norm": 2.703125,
+      "learning_rate": 4.997363925214803e-05,
+      "loss": 0.3285,
+      "step": 300
+    },
+    {
+      "epoch": 0.022494739133589726,
+      "grad_norm": 3.171875,
+      "learning_rate": 4.9970937799111896e-05,
+      "loss": 0.2985,
+      "step": 310
+    },
+    {
+      "epoch": 0.023220375879834554,
+      "grad_norm": 6.15625,
+      "learning_rate": 4.996810467247207e-05,
+      "loss": 0.3869,
+      "step": 320
+    },
+    {
+      "epoch": 0.023946012626079385,
+      "grad_norm": 2.03125,
+      "learning_rate": 4.9965139887167856e-05,
+      "loss": 0.2738,
+      "step": 330
+    },
+    {
+      "epoch": 0.024671649372324213,
+      "grad_norm": 2.9375,
+      "learning_rate": 4.996204345883278e-05,
+      "loss": 0.3396,
+      "step": 340
+    },
+    {
+      "epoch": 0.025397286118569045,
+      "grad_norm": 2.484375,
+      "learning_rate": 4.9958815403794546e-05,
+      "loss": 0.3736,
+      "step": 350
+    },
+    {
+      "epoch": 0.026122922864813873,
+      "grad_norm": 5.09375,
+      "learning_rate": 4.995545573907492e-05,
+      "loss": 0.3458,
+      "step": 360
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 13781,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 10,
+  "total_flos": 0.0,
+  "train_batch_size": 5,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-360/training_args.bin b/checkpoint-360/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..4c28dcaa99e0497b1c861e1642ce3651d4277bea
--- /dev/null
+++ b/checkpoint-360/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9f4cfc15f45dfcabb458cfe7e31a5e2a32e96eacab3ae733ab7eefd0eca041b5
+size 4984
diff --git a/checkpoint-370/README.md b/checkpoint-370/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..64d55750c9c68c8ef0ccff2f65b1fd32a16578a9
--- /dev/null
+++ b/checkpoint-370/README.md
@@ -0,0 +1,202 @@
+---
+library_name: peft
+base_model: ../pretrained/MiniCPM-2B-dpo-bf16/
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.10.0
\ No newline at end of file
diff --git a/checkpoint-370/adapter_config.json b/checkpoint-370/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..d0eb9db7c6b0c827aba0e86e5057f651b0f868c9
--- /dev/null
+++ b/checkpoint-370/adapter_config.json
@@ -0,0 +1,29 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "../pretrained/MiniCPM-2B-dpo-bf16/",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": "gaussian",
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-370/adapter_model.safetensors b/checkpoint-370/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..215701862dfd3daef06a4b83aa69b001ce4b21c9
--- /dev/null
+++ b/checkpoint-370/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:de5523c1f4d5e0755f704707218e585a41c927a2af35e0c7fccceefd7bfb891d
+size 5919456
diff --git a/checkpoint-370/optimizer.pt b/checkpoint-370/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..106424ec0dc7c730cf332938d41ef163fd7daf56
--- /dev/null
+++ b/checkpoint-370/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2a2831d79c9f0f15f3ef750598fb642639f67d9cd3e6353be3ae93634ed7b355
+size 11930938
diff --git a/checkpoint-370/rng_state_0.pth b/checkpoint-370/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..abef8aa2e3e993088f6540b7adf4b005c8b3d4b8
--- /dev/null
+++ b/checkpoint-370/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3146f95f0c1744da54cfef23baa84eb478da1ede72afc7bb1a4d9c758b864513
+size 15024
diff --git a/checkpoint-370/rng_state_1.pth b/checkpoint-370/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..a7bda2a97f439dc50d8c4c386b75aa8f0dbfb683
--- /dev/null
+++ b/checkpoint-370/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:29f638987e1dd0d295ccf6faca3d895a81bf9d37a611d8c054768f38f31ef086
+size 15024
diff --git a/checkpoint-370/rng_state_2.pth b/checkpoint-370/rng_state_2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..199d723b6bbc73b8b9999b13830085f0f41a264e
--- /dev/null
+++ b/checkpoint-370/rng_state_2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:11173c1a154308e33fbdcfdddfce8e815d6f8ca6d15a19ed8eb70b9ebcfb6462
+size 15024
diff --git a/checkpoint-370/rng_state_3.pth b/checkpoint-370/rng_state_3.pth
new file mode 100644
index 0000000000000000000000000000000000000000..bc87349bd8fad2e53569b988f385cc2293209aae
--- /dev/null
+++ b/checkpoint-370/rng_state_3.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f6ecb8620405fb1a0e7b3f96c27d98a80ac6913d9646fe27c04de0af84c8d828
+size 15024
diff --git a/checkpoint-370/scheduler.pt b/checkpoint-370/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3b826d4c33eaad1d95c3a01cfd7103458b4c303a
--- /dev/null
+++ b/checkpoint-370/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:93eae3f20107e932602a7600eff8db5d2f7a42a5f81899db419e7426cd5a97b0
+size 1064
diff --git a/checkpoint-370/trainer_state.json b/checkpoint-370/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..b3044bcab6ca895478e94d13735e43bc201be0da
--- /dev/null
+++ b/checkpoint-370/trainer_state.json
@@ -0,0 +1,280 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.026848559611058705,
+  "eval_steps": 500,
+  "global_step": 370,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0007256367462448298,
+      "grad_norm": 15.6875,
+      "learning_rate": 5e-06,
+      "loss": 3.2562,
+      "step": 10
+    },
+    {
+      "epoch": 0.0014512734924896596,
+      "grad_norm": 17.5,
+      "learning_rate": 1e-05,
+      "loss": 2.9076,
+      "step": 20
+    },
+    {
+      "epoch": 0.0021769102387344894,
+      "grad_norm": 19.125,
+      "learning_rate": 1.5e-05,
+      "loss": 3.0281,
+      "step": 30
+    },
+    {
+      "epoch": 0.0029025469849793192,
+      "grad_norm": 17.875,
+      "learning_rate": 2e-05,
+      "loss": 2.6225,
+      "step": 40
+    },
+    {
+      "epoch": 0.003628183731224149,
+      "grad_norm": 13.9375,
+      "learning_rate": 2.5e-05,
+      "loss": 2.5137,
+      "step": 50
+    },
+    {
+      "epoch": 0.004353820477468979,
+      "grad_norm": 10.6875,
+      "learning_rate": 3e-05,
+      "loss": 1.994,
+      "step": 60
+    },
+    {
+      "epoch": 0.005079457223713809,
+      "grad_norm": 7.53125,
+      "learning_rate": 3.5e-05,
+      "loss": 1.5519,
+      "step": 70
+    },
+    {
+      "epoch": 0.0058050939699586385,
+      "grad_norm": 7.09375,
+      "learning_rate": 4e-05,
+      "loss": 1.2358,
+      "step": 80
+    },
+    {
+      "epoch": 0.006530730716203468,
+      "grad_norm": 8.0625,
+      "learning_rate": 4.5e-05,
+      "loss": 0.953,
+      "step": 90
+    },
+    {
+      "epoch": 0.007256367462448298,
+      "grad_norm": 6.6875,
+      "learning_rate": 5e-05,
+      "loss": 0.6256,
+      "step": 100
+    },
+    {
+      "epoch": 0.007982004208693128,
+      "grad_norm": 5.03125,
+      "learning_rate": 4.9999934086574596e-05,
+      "loss": 0.6601,
+      "step": 110
+    },
+    {
+      "epoch": 0.008707640954937958,
+      "grad_norm": 5.59375,
+      "learning_rate": 4.9999736346645943e-05,
+      "loss": 0.659,
+      "step": 120
+    },
+    {
+      "epoch": 0.009433277701182788,
+      "grad_norm": 3.953125,
+      "learning_rate": 4.999940678125673e-05,
+      "loss": 0.512,
+      "step": 130
+    },
+    {
+      "epoch": 0.010158914447427617,
+      "grad_norm": 5.46875,
+      "learning_rate": 4.9998945392144796e-05,
+      "loss": 0.5522,
+      "step": 140
+    },
+    {
+      "epoch": 0.010884551193672447,
+      "grad_norm": 3.96875,
+      "learning_rate": 4.999835218174307e-05,
+      "loss": 0.5532,
+      "step": 150
+    },
+    {
+      "epoch": 0.011610187939917277,
+      "grad_norm": 3.234375,
+      "learning_rate": 4.99976271531796e-05,
+      "loss": 0.5353,
+      "step": 160
+    },
+    {
+      "epoch": 0.012335824686162107,
+      "grad_norm": 4.5,
+      "learning_rate": 4.9996770310277506e-05,
+      "loss": 0.4439,
+      "step": 170
+    },
+    {
+      "epoch": 0.013061461432406937,
+      "grad_norm": 4.03125,
+      "learning_rate": 4.9995781657555e-05,
+      "loss": 0.4832,
+      "step": 180
+    },
+    {
+      "epoch": 0.013787098178651766,
+      "grad_norm": 4.71875,
+      "learning_rate": 4.99946612002253e-05,
+      "loss": 0.5325,
+      "step": 190
+    },
+    {
+      "epoch": 0.014512734924896596,
+      "grad_norm": 2.390625,
+      "learning_rate": 4.9993408944196676e-05,
+      "loss": 0.3253,
+      "step": 200
+    },
+    {
+      "epoch": 0.015238371671141426,
+      "grad_norm": 4.25,
+      "learning_rate": 4.9992024896072364e-05,
+      "loss": 0.3897,
+      "step": 210
+    },
+    {
+      "epoch": 0.015964008417386256,
+      "grad_norm": 5.65625,
+      "learning_rate": 4.999050906315055e-05,
+      "loss": 0.3642,
+      "step": 220
+    },
+    {
+      "epoch": 0.016689645163631087,
+      "grad_norm": 1.875,
+      "learning_rate": 4.998886145342434e-05,
+      "loss": 0.4811,
+      "step": 230
+    },
+    {
+      "epoch": 0.017415281909875915,
+      "grad_norm": 3.546875,
+      "learning_rate": 4.9987082075581684e-05,
+      "loss": 0.4725,
+      "step": 240
+    },
+    {
+      "epoch": 0.018140918656120747,
+      "grad_norm": 3.046875,
+      "learning_rate": 4.9985170939005386e-05,
+      "loss": 0.3427,
+      "step": 250
+    },
+    {
+      "epoch": 0.018866555402365575,
+      "grad_norm": 4.5625,
+      "learning_rate": 4.998312805377302e-05,
+      "loss": 0.3568,
+      "step": 260
+    },
+    {
+      "epoch": 0.019592192148610407,
+      "grad_norm": 4.5,
+      "learning_rate": 4.998095343065685e-05,
+      "loss": 0.3803,
+      "step": 270
+    },
+    {
+      "epoch": 0.020317828894855235,
+      "grad_norm": 3.984375,
+      "learning_rate": 4.997864708112384e-05,
+      "loss": 0.3002,
+      "step": 280
+    },
+    {
+      "epoch": 0.021043465641100066,
+      "grad_norm": 6.0625,
+      "learning_rate": 4.997620901733554e-05,
+      "loss": 0.3354,
+      "step": 290
+    },
+    {
+      "epoch": 0.021769102387344894,
+      "grad_norm": 2.703125,
+      "learning_rate": 4.997363925214803e-05,
+      "loss": 0.3285,
+      "step": 300
+    },
+    {
+      "epoch": 0.022494739133589726,
+      "grad_norm": 3.171875,
+      "learning_rate": 4.9970937799111896e-05,
+      "loss": 0.2985,
+      "step": 310
+    },
+    {
+      "epoch": 0.023220375879834554,
+      "grad_norm": 6.15625,
+      "learning_rate": 4.996810467247207e-05,
+      "loss": 0.3869,
+      "step": 320
+    },
+    {
+      "epoch": 0.023946012626079385,
+      "grad_norm": 2.03125,
+      "learning_rate": 4.9965139887167856e-05,
+      "loss": 0.2738,
+      "step": 330
+    },
+    {
+      "epoch": 0.024671649372324213,
+      "grad_norm": 2.9375,
+      "learning_rate": 4.996204345883278e-05,
+      "loss": 0.3396,
+      "step": 340
+    },
+    {
+      "epoch": 0.025397286118569045,
+      "grad_norm": 2.484375,
+      "learning_rate": 4.9958815403794546e-05,
+      "loss": 0.3736,
+      "step": 350
+    },
+    {
+      "epoch": 0.026122922864813873,
+      "grad_norm": 5.09375,
+      "learning_rate": 4.995545573907492e-05,
+      "loss": 0.3458,
+      "step": 360
+    },
+    {
+      "epoch": 0.026848559611058705,
+      "grad_norm": 4.875,
+      "learning_rate": 4.995196448238966e-05,
+      "loss": 0.3279,
+      "step": 370
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 13781,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 10,
+  "total_flos": 0.0,
+  "train_batch_size": 5,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-370/training_args.bin b/checkpoint-370/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..4c28dcaa99e0497b1c861e1642ce3651d4277bea
--- /dev/null
+++ b/checkpoint-370/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9f4cfc15f45dfcabb458cfe7e31a5e2a32e96eacab3ae733ab7eefd0eca041b5
+size 4984
diff --git a/checkpoint-380/README.md b/checkpoint-380/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..64d55750c9c68c8ef0ccff2f65b1fd32a16578a9
--- /dev/null
+++ b/checkpoint-380/README.md
@@ -0,0 +1,202 @@
+---
+library_name: peft
+base_model: ../pretrained/MiniCPM-2B-dpo-bf16/
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.10.0
\ No newline at end of file
diff --git a/checkpoint-380/adapter_config.json b/checkpoint-380/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..d0eb9db7c6b0c827aba0e86e5057f651b0f868c9
--- /dev/null
+++ b/checkpoint-380/adapter_config.json
@@ -0,0 +1,29 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "../pretrained/MiniCPM-2B-dpo-bf16/",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": "gaussian",
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-380/adapter_model.safetensors b/checkpoint-380/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..7471554c29eaf6f5734ed01382d4dd3cd529954e
--- /dev/null
+++ b/checkpoint-380/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:652850b2c251be39cd241b528944f9f2d099c6ef636063d8c7fe6ea07441e176
+size 5919456
diff --git a/checkpoint-380/optimizer.pt b/checkpoint-380/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ec92f6d46ebfd71b912464099475a0d63286000b
--- /dev/null
+++ b/checkpoint-380/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:56e7571274b0c922c2bd9654bd196e47554139cee783ab91f12d304cd3c8a4ac
+size 11930938
diff --git a/checkpoint-380/rng_state_0.pth b/checkpoint-380/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..56391f4ae7e42ee41ef43a331c68c6d12f723b79
--- /dev/null
+++ b/checkpoint-380/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:869f7f45cbd2839fb784ff244fdb5e948a544e107fdee68028fa6014c4a3e871
+size 15024
diff --git a/checkpoint-380/rng_state_1.pth b/checkpoint-380/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..774a7a29447decc4934acc03c3a446a84bd7fe21
--- /dev/null
+++ b/checkpoint-380/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:045f127d34df78c369ad340be7cfade66e2235f599400ab8377e1e0292680dcc
+size 15024
diff --git a/checkpoint-380/rng_state_2.pth b/checkpoint-380/rng_state_2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..f18432f91b1aa118f1cad725f96f947db03fffa7
--- /dev/null
+++ b/checkpoint-380/rng_state_2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a0bc04bd5a571fccfb0d78451e477662a81282126fc11f1140f74fe348b5e75e
+size 15024
diff --git a/checkpoint-380/rng_state_3.pth b/checkpoint-380/rng_state_3.pth
new file mode 100644
index 0000000000000000000000000000000000000000..97ec1ba887c9aa5c1e67121da363327277afc228
--- /dev/null
+++ b/checkpoint-380/rng_state_3.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e877c1e028fb86d09f7a062dad4347790ec12c4e0fccb67351c942ed83d06460
+size 15024
diff --git a/checkpoint-380/scheduler.pt b/checkpoint-380/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f78def2b81cca76278d5faeec6f104acbc15fcd2
--- /dev/null
+++ b/checkpoint-380/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:56bf66a5ff3989adabe4941f33f0decd5a3945d0ccdba60d0b9944c8a66e1ad4
+size 1064
diff --git a/checkpoint-380/trainer_state.json b/checkpoint-380/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..a728b27b6a43499e2eafd4ad8f1a04cfa7feae7f
--- /dev/null
+++ b/checkpoint-380/trainer_state.json
@@ -0,0 +1,287 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.027574196357303533,
+  "eval_steps": 500,
+  "global_step": 380,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0007256367462448298,
+      "grad_norm": 15.6875,
+      "learning_rate": 5e-06,
+      "loss": 3.2562,
+      "step": 10
+    },
+    {
+      "epoch": 0.0014512734924896596,
+      "grad_norm": 17.5,
+      "learning_rate": 1e-05,
+      "loss": 2.9076,
+      "step": 20
+    },
+    {
+      "epoch": 0.0021769102387344894,
+      "grad_norm": 19.125,
+      "learning_rate": 1.5e-05,
+      "loss": 3.0281,
+      "step": 30
+    },
+    {
+      "epoch": 0.0029025469849793192,
+      "grad_norm": 17.875,
+      "learning_rate": 2e-05,
+      "loss": 2.6225,
+      "step": 40
+    },
+    {
+      "epoch": 0.003628183731224149,
+      "grad_norm": 13.9375,
+      "learning_rate": 2.5e-05,
+      "loss": 2.5137,
+      "step": 50
+    },
+    {
+      "epoch": 0.004353820477468979,
+      "grad_norm": 10.6875,
+      "learning_rate": 3e-05,
+      "loss": 1.994,
+      "step": 60
+    },
+    {
+      "epoch": 0.005079457223713809,
+      "grad_norm": 7.53125,
+      "learning_rate": 3.5e-05,
+      "loss": 1.5519,
+      "step": 70
+    },
+    {
+      "epoch": 0.0058050939699586385,
+      "grad_norm": 7.09375,
+      "learning_rate": 4e-05,
+      "loss": 1.2358,
+      "step": 80
+    },
+    {
+      "epoch": 0.006530730716203468,
+      "grad_norm": 8.0625,
+      "learning_rate": 4.5e-05,
+      "loss": 0.953,
+      "step": 90
+    },
+    {
+      "epoch": 0.007256367462448298,
+      "grad_norm": 6.6875,
+      "learning_rate": 5e-05,
+      "loss": 0.6256,
+      "step": 100
+    },
+    {
+      "epoch": 0.007982004208693128,
+      "grad_norm": 5.03125,
+      "learning_rate": 4.9999934086574596e-05,
+      "loss": 0.6601,
+      "step": 110
+    },
+    {
+      "epoch": 0.008707640954937958,
+      "grad_norm": 5.59375,
+      "learning_rate": 4.9999736346645943e-05,
+      "loss": 0.659,
+      "step": 120
+    },
+    {
+      "epoch": 0.009433277701182788,
+      "grad_norm": 3.953125,
+      "learning_rate": 4.999940678125673e-05,
+      "loss": 0.512,
+      "step": 130
+    },
+    {
+      "epoch": 0.010158914447427617,
+      "grad_norm": 5.46875,
+      "learning_rate": 4.9998945392144796e-05,
+      "loss": 0.5522,
+      "step": 140
+    },
+    {
+      "epoch": 0.010884551193672447,
+      "grad_norm": 3.96875,
+      "learning_rate": 4.999835218174307e-05,
+      "loss": 0.5532,
+      "step": 150
+    },
+    {
+      "epoch": 0.011610187939917277,
+      "grad_norm": 3.234375,
+      "learning_rate": 4.99976271531796e-05,
+      "loss": 0.5353,
+      "step": 160
+    },
+    {
+      "epoch": 0.012335824686162107,
+      "grad_norm": 4.5,
+      "learning_rate": 4.9996770310277506e-05,
+      "loss": 0.4439,
+      "step": 170
+    },
+    {
+      "epoch": 0.013061461432406937,
+      "grad_norm": 4.03125,
+      "learning_rate": 4.9995781657555e-05,
+      "loss": 0.4832,
+      "step": 180
+    },
+    {
+      "epoch": 0.013787098178651766,
+      "grad_norm": 4.71875,
+      "learning_rate": 4.99946612002253e-05,
+      "loss": 0.5325,
+      "step": 190
+    },
+    {
+      "epoch": 0.014512734924896596,
+      "grad_norm": 2.390625,
+      "learning_rate": 4.9993408944196676e-05,
+      "loss": 0.3253,
+      "step": 200
+    },
+    {
+      "epoch": 0.015238371671141426,
+      "grad_norm": 4.25,
+      "learning_rate": 4.9992024896072364e-05,
+      "loss": 0.3897,
+      "step": 210
+    },
+    {
+      "epoch": 0.015964008417386256,
+      "grad_norm": 5.65625,
+      "learning_rate": 4.999050906315055e-05,
+      "loss": 0.3642,
+      "step": 220
+    },
+    {
+      "epoch": 0.016689645163631087,
+      "grad_norm": 1.875,
+      "learning_rate": 4.998886145342434e-05,
+      "loss": 0.4811,
+      "step": 230
+    },
+    {
+      "epoch": 0.017415281909875915,
+      "grad_norm": 3.546875,
+      "learning_rate": 4.9987082075581684e-05,
+      "loss": 0.4725,
+      "step": 240
+    },
+    {
+      "epoch": 0.018140918656120747,
+      "grad_norm": 3.046875,
+      "learning_rate": 4.9985170939005386e-05,
+      "loss": 0.3427,
+      "step": 250
+    },
+    {
+      "epoch": 0.018866555402365575,
+      "grad_norm": 4.5625,
+      "learning_rate": 4.998312805377302e-05,
+      "loss": 0.3568,
+      "step": 260
+    },
+    {
+      "epoch": 0.019592192148610407,
+      "grad_norm": 4.5,
+      "learning_rate": 4.998095343065685e-05,
+      "loss": 0.3803,
+      "step": 270
+    },
+    {
+      "epoch": 0.020317828894855235,
+      "grad_norm": 3.984375,
+      "learning_rate": 4.997864708112384e-05,
+      "loss": 0.3002,
+      "step": 280
+    },
+    {
+      "epoch": 0.021043465641100066,
+      "grad_norm": 6.0625,
+      "learning_rate": 4.997620901733554e-05,
+      "loss": 0.3354,
+      "step": 290
+    },
+    {
+      "epoch": 0.021769102387344894,
+      "grad_norm": 2.703125,
+      "learning_rate": 4.997363925214803e-05,
+      "loss": 0.3285,
+      "step": 300
+    },
+    {
+      "epoch": 0.022494739133589726,
+      "grad_norm": 3.171875,
+      "learning_rate": 4.9970937799111896e-05,
+      "loss": 0.2985,
+      "step": 310
+    },
+    {
+      "epoch": 0.023220375879834554,
+      "grad_norm": 6.15625,
+      "learning_rate": 4.996810467247207e-05,
+      "loss": 0.3869,
+      "step": 320
+    },
+    {
+      "epoch": 0.023946012626079385,
+      "grad_norm": 2.03125,
+      "learning_rate": 4.9965139887167856e-05,
+      "loss": 0.2738,
+      "step": 330
+    },
+    {
+      "epoch": 0.024671649372324213,
+      "grad_norm": 2.9375,
+      "learning_rate": 4.996204345883278e-05,
+      "loss": 0.3396,
+      "step": 340
+    },
+    {
+      "epoch": 0.025397286118569045,
+      "grad_norm": 2.484375,
+      "learning_rate": 4.9958815403794546e-05,
+      "loss": 0.3736,
+      "step": 350
+    },
+    {
+      "epoch": 0.026122922864813873,
+      "grad_norm": 5.09375,
+      "learning_rate": 4.995545573907492e-05,
+      "loss": 0.3458,
+      "step": 360
+    },
+    {
+      "epoch": 0.026848559611058705,
+      "grad_norm": 4.875,
+      "learning_rate": 4.995196448238966e-05,
+      "loss": 0.3279,
+      "step": 370
+    },
+    {
+      "epoch": 0.027574196357303533,
+      "grad_norm": 4.90625,
+      "learning_rate": 4.9948341652148436e-05,
+      "loss": 0.2605,
+      "step": 380
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 13781,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 10,
+  "total_flos": 0.0,
+  "train_batch_size": 5,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-380/training_args.bin b/checkpoint-380/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..4c28dcaa99e0497b1c861e1642ce3651d4277bea
--- /dev/null
+++ b/checkpoint-380/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9f4cfc15f45dfcabb458cfe7e31a5e2a32e96eacab3ae733ab7eefd0eca041b5
+size 4984
diff --git a/checkpoint-390/README.md b/checkpoint-390/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..64d55750c9c68c8ef0ccff2f65b1fd32a16578a9
--- /dev/null
+++ b/checkpoint-390/README.md
@@ -0,0 +1,202 @@
+---
+library_name: peft
+base_model: ../pretrained/MiniCPM-2B-dpo-bf16/
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.10.0
\ No newline at end of file
diff --git a/checkpoint-390/adapter_config.json b/checkpoint-390/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..d0eb9db7c6b0c827aba0e86e5057f651b0f868c9
--- /dev/null
+++ b/checkpoint-390/adapter_config.json
@@ -0,0 +1,29 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "../pretrained/MiniCPM-2B-dpo-bf16/",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": "gaussian",
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-390/adapter_model.safetensors b/checkpoint-390/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..c814b90baabd75001bcbe50446eb962d212b7e72
--- /dev/null
+++ b/checkpoint-390/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2f2e624cf57fc799376620a8462f24f677597938fb5b91ec1a3c31506c439bea
+size 5919456
diff --git a/checkpoint-390/optimizer.pt b/checkpoint-390/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3413caee84a64635c108ff8f94118fde9955a2d6
--- /dev/null
+++ b/checkpoint-390/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:58ac964034de08c40e43dbb7f506cbb385bd9b975f5b74a3f32f6255091dcf5c
+size 11930938
diff --git a/checkpoint-390/rng_state_0.pth b/checkpoint-390/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..bcb1b3ee5fbbbad35c41795578a7881127a9c5f8
--- /dev/null
+++ b/checkpoint-390/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e746fd42b939bd101d682a4bccf8e64a77e73b3c9f7ab0111d0c59a298820909
+size 15024
diff --git a/checkpoint-390/rng_state_1.pth b/checkpoint-390/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..e9121b38dc03afffd7733531c9f8f90f1070129d
--- /dev/null
+++ b/checkpoint-390/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6ed64606035f6a7ff110850dddfdcde7e8fa6439784c71880243d45b5713d79a
+size 15024
diff --git a/checkpoint-390/rng_state_2.pth b/checkpoint-390/rng_state_2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..1f04bd2afd9a3bb13834aed056e2206c9e53b624
--- /dev/null
+++ b/checkpoint-390/rng_state_2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:70878cf40acd532d3d9ddedbbf5dd8d639f13977f1257f64c5ea95dd4d94d264
+size 15024
diff --git a/checkpoint-390/rng_state_3.pth b/checkpoint-390/rng_state_3.pth
new file mode 100644
index 0000000000000000000000000000000000000000..6a0b6e17f7b5baa4fa57f93b3c1cdeb7ea601988
--- /dev/null
+++ b/checkpoint-390/rng_state_3.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4061a0958b75c03e67bef7eecf7933b3e14b5bb1a44480db37b14b3a00a29f0d
+size 15024
diff --git a/checkpoint-390/scheduler.pt b/checkpoint-390/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4ebc1f32966fb9554744d3c3b42a2297088b9233
--- /dev/null
+++ b/checkpoint-390/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:168b22309e50ae56cbf061069c93c7242e884d92a6ae24caba89cf4a3e8f5ca4
+size 1064
diff --git a/checkpoint-390/trainer_state.json b/checkpoint-390/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..24502177a4149fd19f9f5b8effd0ed2b5328002a
--- /dev/null
+++ b/checkpoint-390/trainer_state.json
@@ -0,0 +1,294 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.028299833103548364,
+  "eval_steps": 500,
+  "global_step": 390,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0007256367462448298,
+      "grad_norm": 15.6875,
+      "learning_rate": 5e-06,
+      "loss": 3.2562,
+      "step": 10
+    },
+    {
+      "epoch": 0.0014512734924896596,
+      "grad_norm": 17.5,
+      "learning_rate": 1e-05,
+      "loss": 2.9076,
+      "step": 20
+    },
+    {
+      "epoch": 0.0021769102387344894,
+      "grad_norm": 19.125,
+      "learning_rate": 1.5e-05,
+      "loss": 3.0281,
+      "step": 30
+    },
+    {
+      "epoch": 0.0029025469849793192,
+      "grad_norm": 17.875,
+      "learning_rate": 2e-05,
+      "loss": 2.6225,
+      "step": 40
+    },
+    {
+      "epoch": 0.003628183731224149,
+      "grad_norm": 13.9375,
+      "learning_rate": 2.5e-05,
+      "loss": 2.5137,
+      "step": 50
+    },
+    {
+      "epoch": 0.004353820477468979,
+      "grad_norm": 10.6875,
+      "learning_rate": 3e-05,
+      "loss": 1.994,
+      "step": 60
+    },
+    {
+      "epoch": 0.005079457223713809,
+      "grad_norm": 7.53125,
+      "learning_rate": 3.5e-05,
+      "loss": 1.5519,
+      "step": 70
+    },
+    {
+      "epoch": 0.0058050939699586385,
+      "grad_norm": 7.09375,
+      "learning_rate": 4e-05,
+      "loss": 1.2358,
+      "step": 80
+    },
+    {
+      "epoch": 0.006530730716203468,
+      "grad_norm": 8.0625,
+      "learning_rate": 4.5e-05,
+      "loss": 0.953,
+      "step": 90
+    },
+    {
+      "epoch": 0.007256367462448298,
+      "grad_norm": 6.6875,
+      "learning_rate": 5e-05,
+      "loss": 0.6256,
+      "step": 100
+    },
+    {
+      "epoch": 0.007982004208693128,
+      "grad_norm": 5.03125,
+      "learning_rate": 4.9999934086574596e-05,
+      "loss": 0.6601,
+      "step": 110
+    },
+    {
+      "epoch": 0.008707640954937958,
+      "grad_norm": 5.59375,
+      "learning_rate": 4.9999736346645943e-05,
+      "loss": 0.659,
+      "step": 120
+    },
+    {
+      "epoch": 0.009433277701182788,
+      "grad_norm": 3.953125,
+      "learning_rate": 4.999940678125673e-05,
+      "loss": 0.512,
+      "step": 130
+    },
+    {
+      "epoch": 0.010158914447427617,
+      "grad_norm": 5.46875,
+      "learning_rate": 4.9998945392144796e-05,
+      "loss": 0.5522,
+      "step": 140
+    },
+    {
+      "epoch": 0.010884551193672447,
+      "grad_norm": 3.96875,
+      "learning_rate": 4.999835218174307e-05,
+      "loss": 0.5532,
+      "step": 150
+    },
+    {
+      "epoch": 0.011610187939917277,
+      "grad_norm": 3.234375,
+      "learning_rate": 4.99976271531796e-05,
+      "loss": 0.5353,
+      "step": 160
+    },
+    {
+      "epoch": 0.012335824686162107,
+      "grad_norm": 4.5,
+      "learning_rate": 4.9996770310277506e-05,
+      "loss": 0.4439,
+      "step": 170
+    },
+    {
+      "epoch": 0.013061461432406937,
+      "grad_norm": 4.03125,
+      "learning_rate": 4.9995781657555e-05,
+      "loss": 0.4832,
+      "step": 180
+    },
+    {
+      "epoch": 0.013787098178651766,
+      "grad_norm": 4.71875,
+      "learning_rate": 4.99946612002253e-05,
+      "loss": 0.5325,
+      "step": 190
+    },
+    {
+      "epoch": 0.014512734924896596,
+      "grad_norm": 2.390625,
+      "learning_rate": 4.9993408944196676e-05,
+      "loss": 0.3253,
+      "step": 200
+    },
+    {
+      "epoch": 0.015238371671141426,
+      "grad_norm": 4.25,
+      "learning_rate": 4.9992024896072364e-05,
+      "loss": 0.3897,
+      "step": 210
+    },
+    {
+      "epoch": 0.015964008417386256,
+      "grad_norm": 5.65625,
+      "learning_rate": 4.999050906315055e-05,
+      "loss": 0.3642,
+      "step": 220
+    },
+    {
+      "epoch": 0.016689645163631087,
+      "grad_norm": 1.875,
+      "learning_rate": 4.998886145342434e-05,
+      "loss": 0.4811,
+      "step": 230
+    },
+    {
+      "epoch": 0.017415281909875915,
+      "grad_norm": 3.546875,
+      "learning_rate": 4.9987082075581684e-05,
+      "loss": 0.4725,
+      "step": 240
+    },
+    {
+      "epoch": 0.018140918656120747,
+      "grad_norm": 3.046875,
+      "learning_rate": 4.9985170939005386e-05,
+      "loss": 0.3427,
+      "step": 250
+    },
+    {
+      "epoch": 0.018866555402365575,
+      "grad_norm": 4.5625,
+      "learning_rate": 4.998312805377302e-05,
+      "loss": 0.3568,
+      "step": 260
+    },
+    {
+      "epoch": 0.019592192148610407,
+      "grad_norm": 4.5,
+      "learning_rate": 4.998095343065685e-05,
+      "loss": 0.3803,
+      "step": 270
+    },
+    {
+      "epoch": 0.020317828894855235,
+      "grad_norm": 3.984375,
+      "learning_rate": 4.997864708112384e-05,
+      "loss": 0.3002,
+      "step": 280
+    },
+    {
+      "epoch": 0.021043465641100066,
+      "grad_norm": 6.0625,
+      "learning_rate": 4.997620901733554e-05,
+      "loss": 0.3354,
+      "step": 290
+    },
+    {
+      "epoch": 0.021769102387344894,
+      "grad_norm": 2.703125,
+      "learning_rate": 4.997363925214803e-05,
+      "loss": 0.3285,
+      "step": 300
+    },
+    {
+      "epoch": 0.022494739133589726,
+      "grad_norm": 3.171875,
+      "learning_rate": 4.9970937799111896e-05,
+      "loss": 0.2985,
+      "step": 310
+    },
+    {
+      "epoch": 0.023220375879834554,
+      "grad_norm": 6.15625,
+      "learning_rate": 4.996810467247207e-05,
+      "loss": 0.3869,
+      "step": 320
+    },
+    {
+      "epoch": 0.023946012626079385,
+      "grad_norm": 2.03125,
+      "learning_rate": 4.9965139887167856e-05,
+      "loss": 0.2738,
+      "step": 330
+    },
+    {
+      "epoch": 0.024671649372324213,
+      "grad_norm": 2.9375,
+      "learning_rate": 4.996204345883278e-05,
+      "loss": 0.3396,
+      "step": 340
+    },
+    {
+      "epoch": 0.025397286118569045,
+      "grad_norm": 2.484375,
+      "learning_rate": 4.9958815403794546e-05,
+      "loss": 0.3736,
+      "step": 350
+    },
+    {
+      "epoch": 0.026122922864813873,
+      "grad_norm": 5.09375,
+      "learning_rate": 4.995545573907492e-05,
+      "loss": 0.3458,
+      "step": 360
+    },
+    {
+      "epoch": 0.026848559611058705,
+      "grad_norm": 4.875,
+      "learning_rate": 4.995196448238966e-05,
+      "loss": 0.3279,
+      "step": 370
+    },
+    {
+      "epoch": 0.027574196357303533,
+      "grad_norm": 4.90625,
+      "learning_rate": 4.9948341652148436e-05,
+      "loss": 0.2605,
+      "step": 380
+    },
+    {
+      "epoch": 0.028299833103548364,
+      "grad_norm": 5.40625,
+      "learning_rate": 4.994458726745468e-05,
+      "loss": 0.3662,
+      "step": 390
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 13781,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 10,
+  "total_flos": 0.0,
+  "train_batch_size": 5,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-390/training_args.bin b/checkpoint-390/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..4c28dcaa99e0497b1c861e1642ce3651d4277bea
--- /dev/null
+++ b/checkpoint-390/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9f4cfc15f45dfcabb458cfe7e31a5e2a32e96eacab3ae733ab7eefd0eca041b5
+size 4984
diff --git a/checkpoint-40/README.md b/checkpoint-40/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..64d55750c9c68c8ef0ccff2f65b1fd32a16578a9
--- /dev/null
+++ b/checkpoint-40/README.md
@@ -0,0 +1,202 @@
+---
+library_name: peft
+base_model: ../pretrained/MiniCPM-2B-dpo-bf16/
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.10.0
\ No newline at end of file
diff --git a/checkpoint-40/adapter_config.json b/checkpoint-40/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..d0eb9db7c6b0c827aba0e86e5057f651b0f868c9
--- /dev/null
+++ b/checkpoint-40/adapter_config.json
@@ -0,0 +1,29 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "../pretrained/MiniCPM-2B-dpo-bf16/",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": "gaussian",
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-40/adapter_model.safetensors b/checkpoint-40/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..8b26b49c200cae3e8ff1aa9b5d55764e06b1a4fd
--- /dev/null
+++ b/checkpoint-40/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:88466e8b6e9e330e1404183cc712253fb8d267ff84be8fa7f70e7428210608db
+size 5919456
diff --git a/checkpoint-40/optimizer.pt b/checkpoint-40/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f5c7bfa3790f79cc672e66593c3a00e262bc9734
--- /dev/null
+++ b/checkpoint-40/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fb78e054d5fd947b5380d087f60ff21600c26376469f84c84b355bcd5590b7bf
+size 11930938
diff --git a/checkpoint-40/rng_state_0.pth b/checkpoint-40/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..9e8f16ac08aebd809ed776ef1ac75d3ee0e96ff9
--- /dev/null
+++ b/checkpoint-40/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e1f8643655e1b0b84191728db04abf511651177d3f1347864bfc77509451a6e8
+size 15024
diff --git a/checkpoint-40/rng_state_1.pth b/checkpoint-40/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..782296e4dbb91ea8def64d3b86fb5eaf05b4ef66
--- /dev/null
+++ b/checkpoint-40/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:65e85d9abc56f5dfd5d665c16de6a0a6bf782dca748ce6a340b75e82d74aeb39
+size 15024
diff --git a/checkpoint-40/rng_state_2.pth b/checkpoint-40/rng_state_2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..627ef6562eeb84b6bf2e753a35f57293e11f918c
--- /dev/null
+++ b/checkpoint-40/rng_state_2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5f2d9d0c83de2024e3aa7cb67c9ccf00f23a1d9dcc3af75beabee1dddc69d0a9
+size 15024
diff --git a/checkpoint-40/rng_state_3.pth b/checkpoint-40/rng_state_3.pth
new file mode 100644
index 0000000000000000000000000000000000000000..0a349098c52cc9cdb6fff24f55ebc5df9954ea42
--- /dev/null
+++ b/checkpoint-40/rng_state_3.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:41ffc7ad388f1b28562b762777d46e208986f47ae3693de6880ff38dfa102632
+size 15024
diff --git a/checkpoint-40/scheduler.pt b/checkpoint-40/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8aac13d4e14f507a7756383f7706f53f0d6fa7cd
--- /dev/null
+++ b/checkpoint-40/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3295d2e3095f70f316e50f46ef1873c74c6da1816fd3e20f466117235b1b6979
+size 1064
diff --git a/checkpoint-40/trainer_state.json b/checkpoint-40/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..0227bf6c43bfa80acb9e030292233b06fca86090
--- /dev/null
+++ b/checkpoint-40/trainer_state.json
@@ -0,0 +1,49 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.0029025469849793192,
+  "eval_steps": 500,
+  "global_step": 40,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0007256367462448298,
+      "grad_norm": 15.6875,
+      "learning_rate": 5e-06,
+      "loss": 3.2562,
+      "step": 10
+    },
+    {
+      "epoch": 0.0014512734924896596,
+      "grad_norm": 17.5,
+      "learning_rate": 1e-05,
+      "loss": 2.9076,
+      "step": 20
+    },
+    {
+      "epoch": 0.0021769102387344894,
+      "grad_norm": 19.125,
+      "learning_rate": 1.5e-05,
+      "loss": 3.0281,
+      "step": 30
+    },
+    {
+      "epoch": 0.0029025469849793192,
+      "grad_norm": 17.875,
+      "learning_rate": 2e-05,
+      "loss": 2.6225,
+      "step": 40
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 13781,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 10,
+  "total_flos": 0.0,
+  "train_batch_size": 5,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-40/training_args.bin b/checkpoint-40/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..4c28dcaa99e0497b1c861e1642ce3651d4277bea
--- /dev/null
+++ b/checkpoint-40/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9f4cfc15f45dfcabb458cfe7e31a5e2a32e96eacab3ae733ab7eefd0eca041b5
+size 4984
diff --git a/checkpoint-400/README.md b/checkpoint-400/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..64d55750c9c68c8ef0ccff2f65b1fd32a16578a9
--- /dev/null
+++ b/checkpoint-400/README.md
@@ -0,0 +1,202 @@
+---
+library_name: peft
+base_model: ../pretrained/MiniCPM-2B-dpo-bf16/
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.10.0
\ No newline at end of file
diff --git a/checkpoint-400/adapter_config.json b/checkpoint-400/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..d0eb9db7c6b0c827aba0e86e5057f651b0f868c9
--- /dev/null
+++ b/checkpoint-400/adapter_config.json
@@ -0,0 +1,29 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "../pretrained/MiniCPM-2B-dpo-bf16/",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": "gaussian",
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-400/adapter_model.safetensors b/checkpoint-400/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..02a3a3a866f28ac7a4c18126b1eb2f1be5cc0234
--- /dev/null
+++ b/checkpoint-400/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:87c5f915fc8eeaf0bb6ff598be51971adb515d915e7ec2586521ad6fee292ff2
+size 5919456
diff --git a/checkpoint-400/optimizer.pt b/checkpoint-400/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4bc2adf70f70bb79815633afcefb7aecb99b4230
--- /dev/null
+++ b/checkpoint-400/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:95a88d7950adf45068066d19de88a30dae8333405c80ed793d21b21b934cbfce
+size 11930938
diff --git a/checkpoint-400/rng_state_0.pth b/checkpoint-400/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..c2ea04feadfa53923087ac350490049ecc04ad0e
--- /dev/null
+++ b/checkpoint-400/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:54dc0e933d802a54778f931ba67b11a16b46e9d6a694245077dc890d7f369d06
+size 15024
diff --git a/checkpoint-400/rng_state_1.pth b/checkpoint-400/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..7ee05facc9af30e8a6fc98198d9fc6392d85da19
--- /dev/null
+++ b/checkpoint-400/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:07686b712dd1aa2a2280fa61110438ceda047eb1071bdd181487081671ab0a93
+size 15024
diff --git a/checkpoint-400/rng_state_2.pth b/checkpoint-400/rng_state_2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..b33dd65ae6427ff942048d715ebbb5ab3a2300f6
--- /dev/null
+++ b/checkpoint-400/rng_state_2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f33ba30afda151e1a06581ceff90e24fa69a2593e15f352510b6cdbbeebe383c
+size 15024
diff --git a/checkpoint-400/rng_state_3.pth b/checkpoint-400/rng_state_3.pth
new file mode 100644
index 0000000000000000000000000000000000000000..5cdbc3b6f8f0af49d64b6172e1da0c7e1999ce4e
--- /dev/null
+++ b/checkpoint-400/rng_state_3.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:627c132479f39e9e71ecaf7302104bd89d7fbc86afe155dbb421a1553b73e3fc
+size 15024
diff --git a/checkpoint-400/scheduler.pt b/checkpoint-400/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9e1273501996a133588afca29e702712ec58f15d
--- /dev/null
+++ b/checkpoint-400/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f8ef4d3822d3bae7d5f3f9e7e73362592f9d9c2a32940cadb76b6a34495013b2
+size 1064
diff --git a/checkpoint-400/trainer_state.json b/checkpoint-400/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..5f2f8138206bf290caea341391e02a93e8414a8a
--- /dev/null
+++ b/checkpoint-400/trainer_state.json
@@ -0,0 +1,301 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.029025469849793192,
+  "eval_steps": 500,
+  "global_step": 400,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0007256367462448298,
+      "grad_norm": 15.6875,
+      "learning_rate": 5e-06,
+      "loss": 3.2562,
+      "step": 10
+    },
+    {
+      "epoch": 0.0014512734924896596,
+      "grad_norm": 17.5,
+      "learning_rate": 1e-05,
+      "loss": 2.9076,
+      "step": 20
+    },
+    {
+      "epoch": 0.0021769102387344894,
+      "grad_norm": 19.125,
+      "learning_rate": 1.5e-05,
+      "loss": 3.0281,
+      "step": 30
+    },
+    {
+      "epoch": 0.0029025469849793192,
+      "grad_norm": 17.875,
+      "learning_rate": 2e-05,
+      "loss": 2.6225,
+      "step": 40
+    },
+    {
+      "epoch": 0.003628183731224149,
+      "grad_norm": 13.9375,
+      "learning_rate": 2.5e-05,
+      "loss": 2.5137,
+      "step": 50
+    },
+    {
+      "epoch": 0.004353820477468979,
+      "grad_norm": 10.6875,
+      "learning_rate": 3e-05,
+      "loss": 1.994,
+      "step": 60
+    },
+    {
+      "epoch": 0.005079457223713809,
+      "grad_norm": 7.53125,
+      "learning_rate": 3.5e-05,
+      "loss": 1.5519,
+      "step": 70
+    },
+    {
+      "epoch": 0.0058050939699586385,
+      "grad_norm": 7.09375,
+      "learning_rate": 4e-05,
+      "loss": 1.2358,
+      "step": 80
+    },
+    {
+      "epoch": 0.006530730716203468,
+      "grad_norm": 8.0625,
+      "learning_rate": 4.5e-05,
+      "loss": 0.953,
+      "step": 90
+    },
+    {
+      "epoch": 0.007256367462448298,
+      "grad_norm": 6.6875,
+      "learning_rate": 5e-05,
+      "loss": 0.6256,
+      "step": 100
+    },
+    {
+      "epoch": 0.007982004208693128,
+      "grad_norm": 5.03125,
+      "learning_rate": 4.9999934086574596e-05,
+      "loss": 0.6601,
+      "step": 110
+    },
+    {
+      "epoch": 0.008707640954937958,
+      "grad_norm": 5.59375,
+      "learning_rate": 4.9999736346645943e-05,
+      "loss": 0.659,
+      "step": 120
+    },
+    {
+      "epoch": 0.009433277701182788,
+      "grad_norm": 3.953125,
+      "learning_rate": 4.999940678125673e-05,
+      "loss": 0.512,
+      "step": 130
+    },
+    {
+      "epoch": 0.010158914447427617,
+      "grad_norm": 5.46875,
+      "learning_rate": 4.9998945392144796e-05,
+      "loss": 0.5522,
+      "step": 140
+    },
+    {
+      "epoch": 0.010884551193672447,
+      "grad_norm": 3.96875,
+      "learning_rate": 4.999835218174307e-05,
+      "loss": 0.5532,
+      "step": 150
+    },
+    {
+      "epoch": 0.011610187939917277,
+      "grad_norm": 3.234375,
+      "learning_rate": 4.99976271531796e-05,
+      "loss": 0.5353,
+      "step": 160
+    },
+    {
+      "epoch": 0.012335824686162107,
+      "grad_norm": 4.5,
+      "learning_rate": 4.9996770310277506e-05,
+      "loss": 0.4439,
+      "step": 170
+    },
+    {
+      "epoch": 0.013061461432406937,
+      "grad_norm": 4.03125,
+      "learning_rate": 4.9995781657555e-05,
+      "loss": 0.4832,
+      "step": 180
+    },
+    {
+      "epoch": 0.013787098178651766,
+      "grad_norm": 4.71875,
+      "learning_rate": 4.99946612002253e-05,
+      "loss": 0.5325,
+      "step": 190
+    },
+    {
+      "epoch": 0.014512734924896596,
+      "grad_norm": 2.390625,
+      "learning_rate": 4.9993408944196676e-05,
+      "loss": 0.3253,
+      "step": 200
+    },
+    {
+      "epoch": 0.015238371671141426,
+      "grad_norm": 4.25,
+      "learning_rate": 4.9992024896072364e-05,
+      "loss": 0.3897,
+      "step": 210
+    },
+    {
+      "epoch": 0.015964008417386256,
+      "grad_norm": 5.65625,
+      "learning_rate": 4.999050906315055e-05,
+      "loss": 0.3642,
+      "step": 220
+    },
+    {
+      "epoch": 0.016689645163631087,
+      "grad_norm": 1.875,
+      "learning_rate": 4.998886145342434e-05,
+      "loss": 0.4811,
+      "step": 230
+    },
+    {
+      "epoch": 0.017415281909875915,
+      "grad_norm": 3.546875,
+      "learning_rate": 4.9987082075581684e-05,
+      "loss": 0.4725,
+      "step": 240
+    },
+    {
+      "epoch": 0.018140918656120747,
+      "grad_norm": 3.046875,
+      "learning_rate": 4.9985170939005386e-05,
+      "loss": 0.3427,
+      "step": 250
+    },
+    {
+      "epoch": 0.018866555402365575,
+      "grad_norm": 4.5625,
+      "learning_rate": 4.998312805377302e-05,
+      "loss": 0.3568,
+      "step": 260
+    },
+    {
+      "epoch": 0.019592192148610407,
+      "grad_norm": 4.5,
+      "learning_rate": 4.998095343065685e-05,
+      "loss": 0.3803,
+      "step": 270
+    },
+    {
+      "epoch": 0.020317828894855235,
+      "grad_norm": 3.984375,
+      "learning_rate": 4.997864708112384e-05,
+      "loss": 0.3002,
+      "step": 280
+    },
+    {
+      "epoch": 0.021043465641100066,
+      "grad_norm": 6.0625,
+      "learning_rate": 4.997620901733554e-05,
+      "loss": 0.3354,
+      "step": 290
+    },
+    {
+      "epoch": 0.021769102387344894,
+      "grad_norm": 2.703125,
+      "learning_rate": 4.997363925214803e-05,
+      "loss": 0.3285,
+      "step": 300
+    },
+    {
+      "epoch": 0.022494739133589726,
+      "grad_norm": 3.171875,
+      "learning_rate": 4.9970937799111896e-05,
+      "loss": 0.2985,
+      "step": 310
+    },
+    {
+      "epoch": 0.023220375879834554,
+      "grad_norm": 6.15625,
+      "learning_rate": 4.996810467247207e-05,
+      "loss": 0.3869,
+      "step": 320
+    },
+    {
+      "epoch": 0.023946012626079385,
+      "grad_norm": 2.03125,
+      "learning_rate": 4.9965139887167856e-05,
+      "loss": 0.2738,
+      "step": 330
+    },
+    {
+      "epoch": 0.024671649372324213,
+      "grad_norm": 2.9375,
+      "learning_rate": 4.996204345883278e-05,
+      "loss": 0.3396,
+      "step": 340
+    },
+    {
+      "epoch": 0.025397286118569045,
+      "grad_norm": 2.484375,
+      "learning_rate": 4.9958815403794546e-05,
+      "loss": 0.3736,
+      "step": 350
+    },
+    {
+      "epoch": 0.026122922864813873,
+      "grad_norm": 5.09375,
+      "learning_rate": 4.995545573907492e-05,
+      "loss": 0.3458,
+      "step": 360
+    },
+    {
+      "epoch": 0.026848559611058705,
+      "grad_norm": 4.875,
+      "learning_rate": 4.995196448238966e-05,
+      "loss": 0.3279,
+      "step": 370
+    },
+    {
+      "epoch": 0.027574196357303533,
+      "grad_norm": 4.90625,
+      "learning_rate": 4.9948341652148436e-05,
+      "loss": 0.2605,
+      "step": 380
+    },
+    {
+      "epoch": 0.028299833103548364,
+      "grad_norm": 5.40625,
+      "learning_rate": 4.994458726745468e-05,
+      "loss": 0.3662,
+      "step": 390
+    },
+    {
+      "epoch": 0.029025469849793192,
+      "grad_norm": 0.671875,
+      "learning_rate": 4.9940701348105554e-05,
+      "loss": 0.3122,
+      "step": 400
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 13781,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 10,
+  "total_flos": 0.0,
+  "train_batch_size": 5,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-400/training_args.bin b/checkpoint-400/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..4c28dcaa99e0497b1c861e1642ce3651d4277bea
--- /dev/null
+++ b/checkpoint-400/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9f4cfc15f45dfcabb458cfe7e31a5e2a32e96eacab3ae733ab7eefd0eca041b5
+size 4984
diff --git a/checkpoint-410/README.md b/checkpoint-410/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..64d55750c9c68c8ef0ccff2f65b1fd32a16578a9
--- /dev/null
+++ b/checkpoint-410/README.md
@@ -0,0 +1,202 @@
+---
+library_name: peft
+base_model: ../pretrained/MiniCPM-2B-dpo-bf16/
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.10.0
\ No newline at end of file
diff --git a/checkpoint-410/adapter_config.json b/checkpoint-410/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..d0eb9db7c6b0c827aba0e86e5057f651b0f868c9
--- /dev/null
+++ b/checkpoint-410/adapter_config.json
@@ -0,0 +1,29 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "../pretrained/MiniCPM-2B-dpo-bf16/",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": "gaussian",
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-410/adapter_model.safetensors b/checkpoint-410/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..fb7454cb640baea33e461656b0ea0b68f19cac79
--- /dev/null
+++ b/checkpoint-410/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6d4ded1109935ecdd6f1dea24da239b95bb6451b9548bd03c9b2c0eaeb502359
+size 5919456
diff --git a/checkpoint-410/optimizer.pt b/checkpoint-410/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8e66c798561ea722ee1d76e447ff1ca4d2a1745c
--- /dev/null
+++ b/checkpoint-410/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9c5220cea16e8c2cf8d472b550ecabb074e588e313071d4e4dec00d0e6840e40
+size 11930938
diff --git a/checkpoint-410/rng_state_0.pth b/checkpoint-410/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..5a10219fd6796b7171cfb8fd74630f35e9cb2f19
--- /dev/null
+++ b/checkpoint-410/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:24525db6b2052ede211e33d8f6067c274a52bb12ad7de95fc2dca6926c1b23aa
+size 15024
diff --git a/checkpoint-410/rng_state_1.pth b/checkpoint-410/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..42076162158ac6fe0cc3e3845b4b37224cc4e8ef
--- /dev/null
+++ b/checkpoint-410/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f9d89f1d5d29a830a31f2af11a3b4fdda286d4a34fb2bd7d3abb9e539275dfda
+size 15024
diff --git a/checkpoint-410/rng_state_2.pth b/checkpoint-410/rng_state_2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..d27cc88e72e9e7ef8c3b87830b4724cdf1735033
--- /dev/null
+++ b/checkpoint-410/rng_state_2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ce65d5bea5a8aaa2cca6c5d90b2ad798ac914b1a58181697c804c64922d0bf1a
+size 15024
diff --git a/checkpoint-410/rng_state_3.pth b/checkpoint-410/rng_state_3.pth
new file mode 100644
index 0000000000000000000000000000000000000000..1b4d8b97503f376bae6c386a0987b3d5a9afabdd
--- /dev/null
+++ b/checkpoint-410/rng_state_3.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2a4e8dd5fc8ed03ac9fbbc4d44ca3acc4f96ff97d29cd216f6fbe46c504e77b3
+size 15024
diff --git a/checkpoint-410/scheduler.pt b/checkpoint-410/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..880f16414416e183fbc784f4696f8f324d5e0034
--- /dev/null
+++ b/checkpoint-410/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0ec1216ff56f0a17167cb5916fae13da7113e6881a3a7f6bcb951e0051e59e8e
+size 1064
diff --git a/checkpoint-410/trainer_state.json b/checkpoint-410/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..dca37c02d6e11197d377a3bf0aa0b421a9c4dce9
--- /dev/null
+++ b/checkpoint-410/trainer_state.json
@@ -0,0 +1,308 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.029751106596038024,
+  "eval_steps": 500,
+  "global_step": 410,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0007256367462448298,
+      "grad_norm": 15.6875,
+      "learning_rate": 5e-06,
+      "loss": 3.2562,
+      "step": 10
+    },
+    {
+      "epoch": 0.0014512734924896596,
+      "grad_norm": 17.5,
+      "learning_rate": 1e-05,
+      "loss": 2.9076,
+      "step": 20
+    },
+    {
+      "epoch": 0.0021769102387344894,
+      "grad_norm": 19.125,
+      "learning_rate": 1.5e-05,
+      "loss": 3.0281,
+      "step": 30
+    },
+    {
+      "epoch": 0.0029025469849793192,
+      "grad_norm": 17.875,
+      "learning_rate": 2e-05,
+      "loss": 2.6225,
+      "step": 40
+    },
+    {
+      "epoch": 0.003628183731224149,
+      "grad_norm": 13.9375,
+      "learning_rate": 2.5e-05,
+      "loss": 2.5137,
+      "step": 50
+    },
+    {
+      "epoch": 0.004353820477468979,
+      "grad_norm": 10.6875,
+      "learning_rate": 3e-05,
+      "loss": 1.994,
+      "step": 60
+    },
+    {
+      "epoch": 0.005079457223713809,
+      "grad_norm": 7.53125,
+      "learning_rate": 3.5e-05,
+      "loss": 1.5519,
+      "step": 70
+    },
+    {
+      "epoch": 0.0058050939699586385,
+      "grad_norm": 7.09375,
+      "learning_rate": 4e-05,
+      "loss": 1.2358,
+      "step": 80
+    },
+    {
+      "epoch": 0.006530730716203468,
+      "grad_norm": 8.0625,
+      "learning_rate": 4.5e-05,
+      "loss": 0.953,
+      "step": 90
+    },
+    {
+      "epoch": 0.007256367462448298,
+      "grad_norm": 6.6875,
+      "learning_rate": 5e-05,
+      "loss": 0.6256,
+      "step": 100
+    },
+    {
+      "epoch": 0.007982004208693128,
+      "grad_norm": 5.03125,
+      "learning_rate": 4.9999934086574596e-05,
+      "loss": 0.6601,
+      "step": 110
+    },
+    {
+      "epoch": 0.008707640954937958,
+      "grad_norm": 5.59375,
+      "learning_rate": 4.9999736346645943e-05,
+      "loss": 0.659,
+      "step": 120
+    },
+    {
+      "epoch": 0.009433277701182788,
+      "grad_norm": 3.953125,
+      "learning_rate": 4.999940678125673e-05,
+      "loss": 0.512,
+      "step": 130
+    },
+    {
+      "epoch": 0.010158914447427617,
+      "grad_norm": 5.46875,
+      "learning_rate": 4.9998945392144796e-05,
+      "loss": 0.5522,
+      "step": 140
+    },
+    {
+      "epoch": 0.010884551193672447,
+      "grad_norm": 3.96875,
+      "learning_rate": 4.999835218174307e-05,
+      "loss": 0.5532,
+      "step": 150
+    },
+    {
+      "epoch": 0.011610187939917277,
+      "grad_norm": 3.234375,
+      "learning_rate": 4.99976271531796e-05,
+      "loss": 0.5353,
+      "step": 160
+    },
+    {
+      "epoch": 0.012335824686162107,
+      "grad_norm": 4.5,
+      "learning_rate": 4.9996770310277506e-05,
+      "loss": 0.4439,
+      "step": 170
+    },
+    {
+      "epoch": 0.013061461432406937,
+      "grad_norm": 4.03125,
+      "learning_rate": 4.9995781657555e-05,
+      "loss": 0.4832,
+      "step": 180
+    },
+    {
+      "epoch": 0.013787098178651766,
+      "grad_norm": 4.71875,
+      "learning_rate": 4.99946612002253e-05,
+      "loss": 0.5325,
+      "step": 190
+    },
+    {
+      "epoch": 0.014512734924896596,
+      "grad_norm": 2.390625,
+      "learning_rate": 4.9993408944196676e-05,
+      "loss": 0.3253,
+      "step": 200
+    },
+    {
+      "epoch": 0.015238371671141426,
+      "grad_norm": 4.25,
+      "learning_rate": 4.9992024896072364e-05,
+      "loss": 0.3897,
+      "step": 210
+    },
+    {
+      "epoch": 0.015964008417386256,
+      "grad_norm": 5.65625,
+      "learning_rate": 4.999050906315055e-05,
+      "loss": 0.3642,
+      "step": 220
+    },
+    {
+      "epoch": 0.016689645163631087,
+      "grad_norm": 1.875,
+      "learning_rate": 4.998886145342434e-05,
+      "loss": 0.4811,
+      "step": 230
+    },
+    {
+      "epoch": 0.017415281909875915,
+      "grad_norm": 3.546875,
+      "learning_rate": 4.9987082075581684e-05,
+      "loss": 0.4725,
+      "step": 240
+    },
+    {
+      "epoch": 0.018140918656120747,
+      "grad_norm": 3.046875,
+      "learning_rate": 4.9985170939005386e-05,
+      "loss": 0.3427,
+      "step": 250
+    },
+    {
+      "epoch": 0.018866555402365575,
+      "grad_norm": 4.5625,
+      "learning_rate": 4.998312805377302e-05,
+      "loss": 0.3568,
+      "step": 260
+    },
+    {
+      "epoch": 0.019592192148610407,
+      "grad_norm": 4.5,
+      "learning_rate": 4.998095343065685e-05,
+      "loss": 0.3803,
+      "step": 270
+    },
+    {
+      "epoch": 0.020317828894855235,
+      "grad_norm": 3.984375,
+      "learning_rate": 4.997864708112384e-05,
+      "loss": 0.3002,
+      "step": 280
+    },
+    {
+      "epoch": 0.021043465641100066,
+      "grad_norm": 6.0625,
+      "learning_rate": 4.997620901733554e-05,
+      "loss": 0.3354,
+      "step": 290
+    },
+    {
+      "epoch": 0.021769102387344894,
+      "grad_norm": 2.703125,
+      "learning_rate": 4.997363925214803e-05,
+      "loss": 0.3285,
+      "step": 300
+    },
+    {
+      "epoch": 0.022494739133589726,
+      "grad_norm": 3.171875,
+      "learning_rate": 4.9970937799111896e-05,
+      "loss": 0.2985,
+      "step": 310
+    },
+    {
+      "epoch": 0.023220375879834554,
+      "grad_norm": 6.15625,
+      "learning_rate": 4.996810467247207e-05,
+      "loss": 0.3869,
+      "step": 320
+    },
+    {
+      "epoch": 0.023946012626079385,
+      "grad_norm": 2.03125,
+      "learning_rate": 4.9965139887167856e-05,
+      "loss": 0.2738,
+      "step": 330
+    },
+    {
+      "epoch": 0.024671649372324213,
+      "grad_norm": 2.9375,
+      "learning_rate": 4.996204345883278e-05,
+      "loss": 0.3396,
+      "step": 340
+    },
+    {
+      "epoch": 0.025397286118569045,
+      "grad_norm": 2.484375,
+      "learning_rate": 4.9958815403794546e-05,
+      "loss": 0.3736,
+      "step": 350
+    },
+    {
+      "epoch": 0.026122922864813873,
+      "grad_norm": 5.09375,
+      "learning_rate": 4.995545573907492e-05,
+      "loss": 0.3458,
+      "step": 360
+    },
+    {
+      "epoch": 0.026848559611058705,
+      "grad_norm": 4.875,
+      "learning_rate": 4.995196448238966e-05,
+      "loss": 0.3279,
+      "step": 370
+    },
+    {
+      "epoch": 0.027574196357303533,
+      "grad_norm": 4.90625,
+      "learning_rate": 4.9948341652148436e-05,
+      "loss": 0.2605,
+      "step": 380
+    },
+    {
+      "epoch": 0.028299833103548364,
+      "grad_norm": 5.40625,
+      "learning_rate": 4.994458726745468e-05,
+      "loss": 0.3662,
+      "step": 390
+    },
+    {
+      "epoch": 0.029025469849793192,
+      "grad_norm": 0.671875,
+      "learning_rate": 4.9940701348105554e-05,
+      "loss": 0.3122,
+      "step": 400
+    },
+    {
+      "epoch": 0.029751106596038024,
+      "grad_norm": 3.625,
+      "learning_rate": 4.99366839145918e-05,
+      "loss": 0.2122,
+      "step": 410
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 13781,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 10,
+  "total_flos": 0.0,
+  "train_batch_size": 5,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-410/training_args.bin b/checkpoint-410/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..4c28dcaa99e0497b1c861e1642ce3651d4277bea
--- /dev/null
+++ b/checkpoint-410/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9f4cfc15f45dfcabb458cfe7e31a5e2a32e96eacab3ae733ab7eefd0eca041b5
+size 4984
diff --git a/checkpoint-420/README.md b/checkpoint-420/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..64d55750c9c68c8ef0ccff2f65b1fd32a16578a9
--- /dev/null
+++ b/checkpoint-420/README.md
@@ -0,0 +1,202 @@
+---
+library_name: peft
+base_model: ../pretrained/MiniCPM-2B-dpo-bf16/
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.10.0
\ No newline at end of file
diff --git a/checkpoint-420/adapter_config.json b/checkpoint-420/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..d0eb9db7c6b0c827aba0e86e5057f651b0f868c9
--- /dev/null
+++ b/checkpoint-420/adapter_config.json
@@ -0,0 +1,29 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "../pretrained/MiniCPM-2B-dpo-bf16/",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": "gaussian",
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-420/adapter_model.safetensors b/checkpoint-420/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..67850fb65d8791634d9b347c5e6b3100d359ca56
--- /dev/null
+++ b/checkpoint-420/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5067c6b7cb11125ec82fcfe4807458229939d8d65d7c66017f8f1334adbeccdb
+size 5919456
diff --git a/checkpoint-420/optimizer.pt b/checkpoint-420/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..833e71629c4033292ebfb7e7c15cabab094034ca
--- /dev/null
+++ b/checkpoint-420/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1f189e3a63fe0a184be2e88da8a89c99dedecb45b71facaf5ce73e58781a65f0
+size 11930938
diff --git a/checkpoint-420/rng_state_0.pth b/checkpoint-420/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..f357702f75f062f4be4aead2f569b9b57bb5bf0d
--- /dev/null
+++ b/checkpoint-420/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:426efd39e791d9aad034be3a3278882178e0bafd0949980040e6e0f4e76007ce
+size 15024
diff --git a/checkpoint-420/rng_state_1.pth b/checkpoint-420/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..a7cf9841f4b8d2be7fa958d72ac2c2ce771cce27
--- /dev/null
+++ b/checkpoint-420/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ff1be62cf8f366636ea8722d0645e8074ae04aaa5d3bbc7f1e02d041b584f247
+size 15024
diff --git a/checkpoint-420/rng_state_2.pth b/checkpoint-420/rng_state_2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..b3474a0f04c7d4978bc633c074bb7130a9c059e4
--- /dev/null
+++ b/checkpoint-420/rng_state_2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0c87960dbea60a0ad11846750febf910a22e6bab4ab82a44ec3a565aff8131f2
+size 15024
diff --git a/checkpoint-420/rng_state_3.pth b/checkpoint-420/rng_state_3.pth
new file mode 100644
index 0000000000000000000000000000000000000000..b1d2adc334eddd3078e2b8e301f47f18f1400c7e
--- /dev/null
+++ b/checkpoint-420/rng_state_3.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:eb74cda1e8bc1c7ac4e43ea326433964d0d12f005655436df314179a50c9b4ce
+size 15024
diff --git a/checkpoint-420/scheduler.pt b/checkpoint-420/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..0f20ba691f6e42c5714b417f8d821a22b8f51841
--- /dev/null
+++ b/checkpoint-420/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b4439211fdd47fd6b0e9ce36c269716bc32f11f71c74c63bc12f12d41f9de9be
+size 1064
diff --git a/checkpoint-420/trainer_state.json b/checkpoint-420/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..f4f0d129bcef042a79cebeaabaf997dcf5c4c605
--- /dev/null
+++ b/checkpoint-420/trainer_state.json
@@ -0,0 +1,315 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.030476743342282852,
+  "eval_steps": 500,
+  "global_step": 420,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0007256367462448298,
+      "grad_norm": 15.6875,
+      "learning_rate": 5e-06,
+      "loss": 3.2562,
+      "step": 10
+    },
+    {
+      "epoch": 0.0014512734924896596,
+      "grad_norm": 17.5,
+      "learning_rate": 1e-05,
+      "loss": 2.9076,
+      "step": 20
+    },
+    {
+      "epoch": 0.0021769102387344894,
+      "grad_norm": 19.125,
+      "learning_rate": 1.5e-05,
+      "loss": 3.0281,
+      "step": 30
+    },
+    {
+      "epoch": 0.0029025469849793192,
+      "grad_norm": 17.875,
+      "learning_rate": 2e-05,
+      "loss": 2.6225,
+      "step": 40
+    },
+    {
+      "epoch": 0.003628183731224149,
+      "grad_norm": 13.9375,
+      "learning_rate": 2.5e-05,
+      "loss": 2.5137,
+      "step": 50
+    },
+    {
+      "epoch": 0.004353820477468979,
+      "grad_norm": 10.6875,
+      "learning_rate": 3e-05,
+      "loss": 1.994,
+      "step": 60
+    },
+    {
+      "epoch": 0.005079457223713809,
+      "grad_norm": 7.53125,
+      "learning_rate": 3.5e-05,
+      "loss": 1.5519,
+      "step": 70
+    },
+    {
+      "epoch": 0.0058050939699586385,
+      "grad_norm": 7.09375,
+      "learning_rate": 4e-05,
+      "loss": 1.2358,
+      "step": 80
+    },
+    {
+      "epoch": 0.006530730716203468,
+      "grad_norm": 8.0625,
+      "learning_rate": 4.5e-05,
+      "loss": 0.953,
+      "step": 90
+    },
+    {
+      "epoch": 0.007256367462448298,
+      "grad_norm": 6.6875,
+      "learning_rate": 5e-05,
+      "loss": 0.6256,
+      "step": 100
+    },
+    {
+      "epoch": 0.007982004208693128,
+      "grad_norm": 5.03125,
+      "learning_rate": 4.9999934086574596e-05,
+      "loss": 0.6601,
+      "step": 110
+    },
+    {
+      "epoch": 0.008707640954937958,
+      "grad_norm": 5.59375,
+      "learning_rate": 4.9999736346645943e-05,
+      "loss": 0.659,
+      "step": 120
+    },
+    {
+      "epoch": 0.009433277701182788,
+      "grad_norm": 3.953125,
+      "learning_rate": 4.999940678125673e-05,
+      "loss": 0.512,
+      "step": 130
+    },
+    {
+      "epoch": 0.010158914447427617,
+      "grad_norm": 5.46875,
+      "learning_rate": 4.9998945392144796e-05,
+      "loss": 0.5522,
+      "step": 140
+    },
+    {
+      "epoch": 0.010884551193672447,
+      "grad_norm": 3.96875,
+      "learning_rate": 4.999835218174307e-05,
+      "loss": 0.5532,
+      "step": 150
+    },
+    {
+      "epoch": 0.011610187939917277,
+      "grad_norm": 3.234375,
+      "learning_rate": 4.99976271531796e-05,
+      "loss": 0.5353,
+      "step": 160
+    },
+    {
+      "epoch": 0.012335824686162107,
+      "grad_norm": 4.5,
+      "learning_rate": 4.9996770310277506e-05,
+      "loss": 0.4439,
+      "step": 170
+    },
+    {
+      "epoch": 0.013061461432406937,
+      "grad_norm": 4.03125,
+      "learning_rate": 4.9995781657555e-05,
+      "loss": 0.4832,
+      "step": 180
+    },
+    {
+      "epoch": 0.013787098178651766,
+      "grad_norm": 4.71875,
+      "learning_rate": 4.99946612002253e-05,
+      "loss": 0.5325,
+      "step": 190
+    },
+    {
+      "epoch": 0.014512734924896596,
+      "grad_norm": 2.390625,
+      "learning_rate": 4.9993408944196676e-05,
+      "loss": 0.3253,
+      "step": 200
+    },
+    {
+      "epoch": 0.015238371671141426,
+      "grad_norm": 4.25,
+      "learning_rate": 4.9992024896072364e-05,
+      "loss": 0.3897,
+      "step": 210
+    },
+    {
+      "epoch": 0.015964008417386256,
+      "grad_norm": 5.65625,
+      "learning_rate": 4.999050906315055e-05,
+      "loss": 0.3642,
+      "step": 220
+    },
+    {
+      "epoch": 0.016689645163631087,
+      "grad_norm": 1.875,
+      "learning_rate": 4.998886145342434e-05,
+      "loss": 0.4811,
+      "step": 230
+    },
+    {
+      "epoch": 0.017415281909875915,
+      "grad_norm": 3.546875,
+      "learning_rate": 4.9987082075581684e-05,
+      "loss": 0.4725,
+      "step": 240
+    },
+    {
+      "epoch": 0.018140918656120747,
+      "grad_norm": 3.046875,
+      "learning_rate": 4.9985170939005386e-05,
+      "loss": 0.3427,
+      "step": 250
+    },
+    {
+      "epoch": 0.018866555402365575,
+      "grad_norm": 4.5625,
+      "learning_rate": 4.998312805377302e-05,
+      "loss": 0.3568,
+      "step": 260
+    },
+    {
+      "epoch": 0.019592192148610407,
+      "grad_norm": 4.5,
+      "learning_rate": 4.998095343065685e-05,
+      "loss": 0.3803,
+      "step": 270
+    },
+    {
+      "epoch": 0.020317828894855235,
+      "grad_norm": 3.984375,
+      "learning_rate": 4.997864708112384e-05,
+      "loss": 0.3002,
+      "step": 280
+    },
+    {
+      "epoch": 0.021043465641100066,
+      "grad_norm": 6.0625,
+      "learning_rate": 4.997620901733554e-05,
+      "loss": 0.3354,
+      "step": 290
+    },
+    {
+      "epoch": 0.021769102387344894,
+      "grad_norm": 2.703125,
+      "learning_rate": 4.997363925214803e-05,
+      "loss": 0.3285,
+      "step": 300
+    },
+    {
+      "epoch": 0.022494739133589726,
+      "grad_norm": 3.171875,
+      "learning_rate": 4.9970937799111896e-05,
+      "loss": 0.2985,
+      "step": 310
+    },
+    {
+      "epoch": 0.023220375879834554,
+      "grad_norm": 6.15625,
+      "learning_rate": 4.996810467247207e-05,
+      "loss": 0.3869,
+      "step": 320
+    },
+    {
+      "epoch": 0.023946012626079385,
+      "grad_norm": 2.03125,
+      "learning_rate": 4.9965139887167856e-05,
+      "loss": 0.2738,
+      "step": 330
+    },
+    {
+      "epoch": 0.024671649372324213,
+      "grad_norm": 2.9375,
+      "learning_rate": 4.996204345883278e-05,
+      "loss": 0.3396,
+      "step": 340
+    },
+    {
+      "epoch": 0.025397286118569045,
+      "grad_norm": 2.484375,
+      "learning_rate": 4.9958815403794546e-05,
+      "loss": 0.3736,
+      "step": 350
+    },
+    {
+      "epoch": 0.026122922864813873,
+      "grad_norm": 5.09375,
+      "learning_rate": 4.995545573907492e-05,
+      "loss": 0.3458,
+      "step": 360
+    },
+    {
+      "epoch": 0.026848559611058705,
+      "grad_norm": 4.875,
+      "learning_rate": 4.995196448238966e-05,
+      "loss": 0.3279,
+      "step": 370
+    },
+    {
+      "epoch": 0.027574196357303533,
+      "grad_norm": 4.90625,
+      "learning_rate": 4.9948341652148436e-05,
+      "loss": 0.2605,
+      "step": 380
+    },
+    {
+      "epoch": 0.028299833103548364,
+      "grad_norm": 5.40625,
+      "learning_rate": 4.994458726745468e-05,
+      "loss": 0.3662,
+      "step": 390
+    },
+    {
+      "epoch": 0.029025469849793192,
+      "grad_norm": 0.671875,
+      "learning_rate": 4.9940701348105554e-05,
+      "loss": 0.3122,
+      "step": 400
+    },
+    {
+      "epoch": 0.029751106596038024,
+      "grad_norm": 3.625,
+      "learning_rate": 4.99366839145918e-05,
+      "loss": 0.2122,
+      "step": 410
+    },
+    {
+      "epoch": 0.030476743342282852,
+      "grad_norm": 6.0625,
+      "learning_rate": 4.993253498809762e-05,
+      "loss": 0.3558,
+      "step": 420
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 13781,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 10,
+  "total_flos": 0.0,
+  "train_batch_size": 5,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-420/training_args.bin b/checkpoint-420/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..4c28dcaa99e0497b1c861e1642ce3651d4277bea
--- /dev/null
+++ b/checkpoint-420/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9f4cfc15f45dfcabb458cfe7e31a5e2a32e96eacab3ae733ab7eefd0eca041b5
+size 4984
diff --git a/checkpoint-430/README.md b/checkpoint-430/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..64d55750c9c68c8ef0ccff2f65b1fd32a16578a9
--- /dev/null
+++ b/checkpoint-430/README.md
@@ -0,0 +1,202 @@
+---
+library_name: peft
+base_model: ../pretrained/MiniCPM-2B-dpo-bf16/
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.10.0
\ No newline at end of file
diff --git a/checkpoint-430/adapter_config.json b/checkpoint-430/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..d0eb9db7c6b0c827aba0e86e5057f651b0f868c9
--- /dev/null
+++ b/checkpoint-430/adapter_config.json
@@ -0,0 +1,29 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "../pretrained/MiniCPM-2B-dpo-bf16/",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": "gaussian",
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-430/adapter_model.safetensors b/checkpoint-430/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..e7c64495a47d1345c509dca93e6235ebc8d6c8ae
--- /dev/null
+++ b/checkpoint-430/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:16001a0ecff2c6f0a1614b8d2be5cffb61a3522c589b0c1c14b036d3880092d2
+size 5919456
diff --git a/checkpoint-430/optimizer.pt b/checkpoint-430/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..496f3b6120714e155a96f5fb6b157d70cc515494
--- /dev/null
+++ b/checkpoint-430/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8746faab26c4dc95479259c0b8e453d95ad89825c6bf0b52387dc260c3b07f4e
+size 11930938
diff --git a/checkpoint-430/rng_state_0.pth b/checkpoint-430/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..8058aa36b0d4b9a67ba1c1bce8485cd3fde721bc
--- /dev/null
+++ b/checkpoint-430/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7450fd18935b074e303410c6ee7b1759df9e00ff6ca83a4aea7b3d18b0ae72f6
+size 15024
diff --git a/checkpoint-430/rng_state_1.pth b/checkpoint-430/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..c0d2b91f99b0aa9e554676f946104f185dbe4b50
--- /dev/null
+++ b/checkpoint-430/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:335f98ff53adc4b4ae643ea0ece2aa713eeb67e998dd892eebe667331ebf7953
+size 15024
diff --git a/checkpoint-430/rng_state_2.pth b/checkpoint-430/rng_state_2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..75f9ff5802d81a692dad10c4d13b994e0da922c9
--- /dev/null
+++ b/checkpoint-430/rng_state_2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1ad4ef9d87614133017d84fa3f684c7a3aebcd4e1295ed37e3dc4d16cf359822
+size 15024
diff --git a/checkpoint-430/rng_state_3.pth b/checkpoint-430/rng_state_3.pth
new file mode 100644
index 0000000000000000000000000000000000000000..9f4bbba56f7df987cfa068bbe8c82860521263df
--- /dev/null
+++ b/checkpoint-430/rng_state_3.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e4e2215190444d86848e0357b60bd13cc0ccacb2c02b81a1b80097a272eed147
+size 15024
diff --git a/checkpoint-430/scheduler.pt b/checkpoint-430/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a193e5b2dc0e31de0a484862cd595ec4639b137f
--- /dev/null
+++ b/checkpoint-430/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:81f748188078d85ac69bcfd958dbfd856147dd01915e9b5495dbd62fad767111
+size 1064
diff --git a/checkpoint-430/trainer_state.json b/checkpoint-430/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..89f36fd908bbd73b5784dde13e9bbf563da185dd
--- /dev/null
+++ b/checkpoint-430/trainer_state.json
@@ -0,0 +1,322 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.031202380088527683,
+  "eval_steps": 500,
+  "global_step": 430,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0007256367462448298,
+      "grad_norm": 15.6875,
+      "learning_rate": 5e-06,
+      "loss": 3.2562,
+      "step": 10
+    },
+    {
+      "epoch": 0.0014512734924896596,
+      "grad_norm": 17.5,
+      "learning_rate": 1e-05,
+      "loss": 2.9076,
+      "step": 20
+    },
+    {
+      "epoch": 0.0021769102387344894,
+      "grad_norm": 19.125,
+      "learning_rate": 1.5e-05,
+      "loss": 3.0281,
+      "step": 30
+    },
+    {
+      "epoch": 0.0029025469849793192,
+      "grad_norm": 17.875,
+      "learning_rate": 2e-05,
+      "loss": 2.6225,
+      "step": 40
+    },
+    {
+      "epoch": 0.003628183731224149,
+      "grad_norm": 13.9375,
+      "learning_rate": 2.5e-05,
+      "loss": 2.5137,
+      "step": 50
+    },
+    {
+      "epoch": 0.004353820477468979,
+      "grad_norm": 10.6875,
+      "learning_rate": 3e-05,
+      "loss": 1.994,
+      "step": 60
+    },
+    {
+      "epoch": 0.005079457223713809,
+      "grad_norm": 7.53125,
+      "learning_rate": 3.5e-05,
+      "loss": 1.5519,
+      "step": 70
+    },
+    {
+      "epoch": 0.0058050939699586385,
+      "grad_norm": 7.09375,
+      "learning_rate": 4e-05,
+      "loss": 1.2358,
+      "step": 80
+    },
+    {
+      "epoch": 0.006530730716203468,
+      "grad_norm": 8.0625,
+      "learning_rate": 4.5e-05,
+      "loss": 0.953,
+      "step": 90
+    },
+    {
+      "epoch": 0.007256367462448298,
+      "grad_norm": 6.6875,
+      "learning_rate": 5e-05,
+      "loss": 0.6256,
+      "step": 100
+    },
+    {
+      "epoch": 0.007982004208693128,
+      "grad_norm": 5.03125,
+      "learning_rate": 4.9999934086574596e-05,
+      "loss": 0.6601,
+      "step": 110
+    },
+    {
+      "epoch": 0.008707640954937958,
+      "grad_norm": 5.59375,
+      "learning_rate": 4.9999736346645943e-05,
+      "loss": 0.659,
+      "step": 120
+    },
+    {
+      "epoch": 0.009433277701182788,
+      "grad_norm": 3.953125,
+      "learning_rate": 4.999940678125673e-05,
+      "loss": 0.512,
+      "step": 130
+    },
+    {
+      "epoch": 0.010158914447427617,
+      "grad_norm": 5.46875,
+      "learning_rate": 4.9998945392144796e-05,
+      "loss": 0.5522,
+      "step": 140
+    },
+    {
+      "epoch": 0.010884551193672447,
+      "grad_norm": 3.96875,
+      "learning_rate": 4.999835218174307e-05,
+      "loss": 0.5532,
+      "step": 150
+    },
+    {
+      "epoch": 0.011610187939917277,
+      "grad_norm": 3.234375,
+      "learning_rate": 4.99976271531796e-05,
+      "loss": 0.5353,
+      "step": 160
+    },
+    {
+      "epoch": 0.012335824686162107,
+      "grad_norm": 4.5,
+      "learning_rate": 4.9996770310277506e-05,
+      "loss": 0.4439,
+      "step": 170
+    },
+    {
+      "epoch": 0.013061461432406937,
+      "grad_norm": 4.03125,
+      "learning_rate": 4.9995781657555e-05,
+      "loss": 0.4832,
+      "step": 180
+    },
+    {
+      "epoch": 0.013787098178651766,
+      "grad_norm": 4.71875,
+      "learning_rate": 4.99946612002253e-05,
+      "loss": 0.5325,
+      "step": 190
+    },
+    {
+      "epoch": 0.014512734924896596,
+      "grad_norm": 2.390625,
+      "learning_rate": 4.9993408944196676e-05,
+      "loss": 0.3253,
+      "step": 200
+    },
+    {
+      "epoch": 0.015238371671141426,
+      "grad_norm": 4.25,
+      "learning_rate": 4.9992024896072364e-05,
+      "loss": 0.3897,
+      "step": 210
+    },
+    {
+      "epoch": 0.015964008417386256,
+      "grad_norm": 5.65625,
+      "learning_rate": 4.999050906315055e-05,
+      "loss": 0.3642,
+      "step": 220
+    },
+    {
+      "epoch": 0.016689645163631087,
+      "grad_norm": 1.875,
+      "learning_rate": 4.998886145342434e-05,
+      "loss": 0.4811,
+      "step": 230
+    },
+    {
+      "epoch": 0.017415281909875915,
+      "grad_norm": 3.546875,
+      "learning_rate": 4.9987082075581684e-05,
+      "loss": 0.4725,
+      "step": 240
+    },
+    {
+      "epoch": 0.018140918656120747,
+      "grad_norm": 3.046875,
+      "learning_rate": 4.9985170939005386e-05,
+      "loss": 0.3427,
+      "step": 250
+    },
+    {
+      "epoch": 0.018866555402365575,
+      "grad_norm": 4.5625,
+      "learning_rate": 4.998312805377302e-05,
+      "loss": 0.3568,
+      "step": 260
+    },
+    {
+      "epoch": 0.019592192148610407,
+      "grad_norm": 4.5,
+      "learning_rate": 4.998095343065685e-05,
+      "loss": 0.3803,
+      "step": 270
+    },
+    {
+      "epoch": 0.020317828894855235,
+      "grad_norm": 3.984375,
+      "learning_rate": 4.997864708112384e-05,
+      "loss": 0.3002,
+      "step": 280
+    },
+    {
+      "epoch": 0.021043465641100066,
+      "grad_norm": 6.0625,
+      "learning_rate": 4.997620901733554e-05,
+      "loss": 0.3354,
+      "step": 290
+    },
+    {
+      "epoch": 0.021769102387344894,
+      "grad_norm": 2.703125,
+      "learning_rate": 4.997363925214803e-05,
+      "loss": 0.3285,
+      "step": 300
+    },
+    {
+      "epoch": 0.022494739133589726,
+      "grad_norm": 3.171875,
+      "learning_rate": 4.9970937799111896e-05,
+      "loss": 0.2985,
+      "step": 310
+    },
+    {
+      "epoch": 0.023220375879834554,
+      "grad_norm": 6.15625,
+      "learning_rate": 4.996810467247207e-05,
+      "loss": 0.3869,
+      "step": 320
+    },
+    {
+      "epoch": 0.023946012626079385,
+      "grad_norm": 2.03125,
+      "learning_rate": 4.9965139887167856e-05,
+      "loss": 0.2738,
+      "step": 330
+    },
+    {
+      "epoch": 0.024671649372324213,
+      "grad_norm": 2.9375,
+      "learning_rate": 4.996204345883278e-05,
+      "loss": 0.3396,
+      "step": 340
+    },
+    {
+      "epoch": 0.025397286118569045,
+      "grad_norm": 2.484375,
+      "learning_rate": 4.9958815403794546e-05,
+      "loss": 0.3736,
+      "step": 350
+    },
+    {
+      "epoch": 0.026122922864813873,
+      "grad_norm": 5.09375,
+      "learning_rate": 4.995545573907492e-05,
+      "loss": 0.3458,
+      "step": 360
+    },
+    {
+      "epoch": 0.026848559611058705,
+      "grad_norm": 4.875,
+      "learning_rate": 4.995196448238966e-05,
+      "loss": 0.3279,
+      "step": 370
+    },
+    {
+      "epoch": 0.027574196357303533,
+      "grad_norm": 4.90625,
+      "learning_rate": 4.9948341652148436e-05,
+      "loss": 0.2605,
+      "step": 380
+    },
+    {
+      "epoch": 0.028299833103548364,
+      "grad_norm": 5.40625,
+      "learning_rate": 4.994458726745468e-05,
+      "loss": 0.3662,
+      "step": 390
+    },
+    {
+      "epoch": 0.029025469849793192,
+      "grad_norm": 0.671875,
+      "learning_rate": 4.9940701348105554e-05,
+      "loss": 0.3122,
+      "step": 400
+    },
+    {
+      "epoch": 0.029751106596038024,
+      "grad_norm": 3.625,
+      "learning_rate": 4.99366839145918e-05,
+      "loss": 0.2122,
+      "step": 410
+    },
+    {
+      "epoch": 0.030476743342282852,
+      "grad_norm": 6.0625,
+      "learning_rate": 4.993253498809762e-05,
+      "loss": 0.3558,
+      "step": 420
+    },
+    {
+      "epoch": 0.031202380088527683,
+      "grad_norm": 5.15625,
+      "learning_rate": 4.9928254590500646e-05,
+      "loss": 0.3507,
+      "step": 430
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 13781,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 10,
+  "total_flos": 0.0,
+  "train_batch_size": 5,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-430/training_args.bin b/checkpoint-430/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..4c28dcaa99e0497b1c861e1642ce3651d4277bea
--- /dev/null
+++ b/checkpoint-430/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9f4cfc15f45dfcabb458cfe7e31a5e2a32e96eacab3ae733ab7eefd0eca041b5
+size 4984
diff --git a/checkpoint-440/README.md b/checkpoint-440/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..64d55750c9c68c8ef0ccff2f65b1fd32a16578a9
--- /dev/null
+++ b/checkpoint-440/README.md
@@ -0,0 +1,202 @@
+---
+library_name: peft
+base_model: ../pretrained/MiniCPM-2B-dpo-bf16/
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.10.0
\ No newline at end of file
diff --git a/checkpoint-440/adapter_config.json b/checkpoint-440/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..d0eb9db7c6b0c827aba0e86e5057f651b0f868c9
--- /dev/null
+++ b/checkpoint-440/adapter_config.json
@@ -0,0 +1,29 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "../pretrained/MiniCPM-2B-dpo-bf16/",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": "gaussian",
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-440/adapter_model.safetensors b/checkpoint-440/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..f3355d405b0a6b2e0b9b6775f842dcf73c37db22
--- /dev/null
+++ b/checkpoint-440/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:899e9b24f9403a6471ef37783aaeaeda0edcce03356ad0bba7407db9471970e9
+size 5919456
diff --git a/checkpoint-440/optimizer.pt b/checkpoint-440/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..63039c358bd1e73c9060838c2c2ea6efdf6cf395
--- /dev/null
+++ b/checkpoint-440/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cbf7c547009941093d04813e063bbc109f02ba4a2c10d3a872241f7a6f5edf9e
+size 11930938
diff --git a/checkpoint-440/rng_state_0.pth b/checkpoint-440/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..4d2636ceb8a14218a8e0d1476a39b6e0e7a03cb5
--- /dev/null
+++ b/checkpoint-440/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2e670d828f171705ef1de0e28997d07c7dca7a125ec52bc17025aaf4efe7ea5d
+size 15024
diff --git a/checkpoint-440/rng_state_1.pth b/checkpoint-440/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..ffa40b71fe39bd6dd67895b18dc6a81f0a1334f7
--- /dev/null
+++ b/checkpoint-440/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:23bb6217c29f859f909cb586c1ef966c2c25c18683ffdfdf5947b632a1f81e05
+size 15024
diff --git a/checkpoint-440/rng_state_2.pth b/checkpoint-440/rng_state_2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..65d5ef1e9a8fd69d583a07620b440cebb18536f9
--- /dev/null
+++ b/checkpoint-440/rng_state_2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:85f9333f382174e1e1a5d70186992b682a1cf238376145534607c43e01390625
+size 15024
diff --git a/checkpoint-440/rng_state_3.pth b/checkpoint-440/rng_state_3.pth
new file mode 100644
index 0000000000000000000000000000000000000000..274b738231d0fe4d66d5314b450de19158e77bc0
--- /dev/null
+++ b/checkpoint-440/rng_state_3.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e92d697b8f695ccff1f67998fb1248fb948fc7ab5770f451a3221a884d92ad42
+size 15024
diff --git a/checkpoint-440/scheduler.pt b/checkpoint-440/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e2bd84b9dddfd46ce180c542cb6db69ccc8b557b
--- /dev/null
+++ b/checkpoint-440/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:efecbcd746b21099f525ca6ec229df182d6df47e25384dbc11aed9df98748eb2
+size 1064
diff --git a/checkpoint-440/trainer_state.json b/checkpoint-440/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..98f415e03b3ec2197feb25d291fd7f5520406565
--- /dev/null
+++ b/checkpoint-440/trainer_state.json
@@ -0,0 +1,329 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.03192801683477251,
+  "eval_steps": 500,
+  "global_step": 440,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0007256367462448298,
+      "grad_norm": 15.6875,
+      "learning_rate": 5e-06,
+      "loss": 3.2562,
+      "step": 10
+    },
+    {
+      "epoch": 0.0014512734924896596,
+      "grad_norm": 17.5,
+      "learning_rate": 1e-05,
+      "loss": 2.9076,
+      "step": 20
+    },
+    {
+      "epoch": 0.0021769102387344894,
+      "grad_norm": 19.125,
+      "learning_rate": 1.5e-05,
+      "loss": 3.0281,
+      "step": 30
+    },
+    {
+      "epoch": 0.0029025469849793192,
+      "grad_norm": 17.875,
+      "learning_rate": 2e-05,
+      "loss": 2.6225,
+      "step": 40
+    },
+    {
+      "epoch": 0.003628183731224149,
+      "grad_norm": 13.9375,
+      "learning_rate": 2.5e-05,
+      "loss": 2.5137,
+      "step": 50
+    },
+    {
+      "epoch": 0.004353820477468979,
+      "grad_norm": 10.6875,
+      "learning_rate": 3e-05,
+      "loss": 1.994,
+      "step": 60
+    },
+    {
+      "epoch": 0.005079457223713809,
+      "grad_norm": 7.53125,
+      "learning_rate": 3.5e-05,
+      "loss": 1.5519,
+      "step": 70
+    },
+    {
+      "epoch": 0.0058050939699586385,
+      "grad_norm": 7.09375,
+      "learning_rate": 4e-05,
+      "loss": 1.2358,
+      "step": 80
+    },
+    {
+      "epoch": 0.006530730716203468,
+      "grad_norm": 8.0625,
+      "learning_rate": 4.5e-05,
+      "loss": 0.953,
+      "step": 90
+    },
+    {
+      "epoch": 0.007256367462448298,
+      "grad_norm": 6.6875,
+      "learning_rate": 5e-05,
+      "loss": 0.6256,
+      "step": 100
+    },
+    {
+      "epoch": 0.007982004208693128,
+      "grad_norm": 5.03125,
+      "learning_rate": 4.9999934086574596e-05,
+      "loss": 0.6601,
+      "step": 110
+    },
+    {
+      "epoch": 0.008707640954937958,
+      "grad_norm": 5.59375,
+      "learning_rate": 4.9999736346645943e-05,
+      "loss": 0.659,
+      "step": 120
+    },
+    {
+      "epoch": 0.009433277701182788,
+      "grad_norm": 3.953125,
+      "learning_rate": 4.999940678125673e-05,
+      "loss": 0.512,
+      "step": 130
+    },
+    {
+      "epoch": 0.010158914447427617,
+      "grad_norm": 5.46875,
+      "learning_rate": 4.9998945392144796e-05,
+      "loss": 0.5522,
+      "step": 140
+    },
+    {
+      "epoch": 0.010884551193672447,
+      "grad_norm": 3.96875,
+      "learning_rate": 4.999835218174307e-05,
+      "loss": 0.5532,
+      "step": 150
+    },
+    {
+      "epoch": 0.011610187939917277,
+      "grad_norm": 3.234375,
+      "learning_rate": 4.99976271531796e-05,
+      "loss": 0.5353,
+      "step": 160
+    },
+    {
+      "epoch": 0.012335824686162107,
+      "grad_norm": 4.5,
+      "learning_rate": 4.9996770310277506e-05,
+      "loss": 0.4439,
+      "step": 170
+    },
+    {
+      "epoch": 0.013061461432406937,
+      "grad_norm": 4.03125,
+      "learning_rate": 4.9995781657555e-05,
+      "loss": 0.4832,
+      "step": 180
+    },
+    {
+      "epoch": 0.013787098178651766,
+      "grad_norm": 4.71875,
+      "learning_rate": 4.99946612002253e-05,
+      "loss": 0.5325,
+      "step": 190
+    },
+    {
+      "epoch": 0.014512734924896596,
+      "grad_norm": 2.390625,
+      "learning_rate": 4.9993408944196676e-05,
+      "loss": 0.3253,
+      "step": 200
+    },
+    {
+      "epoch": 0.015238371671141426,
+      "grad_norm": 4.25,
+      "learning_rate": 4.9992024896072364e-05,
+      "loss": 0.3897,
+      "step": 210
+    },
+    {
+      "epoch": 0.015964008417386256,
+      "grad_norm": 5.65625,
+      "learning_rate": 4.999050906315055e-05,
+      "loss": 0.3642,
+      "step": 220
+    },
+    {
+      "epoch": 0.016689645163631087,
+      "grad_norm": 1.875,
+      "learning_rate": 4.998886145342434e-05,
+      "loss": 0.4811,
+      "step": 230
+    },
+    {
+      "epoch": 0.017415281909875915,
+      "grad_norm": 3.546875,
+      "learning_rate": 4.9987082075581684e-05,
+      "loss": 0.4725,
+      "step": 240
+    },
+    {
+      "epoch": 0.018140918656120747,
+      "grad_norm": 3.046875,
+      "learning_rate": 4.9985170939005386e-05,
+      "loss": 0.3427,
+      "step": 250
+    },
+    {
+      "epoch": 0.018866555402365575,
+      "grad_norm": 4.5625,
+      "learning_rate": 4.998312805377302e-05,
+      "loss": 0.3568,
+      "step": 260
+    },
+    {
+      "epoch": 0.019592192148610407,
+      "grad_norm": 4.5,
+      "learning_rate": 4.998095343065685e-05,
+      "loss": 0.3803,
+      "step": 270
+    },
+    {
+      "epoch": 0.020317828894855235,
+      "grad_norm": 3.984375,
+      "learning_rate": 4.997864708112384e-05,
+      "loss": 0.3002,
+      "step": 280
+    },
+    {
+      "epoch": 0.021043465641100066,
+      "grad_norm": 6.0625,
+      "learning_rate": 4.997620901733554e-05,
+      "loss": 0.3354,
+      "step": 290
+    },
+    {
+      "epoch": 0.021769102387344894,
+      "grad_norm": 2.703125,
+      "learning_rate": 4.997363925214803e-05,
+      "loss": 0.3285,
+      "step": 300
+    },
+    {
+      "epoch": 0.022494739133589726,
+      "grad_norm": 3.171875,
+      "learning_rate": 4.9970937799111896e-05,
+      "loss": 0.2985,
+      "step": 310
+    },
+    {
+      "epoch": 0.023220375879834554,
+      "grad_norm": 6.15625,
+      "learning_rate": 4.996810467247207e-05,
+      "loss": 0.3869,
+      "step": 320
+    },
+    {
+      "epoch": 0.023946012626079385,
+      "grad_norm": 2.03125,
+      "learning_rate": 4.9965139887167856e-05,
+      "loss": 0.2738,
+      "step": 330
+    },
+    {
+      "epoch": 0.024671649372324213,
+      "grad_norm": 2.9375,
+      "learning_rate": 4.996204345883278e-05,
+      "loss": 0.3396,
+      "step": 340
+    },
+    {
+      "epoch": 0.025397286118569045,
+      "grad_norm": 2.484375,
+      "learning_rate": 4.9958815403794546e-05,
+      "loss": 0.3736,
+      "step": 350
+    },
+    {
+      "epoch": 0.026122922864813873,
+      "grad_norm": 5.09375,
+      "learning_rate": 4.995545573907492e-05,
+      "loss": 0.3458,
+      "step": 360
+    },
+    {
+      "epoch": 0.026848559611058705,
+      "grad_norm": 4.875,
+      "learning_rate": 4.995196448238966e-05,
+      "loss": 0.3279,
+      "step": 370
+    },
+    {
+      "epoch": 0.027574196357303533,
+      "grad_norm": 4.90625,
+      "learning_rate": 4.9948341652148436e-05,
+      "loss": 0.2605,
+      "step": 380
+    },
+    {
+      "epoch": 0.028299833103548364,
+      "grad_norm": 5.40625,
+      "learning_rate": 4.994458726745468e-05,
+      "loss": 0.3662,
+      "step": 390
+    },
+    {
+      "epoch": 0.029025469849793192,
+      "grad_norm": 0.671875,
+      "learning_rate": 4.9940701348105554e-05,
+      "loss": 0.3122,
+      "step": 400
+    },
+    {
+      "epoch": 0.029751106596038024,
+      "grad_norm": 3.625,
+      "learning_rate": 4.99366839145918e-05,
+      "loss": 0.2122,
+      "step": 410
+    },
+    {
+      "epoch": 0.030476743342282852,
+      "grad_norm": 6.0625,
+      "learning_rate": 4.993253498809762e-05,
+      "loss": 0.3558,
+      "step": 420
+    },
+    {
+      "epoch": 0.031202380088527683,
+      "grad_norm": 5.15625,
+      "learning_rate": 4.9928254590500646e-05,
+      "loss": 0.3507,
+      "step": 430
+    },
+    {
+      "epoch": 0.03192801683477251,
+      "grad_norm": 5.3125,
+      "learning_rate": 4.9923842744371707e-05,
+      "loss": 0.283,
+      "step": 440
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 13781,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 10,
+  "total_flos": 0.0,
+  "train_batch_size": 5,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-440/training_args.bin b/checkpoint-440/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..4c28dcaa99e0497b1c861e1642ce3651d4277bea
--- /dev/null
+++ b/checkpoint-440/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9f4cfc15f45dfcabb458cfe7e31a5e2a32e96eacab3ae733ab7eefd0eca041b5
+size 4984
diff --git a/checkpoint-450/README.md b/checkpoint-450/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..64d55750c9c68c8ef0ccff2f65b1fd32a16578a9
--- /dev/null
+++ b/checkpoint-450/README.md
@@ -0,0 +1,202 @@
+---
+library_name: peft
+base_model: ../pretrained/MiniCPM-2B-dpo-bf16/
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.10.0
\ No newline at end of file
diff --git a/checkpoint-450/adapter_config.json b/checkpoint-450/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..d0eb9db7c6b0c827aba0e86e5057f651b0f868c9
--- /dev/null
+++ b/checkpoint-450/adapter_config.json
@@ -0,0 +1,29 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "../pretrained/MiniCPM-2B-dpo-bf16/",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": "gaussian",
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-450/adapter_model.safetensors b/checkpoint-450/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..dd738ad676ee26b39235a491e3c4c6e508d7f4f5
--- /dev/null
+++ b/checkpoint-450/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8cc3ef09cd5f463ce3590be35b2c5a296359c797b2ff16d89694c28248405ef6
+size 5919456
diff --git a/checkpoint-450/optimizer.pt b/checkpoint-450/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..225ce9edcd1edc8aa27e54d1c90ea0a1c6f3d531
--- /dev/null
+++ b/checkpoint-450/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:650ae26e388bdf1359bf491bdb427b07f09a745683254c69724bbaacb92a96b1
+size 11930938
diff --git a/checkpoint-450/rng_state_0.pth b/checkpoint-450/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..0745b698c301be41bd431c96a48dad3a6eb34a3f
--- /dev/null
+++ b/checkpoint-450/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:74c0bbe13ba6abf11c8e74c819dd4812dcf1cd31a15084b5d9a67d92cffcd15a
+size 15024
diff --git a/checkpoint-450/rng_state_1.pth b/checkpoint-450/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..d574d6b69adc36930af48e3de39f40c58b94563e
--- /dev/null
+++ b/checkpoint-450/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4af53bdc297eb334ac93c2f0f17d63f14b6fde6ea4eab4e3168813ead2e95f2a
+size 15024
diff --git a/checkpoint-450/rng_state_2.pth b/checkpoint-450/rng_state_2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..b02735cedf018b9522e0d896eebc25d9fe9ffe6c
--- /dev/null
+++ b/checkpoint-450/rng_state_2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8113f7db0a254d0cdc384448069e86023fafbe13c1d2d6b23e47a02b4ae9dc99
+size 15024
diff --git a/checkpoint-450/rng_state_3.pth b/checkpoint-450/rng_state_3.pth
new file mode 100644
index 0000000000000000000000000000000000000000..e7ac28f9fa89af4f49123ccad74a328c603a376d
--- /dev/null
+++ b/checkpoint-450/rng_state_3.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:369be34e82f746d1ac1423468b34f53a5dfb7cd0bc6c2f70b663f6762401f7c5
+size 15024
diff --git a/checkpoint-450/scheduler.pt b/checkpoint-450/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c6c4b0aff441ca63c22c3f134f6b0fab4d87f7ba
--- /dev/null
+++ b/checkpoint-450/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:21cce50b155bda34218e91fb029799898423f5ba5d908676bebf35263eaef6d8
+size 1064
diff --git a/checkpoint-450/trainer_state.json b/checkpoint-450/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..d98208c151c07cbff4761f9de9f32c389c18880c
--- /dev/null
+++ b/checkpoint-450/trainer_state.json
@@ -0,0 +1,336 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.03265365358101734,
+  "eval_steps": 500,
+  "global_step": 450,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0007256367462448298,
+      "grad_norm": 15.6875,
+      "learning_rate": 5e-06,
+      "loss": 3.2562,
+      "step": 10
+    },
+    {
+      "epoch": 0.0014512734924896596,
+      "grad_norm": 17.5,
+      "learning_rate": 1e-05,
+      "loss": 2.9076,
+      "step": 20
+    },
+    {
+      "epoch": 0.0021769102387344894,
+      "grad_norm": 19.125,
+      "learning_rate": 1.5e-05,
+      "loss": 3.0281,
+      "step": 30
+    },
+    {
+      "epoch": 0.0029025469849793192,
+      "grad_norm": 17.875,
+      "learning_rate": 2e-05,
+      "loss": 2.6225,
+      "step": 40
+    },
+    {
+      "epoch": 0.003628183731224149,
+      "grad_norm": 13.9375,
+      "learning_rate": 2.5e-05,
+      "loss": 2.5137,
+      "step": 50
+    },
+    {
+      "epoch": 0.004353820477468979,
+      "grad_norm": 10.6875,
+      "learning_rate": 3e-05,
+      "loss": 1.994,
+      "step": 60
+    },
+    {
+      "epoch": 0.005079457223713809,
+      "grad_norm": 7.53125,
+      "learning_rate": 3.5e-05,
+      "loss": 1.5519,
+      "step": 70
+    },
+    {
+      "epoch": 0.0058050939699586385,
+      "grad_norm": 7.09375,
+      "learning_rate": 4e-05,
+      "loss": 1.2358,
+      "step": 80
+    },
+    {
+      "epoch": 0.006530730716203468,
+      "grad_norm": 8.0625,
+      "learning_rate": 4.5e-05,
+      "loss": 0.953,
+      "step": 90
+    },
+    {
+      "epoch": 0.007256367462448298,
+      "grad_norm": 6.6875,
+      "learning_rate": 5e-05,
+      "loss": 0.6256,
+      "step": 100
+    },
+    {
+      "epoch": 0.007982004208693128,
+      "grad_norm": 5.03125,
+      "learning_rate": 4.9999934086574596e-05,
+      "loss": 0.6601,
+      "step": 110
+    },
+    {
+      "epoch": 0.008707640954937958,
+      "grad_norm": 5.59375,
+      "learning_rate": 4.9999736346645943e-05,
+      "loss": 0.659,
+      "step": 120
+    },
+    {
+      "epoch": 0.009433277701182788,
+      "grad_norm": 3.953125,
+      "learning_rate": 4.999940678125673e-05,
+      "loss": 0.512,
+      "step": 130
+    },
+    {
+      "epoch": 0.010158914447427617,
+      "grad_norm": 5.46875,
+      "learning_rate": 4.9998945392144796e-05,
+      "loss": 0.5522,
+      "step": 140
+    },
+    {
+      "epoch": 0.010884551193672447,
+      "grad_norm": 3.96875,
+      "learning_rate": 4.999835218174307e-05,
+      "loss": 0.5532,
+      "step": 150
+    },
+    {
+      "epoch": 0.011610187939917277,
+      "grad_norm": 3.234375,
+      "learning_rate": 4.99976271531796e-05,
+      "loss": 0.5353,
+      "step": 160
+    },
+    {
+      "epoch": 0.012335824686162107,
+      "grad_norm": 4.5,
+      "learning_rate": 4.9996770310277506e-05,
+      "loss": 0.4439,
+      "step": 170
+    },
+    {
+      "epoch": 0.013061461432406937,
+      "grad_norm": 4.03125,
+      "learning_rate": 4.9995781657555e-05,
+      "loss": 0.4832,
+      "step": 180
+    },
+    {
+      "epoch": 0.013787098178651766,
+      "grad_norm": 4.71875,
+      "learning_rate": 4.99946612002253e-05,
+      "loss": 0.5325,
+      "step": 190
+    },
+    {
+      "epoch": 0.014512734924896596,
+      "grad_norm": 2.390625,
+      "learning_rate": 4.9993408944196676e-05,
+      "loss": 0.3253,
+      "step": 200
+    },
+    {
+      "epoch": 0.015238371671141426,
+      "grad_norm": 4.25,
+      "learning_rate": 4.9992024896072364e-05,
+      "loss": 0.3897,
+      "step": 210
+    },
+    {
+      "epoch": 0.015964008417386256,
+      "grad_norm": 5.65625,
+      "learning_rate": 4.999050906315055e-05,
+      "loss": 0.3642,
+      "step": 220
+    },
+    {
+      "epoch": 0.016689645163631087,
+      "grad_norm": 1.875,
+      "learning_rate": 4.998886145342434e-05,
+      "loss": 0.4811,
+      "step": 230
+    },
+    {
+      "epoch": 0.017415281909875915,
+      "grad_norm": 3.546875,
+      "learning_rate": 4.9987082075581684e-05,
+      "loss": 0.4725,
+      "step": 240
+    },
+    {
+      "epoch": 0.018140918656120747,
+      "grad_norm": 3.046875,
+      "learning_rate": 4.9985170939005386e-05,
+      "loss": 0.3427,
+      "step": 250
+    },
+    {
+      "epoch": 0.018866555402365575,
+      "grad_norm": 4.5625,
+      "learning_rate": 4.998312805377302e-05,
+      "loss": 0.3568,
+      "step": 260
+    },
+    {
+      "epoch": 0.019592192148610407,
+      "grad_norm": 4.5,
+      "learning_rate": 4.998095343065685e-05,
+      "loss": 0.3803,
+      "step": 270
+    },
+    {
+      "epoch": 0.020317828894855235,
+      "grad_norm": 3.984375,
+      "learning_rate": 4.997864708112384e-05,
+      "loss": 0.3002,
+      "step": 280
+    },
+    {
+      "epoch": 0.021043465641100066,
+      "grad_norm": 6.0625,
+      "learning_rate": 4.997620901733554e-05,
+      "loss": 0.3354,
+      "step": 290
+    },
+    {
+      "epoch": 0.021769102387344894,
+      "grad_norm": 2.703125,
+      "learning_rate": 4.997363925214803e-05,
+      "loss": 0.3285,
+      "step": 300
+    },
+    {
+      "epoch": 0.022494739133589726,
+      "grad_norm": 3.171875,
+      "learning_rate": 4.9970937799111896e-05,
+      "loss": 0.2985,
+      "step": 310
+    },
+    {
+      "epoch": 0.023220375879834554,
+      "grad_norm": 6.15625,
+      "learning_rate": 4.996810467247207e-05,
+      "loss": 0.3869,
+      "step": 320
+    },
+    {
+      "epoch": 0.023946012626079385,
+      "grad_norm": 2.03125,
+      "learning_rate": 4.9965139887167856e-05,
+      "loss": 0.2738,
+      "step": 330
+    },
+    {
+      "epoch": 0.024671649372324213,
+      "grad_norm": 2.9375,
+      "learning_rate": 4.996204345883278e-05,
+      "loss": 0.3396,
+      "step": 340
+    },
+    {
+      "epoch": 0.025397286118569045,
+      "grad_norm": 2.484375,
+      "learning_rate": 4.9958815403794546e-05,
+      "loss": 0.3736,
+      "step": 350
+    },
+    {
+      "epoch": 0.026122922864813873,
+      "grad_norm": 5.09375,
+      "learning_rate": 4.995545573907492e-05,
+      "loss": 0.3458,
+      "step": 360
+    },
+    {
+      "epoch": 0.026848559611058705,
+      "grad_norm": 4.875,
+      "learning_rate": 4.995196448238966e-05,
+      "loss": 0.3279,
+      "step": 370
+    },
+    {
+      "epoch": 0.027574196357303533,
+      "grad_norm": 4.90625,
+      "learning_rate": 4.9948341652148436e-05,
+      "loss": 0.2605,
+      "step": 380
+    },
+    {
+      "epoch": 0.028299833103548364,
+      "grad_norm": 5.40625,
+      "learning_rate": 4.994458726745468e-05,
+      "loss": 0.3662,
+      "step": 390
+    },
+    {
+      "epoch": 0.029025469849793192,
+      "grad_norm": 0.671875,
+      "learning_rate": 4.9940701348105554e-05,
+      "loss": 0.3122,
+      "step": 400
+    },
+    {
+      "epoch": 0.029751106596038024,
+      "grad_norm": 3.625,
+      "learning_rate": 4.99366839145918e-05,
+      "loss": 0.2122,
+      "step": 410
+    },
+    {
+      "epoch": 0.030476743342282852,
+      "grad_norm": 6.0625,
+      "learning_rate": 4.993253498809762e-05,
+      "loss": 0.3558,
+      "step": 420
+    },
+    {
+      "epoch": 0.031202380088527683,
+      "grad_norm": 5.15625,
+      "learning_rate": 4.9928254590500646e-05,
+      "loss": 0.3507,
+      "step": 430
+    },
+    {
+      "epoch": 0.03192801683477251,
+      "grad_norm": 5.3125,
+      "learning_rate": 4.9923842744371707e-05,
+      "loss": 0.283,
+      "step": 440
+    },
+    {
+      "epoch": 0.03265365358101734,
+      "grad_norm": 2.484375,
+      "learning_rate": 4.99192994729748e-05,
+      "loss": 0.2209,
+      "step": 450
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 13781,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 10,
+  "total_flos": 0.0,
+  "train_batch_size": 5,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-450/training_args.bin b/checkpoint-450/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..4c28dcaa99e0497b1c861e1642ce3651d4277bea
--- /dev/null
+++ b/checkpoint-450/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9f4cfc15f45dfcabb458cfe7e31a5e2a32e96eacab3ae733ab7eefd0eca041b5
+size 4984
diff --git a/checkpoint-460/README.md b/checkpoint-460/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..64d55750c9c68c8ef0ccff2f65b1fd32a16578a9
--- /dev/null
+++ b/checkpoint-460/README.md
@@ -0,0 +1,202 @@
+---
+library_name: peft
+base_model: ../pretrained/MiniCPM-2B-dpo-bf16/
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.10.0
\ No newline at end of file
diff --git a/checkpoint-460/adapter_config.json b/checkpoint-460/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..d0eb9db7c6b0c827aba0e86e5057f651b0f868c9
--- /dev/null
+++ b/checkpoint-460/adapter_config.json
@@ -0,0 +1,29 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "../pretrained/MiniCPM-2B-dpo-bf16/",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": "gaussian",
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-460/adapter_model.safetensors b/checkpoint-460/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..5194422e66d0577b11428d109fecd7d202c5ad71
--- /dev/null
+++ b/checkpoint-460/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2e6a81bcfc0e8506bc572d0622b99fc5324fa281df52c7fabf849e109aa98bc3
+size 5919456
diff --git a/checkpoint-460/optimizer.pt b/checkpoint-460/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1560c808ae4ed1d04c300d9704468ddc6c3a619b
--- /dev/null
+++ b/checkpoint-460/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3465dacc83d3f790fcfba40cd2f8777366909f44d6581aa5f9cbb71cd835c6f9
+size 11930938
diff --git a/checkpoint-460/rng_state_0.pth b/checkpoint-460/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..b838b908ff331437a6131d2af0e993a2bc005dd4
--- /dev/null
+++ b/checkpoint-460/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3b8845c25047974accbaa9efd358cc36a461745fbf3ab5630bbe550cb2b851c2
+size 15024
diff --git a/checkpoint-460/rng_state_1.pth b/checkpoint-460/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..33f4548ad077877a96fbee360960d1664989a387
--- /dev/null
+++ b/checkpoint-460/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b199e4e6e20c7b33b046371768306edbdced633420d13ce24bafba41b6122d29
+size 15024
diff --git a/checkpoint-460/rng_state_2.pth b/checkpoint-460/rng_state_2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..d56eefe51a3a86da4e791062a7c76f3df0331ffe
--- /dev/null
+++ b/checkpoint-460/rng_state_2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:29753073ef4a349f5fd75b836509a96e00aff64408a70ac0de1cdab063ed75fb
+size 15024
diff --git a/checkpoint-460/rng_state_3.pth b/checkpoint-460/rng_state_3.pth
new file mode 100644
index 0000000000000000000000000000000000000000..884793ddb489c0440cd3b99c10e422577e418add
--- /dev/null
+++ b/checkpoint-460/rng_state_3.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:04be3e3e127fc602bc75808f73c0de94b73bd8099b502291d7806b8705651a73
+size 15024
diff --git a/checkpoint-460/scheduler.pt b/checkpoint-460/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e5cf9e0caecff8d511a9c35c6d684a864ebfcc42
--- /dev/null
+++ b/checkpoint-460/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:678fc5d47e3e0c38a0ed9a38f4d264e33b827cf25b8a50f107b9bca0e0e468f3
+size 1064
diff --git a/checkpoint-460/trainer_state.json b/checkpoint-460/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..e646a13f57b4f3d0ca70a0dba6799d0b9c616dcf
--- /dev/null
+++ b/checkpoint-460/trainer_state.json
@@ -0,0 +1,343 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.033379290327262175,
+  "eval_steps": 500,
+  "global_step": 460,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0007256367462448298,
+      "grad_norm": 15.6875,
+      "learning_rate": 5e-06,
+      "loss": 3.2562,
+      "step": 10
+    },
+    {
+      "epoch": 0.0014512734924896596,
+      "grad_norm": 17.5,
+      "learning_rate": 1e-05,
+      "loss": 2.9076,
+      "step": 20
+    },
+    {
+      "epoch": 0.0021769102387344894,
+      "grad_norm": 19.125,
+      "learning_rate": 1.5e-05,
+      "loss": 3.0281,
+      "step": 30
+    },
+    {
+      "epoch": 0.0029025469849793192,
+      "grad_norm": 17.875,
+      "learning_rate": 2e-05,
+      "loss": 2.6225,
+      "step": 40
+    },
+    {
+      "epoch": 0.003628183731224149,
+      "grad_norm": 13.9375,
+      "learning_rate": 2.5e-05,
+      "loss": 2.5137,
+      "step": 50
+    },
+    {
+      "epoch": 0.004353820477468979,
+      "grad_norm": 10.6875,
+      "learning_rate": 3e-05,
+      "loss": 1.994,
+      "step": 60
+    },
+    {
+      "epoch": 0.005079457223713809,
+      "grad_norm": 7.53125,
+      "learning_rate": 3.5e-05,
+      "loss": 1.5519,
+      "step": 70
+    },
+    {
+      "epoch": 0.0058050939699586385,
+      "grad_norm": 7.09375,
+      "learning_rate": 4e-05,
+      "loss": 1.2358,
+      "step": 80
+    },
+    {
+      "epoch": 0.006530730716203468,
+      "grad_norm": 8.0625,
+      "learning_rate": 4.5e-05,
+      "loss": 0.953,
+      "step": 90
+    },
+    {
+      "epoch": 0.007256367462448298,
+      "grad_norm": 6.6875,
+      "learning_rate": 5e-05,
+      "loss": 0.6256,
+      "step": 100
+    },
+    {
+      "epoch": 0.007982004208693128,
+      "grad_norm": 5.03125,
+      "learning_rate": 4.9999934086574596e-05,
+      "loss": 0.6601,
+      "step": 110
+    },
+    {
+      "epoch": 0.008707640954937958,
+      "grad_norm": 5.59375,
+      "learning_rate": 4.9999736346645943e-05,
+      "loss": 0.659,
+      "step": 120
+    },
+    {
+      "epoch": 0.009433277701182788,
+      "grad_norm": 3.953125,
+      "learning_rate": 4.999940678125673e-05,
+      "loss": 0.512,
+      "step": 130
+    },
+    {
+      "epoch": 0.010158914447427617,
+      "grad_norm": 5.46875,
+      "learning_rate": 4.9998945392144796e-05,
+      "loss": 0.5522,
+      "step": 140
+    },
+    {
+      "epoch": 0.010884551193672447,
+      "grad_norm": 3.96875,
+      "learning_rate": 4.999835218174307e-05,
+      "loss": 0.5532,
+      "step": 150
+    },
+    {
+      "epoch": 0.011610187939917277,
+      "grad_norm": 3.234375,
+      "learning_rate": 4.99976271531796e-05,
+      "loss": 0.5353,
+      "step": 160
+    },
+    {
+      "epoch": 0.012335824686162107,
+      "grad_norm": 4.5,
+      "learning_rate": 4.9996770310277506e-05,
+      "loss": 0.4439,
+      "step": 170
+    },
+    {
+      "epoch": 0.013061461432406937,
+      "grad_norm": 4.03125,
+      "learning_rate": 4.9995781657555e-05,
+      "loss": 0.4832,
+      "step": 180
+    },
+    {
+      "epoch": 0.013787098178651766,
+      "grad_norm": 4.71875,
+      "learning_rate": 4.99946612002253e-05,
+      "loss": 0.5325,
+      "step": 190
+    },
+    {
+      "epoch": 0.014512734924896596,
+      "grad_norm": 2.390625,
+      "learning_rate": 4.9993408944196676e-05,
+      "loss": 0.3253,
+      "step": 200
+    },
+    {
+      "epoch": 0.015238371671141426,
+      "grad_norm": 4.25,
+      "learning_rate": 4.9992024896072364e-05,
+      "loss": 0.3897,
+      "step": 210
+    },
+    {
+      "epoch": 0.015964008417386256,
+      "grad_norm": 5.65625,
+      "learning_rate": 4.999050906315055e-05,
+      "loss": 0.3642,
+      "step": 220
+    },
+    {
+      "epoch": 0.016689645163631087,
+      "grad_norm": 1.875,
+      "learning_rate": 4.998886145342434e-05,
+      "loss": 0.4811,
+      "step": 230
+    },
+    {
+      "epoch": 0.017415281909875915,
+      "grad_norm": 3.546875,
+      "learning_rate": 4.9987082075581684e-05,
+      "loss": 0.4725,
+      "step": 240
+    },
+    {
+      "epoch": 0.018140918656120747,
+      "grad_norm": 3.046875,
+      "learning_rate": 4.9985170939005386e-05,
+      "loss": 0.3427,
+      "step": 250
+    },
+    {
+      "epoch": 0.018866555402365575,
+      "grad_norm": 4.5625,
+      "learning_rate": 4.998312805377302e-05,
+      "loss": 0.3568,
+      "step": 260
+    },
+    {
+      "epoch": 0.019592192148610407,
+      "grad_norm": 4.5,
+      "learning_rate": 4.998095343065685e-05,
+      "loss": 0.3803,
+      "step": 270
+    },
+    {
+      "epoch": 0.020317828894855235,
+      "grad_norm": 3.984375,
+      "learning_rate": 4.997864708112384e-05,
+      "loss": 0.3002,
+      "step": 280
+    },
+    {
+      "epoch": 0.021043465641100066,
+      "grad_norm": 6.0625,
+      "learning_rate": 4.997620901733554e-05,
+      "loss": 0.3354,
+      "step": 290
+    },
+    {
+      "epoch": 0.021769102387344894,
+      "grad_norm": 2.703125,
+      "learning_rate": 4.997363925214803e-05,
+      "loss": 0.3285,
+      "step": 300
+    },
+    {
+      "epoch": 0.022494739133589726,
+      "grad_norm": 3.171875,
+      "learning_rate": 4.9970937799111896e-05,
+      "loss": 0.2985,
+      "step": 310
+    },
+    {
+      "epoch": 0.023220375879834554,
+      "grad_norm": 6.15625,
+      "learning_rate": 4.996810467247207e-05,
+      "loss": 0.3869,
+      "step": 320
+    },
+    {
+      "epoch": 0.023946012626079385,
+      "grad_norm": 2.03125,
+      "learning_rate": 4.9965139887167856e-05,
+      "loss": 0.2738,
+      "step": 330
+    },
+    {
+      "epoch": 0.024671649372324213,
+      "grad_norm": 2.9375,
+      "learning_rate": 4.996204345883278e-05,
+      "loss": 0.3396,
+      "step": 340
+    },
+    {
+      "epoch": 0.025397286118569045,
+      "grad_norm": 2.484375,
+      "learning_rate": 4.9958815403794546e-05,
+      "loss": 0.3736,
+      "step": 350
+    },
+    {
+      "epoch": 0.026122922864813873,
+      "grad_norm": 5.09375,
+      "learning_rate": 4.995545573907492e-05,
+      "loss": 0.3458,
+      "step": 360
+    },
+    {
+      "epoch": 0.026848559611058705,
+      "grad_norm": 4.875,
+      "learning_rate": 4.995196448238966e-05,
+      "loss": 0.3279,
+      "step": 370
+    },
+    {
+      "epoch": 0.027574196357303533,
+      "grad_norm": 4.90625,
+      "learning_rate": 4.9948341652148436e-05,
+      "loss": 0.2605,
+      "step": 380
+    },
+    {
+      "epoch": 0.028299833103548364,
+      "grad_norm": 5.40625,
+      "learning_rate": 4.994458726745468e-05,
+      "loss": 0.3662,
+      "step": 390
+    },
+    {
+      "epoch": 0.029025469849793192,
+      "grad_norm": 0.671875,
+      "learning_rate": 4.9940701348105554e-05,
+      "loss": 0.3122,
+      "step": 400
+    },
+    {
+      "epoch": 0.029751106596038024,
+      "grad_norm": 3.625,
+      "learning_rate": 4.99366839145918e-05,
+      "loss": 0.2122,
+      "step": 410
+    },
+    {
+      "epoch": 0.030476743342282852,
+      "grad_norm": 6.0625,
+      "learning_rate": 4.993253498809762e-05,
+      "loss": 0.3558,
+      "step": 420
+    },
+    {
+      "epoch": 0.031202380088527683,
+      "grad_norm": 5.15625,
+      "learning_rate": 4.9928254590500646e-05,
+      "loss": 0.3507,
+      "step": 430
+    },
+    {
+      "epoch": 0.03192801683477251,
+      "grad_norm": 5.3125,
+      "learning_rate": 4.9923842744371707e-05,
+      "loss": 0.283,
+      "step": 440
+    },
+    {
+      "epoch": 0.03265365358101734,
+      "grad_norm": 2.484375,
+      "learning_rate": 4.99192994729748e-05,
+      "loss": 0.2209,
+      "step": 450
+    },
+    {
+      "epoch": 0.033379290327262175,
+      "grad_norm": 4.125,
+      "learning_rate": 4.991462480026693e-05,
+      "loss": 0.3749,
+      "step": 460
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 13781,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 10,
+  "total_flos": 0.0,
+  "train_batch_size": 5,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-460/training_args.bin b/checkpoint-460/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..4c28dcaa99e0497b1c861e1642ce3651d4277bea
--- /dev/null
+++ b/checkpoint-460/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9f4cfc15f45dfcabb458cfe7e31a5e2a32e96eacab3ae733ab7eefd0eca041b5
+size 4984
diff --git a/checkpoint-470/README.md b/checkpoint-470/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..64d55750c9c68c8ef0ccff2f65b1fd32a16578a9
--- /dev/null
+++ b/checkpoint-470/README.md
@@ -0,0 +1,202 @@
+---
+library_name: peft
+base_model: ../pretrained/MiniCPM-2B-dpo-bf16/
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.10.0
\ No newline at end of file
diff --git a/checkpoint-470/adapter_config.json b/checkpoint-470/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..d0eb9db7c6b0c827aba0e86e5057f651b0f868c9
--- /dev/null
+++ b/checkpoint-470/adapter_config.json
@@ -0,0 +1,29 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "../pretrained/MiniCPM-2B-dpo-bf16/",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": "gaussian",
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-470/adapter_model.safetensors b/checkpoint-470/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..f17de6e388648b065b44ddbb886919f63852f441
--- /dev/null
+++ b/checkpoint-470/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4c326444e13d3d3b2940540b5588d9d410ceaa9315f7b3ae6ba6f9eebf3653f6
+size 5919456
diff --git a/checkpoint-470/optimizer.pt b/checkpoint-470/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f23c105cac142c2a86eb5082893cfedb430bf0ab
--- /dev/null
+++ b/checkpoint-470/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:87dbf369e485e0681bd08fef9f7a75bed0d1404c095ed8cf77464777719d9a80
+size 11930938
diff --git a/checkpoint-470/rng_state_0.pth b/checkpoint-470/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..0688ed11d1c135deff86938945f90db7ed5ddbd1
--- /dev/null
+++ b/checkpoint-470/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:adb32e48094a0dac7955fa2f09f16dac717c26183a1658a52b7573a5de626d15
+size 15024
diff --git a/checkpoint-470/rng_state_1.pth b/checkpoint-470/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..0b6938c07f65a9809de4d22b9e0318498658493c
--- /dev/null
+++ b/checkpoint-470/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7ff83bd39b36fad96801a968f07d8d1238dc9b1457c73621fec74fa93d13654d
+size 15024
diff --git a/checkpoint-470/rng_state_2.pth b/checkpoint-470/rng_state_2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..77d6acbeeaf667480e511fe2883124a02cc9a734
--- /dev/null
+++ b/checkpoint-470/rng_state_2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2d3d504b68e122f85d20d5aeb96e17473c5eb042edd256a20bcf11ddca8efc63
+size 15024
diff --git a/checkpoint-470/rng_state_3.pth b/checkpoint-470/rng_state_3.pth
new file mode 100644
index 0000000000000000000000000000000000000000..7d06f172f522dc80c377b02a019e6c0c67aae400
--- /dev/null
+++ b/checkpoint-470/rng_state_3.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:adb16d739758dd771f1f612ef8e6c47be299b5accb7714b043af6f5c8e3e86ca
+size 15024
diff --git a/checkpoint-470/scheduler.pt b/checkpoint-470/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e78e158bc234ddb67d1ef09209bc9e019dab9638
--- /dev/null
+++ b/checkpoint-470/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:282674eff182c610940ef5e603a2e93a16755529e3c4a126db2b2ae1d3605f76
+size 1064
diff --git a/checkpoint-470/trainer_state.json b/checkpoint-470/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..296c9aca80e0d2f5840df81acaf34819e98b7697
--- /dev/null
+++ b/checkpoint-470/trainer_state.json
@@ -0,0 +1,350 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.034104927073507,
+  "eval_steps": 500,
+  "global_step": 470,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0007256367462448298,
+      "grad_norm": 15.6875,
+      "learning_rate": 5e-06,
+      "loss": 3.2562,
+      "step": 10
+    },
+    {
+      "epoch": 0.0014512734924896596,
+      "grad_norm": 17.5,
+      "learning_rate": 1e-05,
+      "loss": 2.9076,
+      "step": 20
+    },
+    {
+      "epoch": 0.0021769102387344894,
+      "grad_norm": 19.125,
+      "learning_rate": 1.5e-05,
+      "loss": 3.0281,
+      "step": 30
+    },
+    {
+      "epoch": 0.0029025469849793192,
+      "grad_norm": 17.875,
+      "learning_rate": 2e-05,
+      "loss": 2.6225,
+      "step": 40
+    },
+    {
+      "epoch": 0.003628183731224149,
+      "grad_norm": 13.9375,
+      "learning_rate": 2.5e-05,
+      "loss": 2.5137,
+      "step": 50
+    },
+    {
+      "epoch": 0.004353820477468979,
+      "grad_norm": 10.6875,
+      "learning_rate": 3e-05,
+      "loss": 1.994,
+      "step": 60
+    },
+    {
+      "epoch": 0.005079457223713809,
+      "grad_norm": 7.53125,
+      "learning_rate": 3.5e-05,
+      "loss": 1.5519,
+      "step": 70
+    },
+    {
+      "epoch": 0.0058050939699586385,
+      "grad_norm": 7.09375,
+      "learning_rate": 4e-05,
+      "loss": 1.2358,
+      "step": 80
+    },
+    {
+      "epoch": 0.006530730716203468,
+      "grad_norm": 8.0625,
+      "learning_rate": 4.5e-05,
+      "loss": 0.953,
+      "step": 90
+    },
+    {
+      "epoch": 0.007256367462448298,
+      "grad_norm": 6.6875,
+      "learning_rate": 5e-05,
+      "loss": 0.6256,
+      "step": 100
+    },
+    {
+      "epoch": 0.007982004208693128,
+      "grad_norm": 5.03125,
+      "learning_rate": 4.9999934086574596e-05,
+      "loss": 0.6601,
+      "step": 110
+    },
+    {
+      "epoch": 0.008707640954937958,
+      "grad_norm": 5.59375,
+      "learning_rate": 4.9999736346645943e-05,
+      "loss": 0.659,
+      "step": 120
+    },
+    {
+      "epoch": 0.009433277701182788,
+      "grad_norm": 3.953125,
+      "learning_rate": 4.999940678125673e-05,
+      "loss": 0.512,
+      "step": 130
+    },
+    {
+      "epoch": 0.010158914447427617,
+      "grad_norm": 5.46875,
+      "learning_rate": 4.9998945392144796e-05,
+      "loss": 0.5522,
+      "step": 140
+    },
+    {
+      "epoch": 0.010884551193672447,
+      "grad_norm": 3.96875,
+      "learning_rate": 4.999835218174307e-05,
+      "loss": 0.5532,
+      "step": 150
+    },
+    {
+      "epoch": 0.011610187939917277,
+      "grad_norm": 3.234375,
+      "learning_rate": 4.99976271531796e-05,
+      "loss": 0.5353,
+      "step": 160
+    },
+    {
+      "epoch": 0.012335824686162107,
+      "grad_norm": 4.5,
+      "learning_rate": 4.9996770310277506e-05,
+      "loss": 0.4439,
+      "step": 170
+    },
+    {
+      "epoch": 0.013061461432406937,
+      "grad_norm": 4.03125,
+      "learning_rate": 4.9995781657555e-05,
+      "loss": 0.4832,
+      "step": 180
+    },
+    {
+      "epoch": 0.013787098178651766,
+      "grad_norm": 4.71875,
+      "learning_rate": 4.99946612002253e-05,
+      "loss": 0.5325,
+      "step": 190
+    },
+    {
+      "epoch": 0.014512734924896596,
+      "grad_norm": 2.390625,
+      "learning_rate": 4.9993408944196676e-05,
+      "loss": 0.3253,
+      "step": 200
+    },
+    {
+      "epoch": 0.015238371671141426,
+      "grad_norm": 4.25,
+      "learning_rate": 4.9992024896072364e-05,
+      "loss": 0.3897,
+      "step": 210
+    },
+    {
+      "epoch": 0.015964008417386256,
+      "grad_norm": 5.65625,
+      "learning_rate": 4.999050906315055e-05,
+      "loss": 0.3642,
+      "step": 220
+    },
+    {
+      "epoch": 0.016689645163631087,
+      "grad_norm": 1.875,
+      "learning_rate": 4.998886145342434e-05,
+      "loss": 0.4811,
+      "step": 230
+    },
+    {
+      "epoch": 0.017415281909875915,
+      "grad_norm": 3.546875,
+      "learning_rate": 4.9987082075581684e-05,
+      "loss": 0.4725,
+      "step": 240
+    },
+    {
+      "epoch": 0.018140918656120747,
+      "grad_norm": 3.046875,
+      "learning_rate": 4.9985170939005386e-05,
+      "loss": 0.3427,
+      "step": 250
+    },
+    {
+      "epoch": 0.018866555402365575,
+      "grad_norm": 4.5625,
+      "learning_rate": 4.998312805377302e-05,
+      "loss": 0.3568,
+      "step": 260
+    },
+    {
+      "epoch": 0.019592192148610407,
+      "grad_norm": 4.5,
+      "learning_rate": 4.998095343065685e-05,
+      "loss": 0.3803,
+      "step": 270
+    },
+    {
+      "epoch": 0.020317828894855235,
+      "grad_norm": 3.984375,
+      "learning_rate": 4.997864708112384e-05,
+      "loss": 0.3002,
+      "step": 280
+    },
+    {
+      "epoch": 0.021043465641100066,
+      "grad_norm": 6.0625,
+      "learning_rate": 4.997620901733554e-05,
+      "loss": 0.3354,
+      "step": 290
+    },
+    {
+      "epoch": 0.021769102387344894,
+      "grad_norm": 2.703125,
+      "learning_rate": 4.997363925214803e-05,
+      "loss": 0.3285,
+      "step": 300
+    },
+    {
+      "epoch": 0.022494739133589726,
+      "grad_norm": 3.171875,
+      "learning_rate": 4.9970937799111896e-05,
+      "loss": 0.2985,
+      "step": 310
+    },
+    {
+      "epoch": 0.023220375879834554,
+      "grad_norm": 6.15625,
+      "learning_rate": 4.996810467247207e-05,
+      "loss": 0.3869,
+      "step": 320
+    },
+    {
+      "epoch": 0.023946012626079385,
+      "grad_norm": 2.03125,
+      "learning_rate": 4.9965139887167856e-05,
+      "loss": 0.2738,
+      "step": 330
+    },
+    {
+      "epoch": 0.024671649372324213,
+      "grad_norm": 2.9375,
+      "learning_rate": 4.996204345883278e-05,
+      "loss": 0.3396,
+      "step": 340
+    },
+    {
+      "epoch": 0.025397286118569045,
+      "grad_norm": 2.484375,
+      "learning_rate": 4.9958815403794546e-05,
+      "loss": 0.3736,
+      "step": 350
+    },
+    {
+      "epoch": 0.026122922864813873,
+      "grad_norm": 5.09375,
+      "learning_rate": 4.995545573907492e-05,
+      "loss": 0.3458,
+      "step": 360
+    },
+    {
+      "epoch": 0.026848559611058705,
+      "grad_norm": 4.875,
+      "learning_rate": 4.995196448238966e-05,
+      "loss": 0.3279,
+      "step": 370
+    },
+    {
+      "epoch": 0.027574196357303533,
+      "grad_norm": 4.90625,
+      "learning_rate": 4.9948341652148436e-05,
+      "loss": 0.2605,
+      "step": 380
+    },
+    {
+      "epoch": 0.028299833103548364,
+      "grad_norm": 5.40625,
+      "learning_rate": 4.994458726745468e-05,
+      "loss": 0.3662,
+      "step": 390
+    },
+    {
+      "epoch": 0.029025469849793192,
+      "grad_norm": 0.671875,
+      "learning_rate": 4.9940701348105554e-05,
+      "loss": 0.3122,
+      "step": 400
+    },
+    {
+      "epoch": 0.029751106596038024,
+      "grad_norm": 3.625,
+      "learning_rate": 4.99366839145918e-05,
+      "loss": 0.2122,
+      "step": 410
+    },
+    {
+      "epoch": 0.030476743342282852,
+      "grad_norm": 6.0625,
+      "learning_rate": 4.993253498809762e-05,
+      "loss": 0.3558,
+      "step": 420
+    },
+    {
+      "epoch": 0.031202380088527683,
+      "grad_norm": 5.15625,
+      "learning_rate": 4.9928254590500646e-05,
+      "loss": 0.3507,
+      "step": 430
+    },
+    {
+      "epoch": 0.03192801683477251,
+      "grad_norm": 5.3125,
+      "learning_rate": 4.9923842744371707e-05,
+      "loss": 0.283,
+      "step": 440
+    },
+    {
+      "epoch": 0.03265365358101734,
+      "grad_norm": 2.484375,
+      "learning_rate": 4.99192994729748e-05,
+      "loss": 0.2209,
+      "step": 450
+    },
+    {
+      "epoch": 0.033379290327262175,
+      "grad_norm": 4.125,
+      "learning_rate": 4.991462480026693e-05,
+      "loss": 0.3749,
+      "step": 460
+    },
+    {
+      "epoch": 0.034104927073507,
+      "grad_norm": 1.171875,
+      "learning_rate": 4.9909818750898e-05,
+      "loss": 0.2958,
+      "step": 470
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 13781,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 10,
+  "total_flos": 0.0,
+  "train_batch_size": 5,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-470/training_args.bin b/checkpoint-470/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..4c28dcaa99e0497b1c861e1642ce3651d4277bea
--- /dev/null
+++ b/checkpoint-470/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9f4cfc15f45dfcabb458cfe7e31a5e2a32e96eacab3ae733ab7eefd0eca041b5
+size 4984
diff --git a/checkpoint-480/README.md b/checkpoint-480/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..64d55750c9c68c8ef0ccff2f65b1fd32a16578a9
--- /dev/null
+++ b/checkpoint-480/README.md
@@ -0,0 +1,202 @@
+---
+library_name: peft
+base_model: ../pretrained/MiniCPM-2B-dpo-bf16/
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.10.0
\ No newline at end of file
diff --git a/checkpoint-480/adapter_config.json b/checkpoint-480/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..d0eb9db7c6b0c827aba0e86e5057f651b0f868c9
--- /dev/null
+++ b/checkpoint-480/adapter_config.json
@@ -0,0 +1,29 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "../pretrained/MiniCPM-2B-dpo-bf16/",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": "gaussian",
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-480/adapter_model.safetensors b/checkpoint-480/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..115d4fca73549105abfa325414fb7cc5e9c0f76b
--- /dev/null
+++ b/checkpoint-480/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:36be68a314bbc922c9ce0b8b2eab65f9e7aee6941ca0df76b8393dd6077a1300
+size 5919456
diff --git a/checkpoint-480/optimizer.pt b/checkpoint-480/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b659320d146e1ce2199d2cbd615c594032fd91dc
--- /dev/null
+++ b/checkpoint-480/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:908318a294c9a2158f18ac727a467073d92916f835f51226bbe3596a484a75c5
+size 11930938
diff --git a/checkpoint-480/rng_state_0.pth b/checkpoint-480/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..dd695f5ec0a9b100368676106e8249dca09000cb
--- /dev/null
+++ b/checkpoint-480/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1bdd7fe83304da1de4b6e029b064d38818f9e09a77fa7b1837c391d69a4db80d
+size 15024
diff --git a/checkpoint-480/rng_state_1.pth b/checkpoint-480/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..4395d4574ab705608389a4096ac7429c17391b99
--- /dev/null
+++ b/checkpoint-480/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a43726b12c49859b617cf6ad2e9ce7c229b70cdbac211acd7ab9fb98f436d792
+size 15024
diff --git a/checkpoint-480/rng_state_2.pth b/checkpoint-480/rng_state_2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..4596e3f7fa806e23ac18d17854250bf36b96348f
--- /dev/null
+++ b/checkpoint-480/rng_state_2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5b073c32a283cc7db429d91fad9c6c3b3750532a54d7bac5b9d921554f64c6e2
+size 15024
diff --git a/checkpoint-480/rng_state_3.pth b/checkpoint-480/rng_state_3.pth
new file mode 100644
index 0000000000000000000000000000000000000000..24813190ba7d7663d2de299dc64912c3742e2731
--- /dev/null
+++ b/checkpoint-480/rng_state_3.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:32de2571eae3fed510c12c3aee5f09bc3d176d5edc3a8b9c5d855f87f2d04e83
+size 15024
diff --git a/checkpoint-480/scheduler.pt b/checkpoint-480/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4441911108cc3e15a455b4238a2be440dc3a824e
--- /dev/null
+++ b/checkpoint-480/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bd57b079506a0bcf5941137436c22cd7f744a9da7aaf0cca954560fe50883e60
+size 1064
diff --git a/checkpoint-480/trainer_state.json b/checkpoint-480/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..0528c98121b986a2d742695886e7912e6313a79c
--- /dev/null
+++ b/checkpoint-480/trainer_state.json
@@ -0,0 +1,357 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.03483056381975183,
+  "eval_steps": 500,
+  "global_step": 480,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0007256367462448298,
+      "grad_norm": 15.6875,
+      "learning_rate": 5e-06,
+      "loss": 3.2562,
+      "step": 10
+    },
+    {
+      "epoch": 0.0014512734924896596,
+      "grad_norm": 17.5,
+      "learning_rate": 1e-05,
+      "loss": 2.9076,
+      "step": 20
+    },
+    {
+      "epoch": 0.0021769102387344894,
+      "grad_norm": 19.125,
+      "learning_rate": 1.5e-05,
+      "loss": 3.0281,
+      "step": 30
+    },
+    {
+      "epoch": 0.0029025469849793192,
+      "grad_norm": 17.875,
+      "learning_rate": 2e-05,
+      "loss": 2.6225,
+      "step": 40
+    },
+    {
+      "epoch": 0.003628183731224149,
+      "grad_norm": 13.9375,
+      "learning_rate": 2.5e-05,
+      "loss": 2.5137,
+      "step": 50
+    },
+    {
+      "epoch": 0.004353820477468979,
+      "grad_norm": 10.6875,
+      "learning_rate": 3e-05,
+      "loss": 1.994,
+      "step": 60
+    },
+    {
+      "epoch": 0.005079457223713809,
+      "grad_norm": 7.53125,
+      "learning_rate": 3.5e-05,
+      "loss": 1.5519,
+      "step": 70
+    },
+    {
+      "epoch": 0.0058050939699586385,
+      "grad_norm": 7.09375,
+      "learning_rate": 4e-05,
+      "loss": 1.2358,
+      "step": 80
+    },
+    {
+      "epoch": 0.006530730716203468,
+      "grad_norm": 8.0625,
+      "learning_rate": 4.5e-05,
+      "loss": 0.953,
+      "step": 90
+    },
+    {
+      "epoch": 0.007256367462448298,
+      "grad_norm": 6.6875,
+      "learning_rate": 5e-05,
+      "loss": 0.6256,
+      "step": 100
+    },
+    {
+      "epoch": 0.007982004208693128,
+      "grad_norm": 5.03125,
+      "learning_rate": 4.9999934086574596e-05,
+      "loss": 0.6601,
+      "step": 110
+    },
+    {
+      "epoch": 0.008707640954937958,
+      "grad_norm": 5.59375,
+      "learning_rate": 4.9999736346645943e-05,
+      "loss": 0.659,
+      "step": 120
+    },
+    {
+      "epoch": 0.009433277701182788,
+      "grad_norm": 3.953125,
+      "learning_rate": 4.999940678125673e-05,
+      "loss": 0.512,
+      "step": 130
+    },
+    {
+      "epoch": 0.010158914447427617,
+      "grad_norm": 5.46875,
+      "learning_rate": 4.9998945392144796e-05,
+      "loss": 0.5522,
+      "step": 140
+    },
+    {
+      "epoch": 0.010884551193672447,
+      "grad_norm": 3.96875,
+      "learning_rate": 4.999835218174307e-05,
+      "loss": 0.5532,
+      "step": 150
+    },
+    {
+      "epoch": 0.011610187939917277,
+      "grad_norm": 3.234375,
+      "learning_rate": 4.99976271531796e-05,
+      "loss": 0.5353,
+      "step": 160
+    },
+    {
+      "epoch": 0.012335824686162107,
+      "grad_norm": 4.5,
+      "learning_rate": 4.9996770310277506e-05,
+      "loss": 0.4439,
+      "step": 170
+    },
+    {
+      "epoch": 0.013061461432406937,
+      "grad_norm": 4.03125,
+      "learning_rate": 4.9995781657555e-05,
+      "loss": 0.4832,
+      "step": 180
+    },
+    {
+      "epoch": 0.013787098178651766,
+      "grad_norm": 4.71875,
+      "learning_rate": 4.99946612002253e-05,
+      "loss": 0.5325,
+      "step": 190
+    },
+    {
+      "epoch": 0.014512734924896596,
+      "grad_norm": 2.390625,
+      "learning_rate": 4.9993408944196676e-05,
+      "loss": 0.3253,
+      "step": 200
+    },
+    {
+      "epoch": 0.015238371671141426,
+      "grad_norm": 4.25,
+      "learning_rate": 4.9992024896072364e-05,
+      "loss": 0.3897,
+      "step": 210
+    },
+    {
+      "epoch": 0.015964008417386256,
+      "grad_norm": 5.65625,
+      "learning_rate": 4.999050906315055e-05,
+      "loss": 0.3642,
+      "step": 220
+    },
+    {
+      "epoch": 0.016689645163631087,
+      "grad_norm": 1.875,
+      "learning_rate": 4.998886145342434e-05,
+      "loss": 0.4811,
+      "step": 230
+    },
+    {
+      "epoch": 0.017415281909875915,
+      "grad_norm": 3.546875,
+      "learning_rate": 4.9987082075581684e-05,
+      "loss": 0.4725,
+      "step": 240
+    },
+    {
+      "epoch": 0.018140918656120747,
+      "grad_norm": 3.046875,
+      "learning_rate": 4.9985170939005386e-05,
+      "loss": 0.3427,
+      "step": 250
+    },
+    {
+      "epoch": 0.018866555402365575,
+      "grad_norm": 4.5625,
+      "learning_rate": 4.998312805377302e-05,
+      "loss": 0.3568,
+      "step": 260
+    },
+    {
+      "epoch": 0.019592192148610407,
+      "grad_norm": 4.5,
+      "learning_rate": 4.998095343065685e-05,
+      "loss": 0.3803,
+      "step": 270
+    },
+    {
+      "epoch": 0.020317828894855235,
+      "grad_norm": 3.984375,
+      "learning_rate": 4.997864708112384e-05,
+      "loss": 0.3002,
+      "step": 280
+    },
+    {
+      "epoch": 0.021043465641100066,
+      "grad_norm": 6.0625,
+      "learning_rate": 4.997620901733554e-05,
+      "loss": 0.3354,
+      "step": 290
+    },
+    {
+      "epoch": 0.021769102387344894,
+      "grad_norm": 2.703125,
+      "learning_rate": 4.997363925214803e-05,
+      "loss": 0.3285,
+      "step": 300
+    },
+    {
+      "epoch": 0.022494739133589726,
+      "grad_norm": 3.171875,
+      "learning_rate": 4.9970937799111896e-05,
+      "loss": 0.2985,
+      "step": 310
+    },
+    {
+      "epoch": 0.023220375879834554,
+      "grad_norm": 6.15625,
+      "learning_rate": 4.996810467247207e-05,
+      "loss": 0.3869,
+      "step": 320
+    },
+    {
+      "epoch": 0.023946012626079385,
+      "grad_norm": 2.03125,
+      "learning_rate": 4.9965139887167856e-05,
+      "loss": 0.2738,
+      "step": 330
+    },
+    {
+      "epoch": 0.024671649372324213,
+      "grad_norm": 2.9375,
+      "learning_rate": 4.996204345883278e-05,
+      "loss": 0.3396,
+      "step": 340
+    },
+    {
+      "epoch": 0.025397286118569045,
+      "grad_norm": 2.484375,
+      "learning_rate": 4.9958815403794546e-05,
+      "loss": 0.3736,
+      "step": 350
+    },
+    {
+      "epoch": 0.026122922864813873,
+      "grad_norm": 5.09375,
+      "learning_rate": 4.995545573907492e-05,
+      "loss": 0.3458,
+      "step": 360
+    },
+    {
+      "epoch": 0.026848559611058705,
+      "grad_norm": 4.875,
+      "learning_rate": 4.995196448238966e-05,
+      "loss": 0.3279,
+      "step": 370
+    },
+    {
+      "epoch": 0.027574196357303533,
+      "grad_norm": 4.90625,
+      "learning_rate": 4.9948341652148436e-05,
+      "loss": 0.2605,
+      "step": 380
+    },
+    {
+      "epoch": 0.028299833103548364,
+      "grad_norm": 5.40625,
+      "learning_rate": 4.994458726745468e-05,
+      "loss": 0.3662,
+      "step": 390
+    },
+    {
+      "epoch": 0.029025469849793192,
+      "grad_norm": 0.671875,
+      "learning_rate": 4.9940701348105554e-05,
+      "loss": 0.3122,
+      "step": 400
+    },
+    {
+      "epoch": 0.029751106596038024,
+      "grad_norm": 3.625,
+      "learning_rate": 4.99366839145918e-05,
+      "loss": 0.2122,
+      "step": 410
+    },
+    {
+      "epoch": 0.030476743342282852,
+      "grad_norm": 6.0625,
+      "learning_rate": 4.993253498809762e-05,
+      "loss": 0.3558,
+      "step": 420
+    },
+    {
+      "epoch": 0.031202380088527683,
+      "grad_norm": 5.15625,
+      "learning_rate": 4.9928254590500646e-05,
+      "loss": 0.3507,
+      "step": 430
+    },
+    {
+      "epoch": 0.03192801683477251,
+      "grad_norm": 5.3125,
+      "learning_rate": 4.9923842744371707e-05,
+      "loss": 0.283,
+      "step": 440
+    },
+    {
+      "epoch": 0.03265365358101734,
+      "grad_norm": 2.484375,
+      "learning_rate": 4.99192994729748e-05,
+      "loss": 0.2209,
+      "step": 450
+    },
+    {
+      "epoch": 0.033379290327262175,
+      "grad_norm": 4.125,
+      "learning_rate": 4.991462480026693e-05,
+      "loss": 0.3749,
+      "step": 460
+    },
+    {
+      "epoch": 0.034104927073507,
+      "grad_norm": 1.171875,
+      "learning_rate": 4.9909818750898e-05,
+      "loss": 0.2958,
+      "step": 470
+    },
+    {
+      "epoch": 0.03483056381975183,
+      "grad_norm": 5.90625,
+      "learning_rate": 4.990488135021065e-05,
+      "loss": 0.2689,
+      "step": 480
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 13781,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 10,
+  "total_flos": 0.0,
+  "train_batch_size": 5,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-480/training_args.bin b/checkpoint-480/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..4c28dcaa99e0497b1c861e1642ce3651d4277bea
--- /dev/null
+++ b/checkpoint-480/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9f4cfc15f45dfcabb458cfe7e31a5e2a32e96eacab3ae733ab7eefd0eca041b5
+size 4984
diff --git a/checkpoint-490/README.md b/checkpoint-490/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..64d55750c9c68c8ef0ccff2f65b1fd32a16578a9
--- /dev/null
+++ b/checkpoint-490/README.md
@@ -0,0 +1,202 @@
+---
+library_name: peft
+base_model: ../pretrained/MiniCPM-2B-dpo-bf16/
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.10.0
\ No newline at end of file
diff --git a/checkpoint-490/adapter_config.json b/checkpoint-490/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..d0eb9db7c6b0c827aba0e86e5057f651b0f868c9
--- /dev/null
+++ b/checkpoint-490/adapter_config.json
@@ -0,0 +1,29 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "../pretrained/MiniCPM-2B-dpo-bf16/",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": "gaussian",
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-490/adapter_model.safetensors b/checkpoint-490/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..42c8069c8cfcbee42d3052bc6913915b330f5cb5
--- /dev/null
+++ b/checkpoint-490/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:636e044557706d45a38272f7ebcf0d53ce78404d7cc03b82e44e8aecc45df603
+size 5919456
diff --git a/checkpoint-490/optimizer.pt b/checkpoint-490/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2abcf3ab9c3c72f264dc7603135cfa4701e354a0
--- /dev/null
+++ b/checkpoint-490/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c5273cf96aedd1921824d21860a1a48e5b098b9e687f8359810a6750c0c92aea
+size 11930938
diff --git a/checkpoint-490/rng_state_0.pth b/checkpoint-490/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..7317b2a071f4d4c6f8928de009a47d5a279b4a3f
--- /dev/null
+++ b/checkpoint-490/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c39e6dd933a981504f685d9eeda547754279c4ea34bdbe961ca8cac2e3da23ed
+size 15024
diff --git a/checkpoint-490/rng_state_1.pth b/checkpoint-490/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..e17a24288d670453b9309de7536e766bc4466cfc
--- /dev/null
+++ b/checkpoint-490/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:da140232ee2cf7677bd8bc3ae61146529b06bddde62889ba5a140503cefa7f75
+size 15024
diff --git a/checkpoint-490/rng_state_2.pth b/checkpoint-490/rng_state_2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..b49ca76f7aa7d5ae52dc6fd8ddba0c7a228405ad
--- /dev/null
+++ b/checkpoint-490/rng_state_2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f899414b9c868c63ca69be28e7db7a82431120a8f303fc775bd158aae7551a82
+size 15024
diff --git a/checkpoint-490/rng_state_3.pth b/checkpoint-490/rng_state_3.pth
new file mode 100644
index 0000000000000000000000000000000000000000..d18c9fa1a3167e5106bbd251fa488da008916302
--- /dev/null
+++ b/checkpoint-490/rng_state_3.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b9c53a8c775f3fc0ce9f5674678dfd36ef6d99a7137fcf28bd9a8052397714b4
+size 15024
diff --git a/checkpoint-490/scheduler.pt b/checkpoint-490/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1170acd6feaf33155dbb866df89953dbed377886
--- /dev/null
+++ b/checkpoint-490/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d5d236fbb7d3caf1f52e4a51096c9e72b24a028780c6280e20d38055f87c14a1
+size 1064
diff --git a/checkpoint-490/trainer_state.json b/checkpoint-490/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..3c30fd8de5e54be272e38176bd2a31fa707c7457
--- /dev/null
+++ b/checkpoint-490/trainer_state.json
@@ -0,0 +1,364 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.03555620056599666,
+  "eval_steps": 500,
+  "global_step": 490,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0007256367462448298,
+      "grad_norm": 15.6875,
+      "learning_rate": 5e-06,
+      "loss": 3.2562,
+      "step": 10
+    },
+    {
+      "epoch": 0.0014512734924896596,
+      "grad_norm": 17.5,
+      "learning_rate": 1e-05,
+      "loss": 2.9076,
+      "step": 20
+    },
+    {
+      "epoch": 0.0021769102387344894,
+      "grad_norm": 19.125,
+      "learning_rate": 1.5e-05,
+      "loss": 3.0281,
+      "step": 30
+    },
+    {
+      "epoch": 0.0029025469849793192,
+      "grad_norm": 17.875,
+      "learning_rate": 2e-05,
+      "loss": 2.6225,
+      "step": 40
+    },
+    {
+      "epoch": 0.003628183731224149,
+      "grad_norm": 13.9375,
+      "learning_rate": 2.5e-05,
+      "loss": 2.5137,
+      "step": 50
+    },
+    {
+      "epoch": 0.004353820477468979,
+      "grad_norm": 10.6875,
+      "learning_rate": 3e-05,
+      "loss": 1.994,
+      "step": 60
+    },
+    {
+      "epoch": 0.005079457223713809,
+      "grad_norm": 7.53125,
+      "learning_rate": 3.5e-05,
+      "loss": 1.5519,
+      "step": 70
+    },
+    {
+      "epoch": 0.0058050939699586385,
+      "grad_norm": 7.09375,
+      "learning_rate": 4e-05,
+      "loss": 1.2358,
+      "step": 80
+    },
+    {
+      "epoch": 0.006530730716203468,
+      "grad_norm": 8.0625,
+      "learning_rate": 4.5e-05,
+      "loss": 0.953,
+      "step": 90
+    },
+    {
+      "epoch": 0.007256367462448298,
+      "grad_norm": 6.6875,
+      "learning_rate": 5e-05,
+      "loss": 0.6256,
+      "step": 100
+    },
+    {
+      "epoch": 0.007982004208693128,
+      "grad_norm": 5.03125,
+      "learning_rate": 4.9999934086574596e-05,
+      "loss": 0.6601,
+      "step": 110
+    },
+    {
+      "epoch": 0.008707640954937958,
+      "grad_norm": 5.59375,
+      "learning_rate": 4.9999736346645943e-05,
+      "loss": 0.659,
+      "step": 120
+    },
+    {
+      "epoch": 0.009433277701182788,
+      "grad_norm": 3.953125,
+      "learning_rate": 4.999940678125673e-05,
+      "loss": 0.512,
+      "step": 130
+    },
+    {
+      "epoch": 0.010158914447427617,
+      "grad_norm": 5.46875,
+      "learning_rate": 4.9998945392144796e-05,
+      "loss": 0.5522,
+      "step": 140
+    },
+    {
+      "epoch": 0.010884551193672447,
+      "grad_norm": 3.96875,
+      "learning_rate": 4.999835218174307e-05,
+      "loss": 0.5532,
+      "step": 150
+    },
+    {
+      "epoch": 0.011610187939917277,
+      "grad_norm": 3.234375,
+      "learning_rate": 4.99976271531796e-05,
+      "loss": 0.5353,
+      "step": 160
+    },
+    {
+      "epoch": 0.012335824686162107,
+      "grad_norm": 4.5,
+      "learning_rate": 4.9996770310277506e-05,
+      "loss": 0.4439,
+      "step": 170
+    },
+    {
+      "epoch": 0.013061461432406937,
+      "grad_norm": 4.03125,
+      "learning_rate": 4.9995781657555e-05,
+      "loss": 0.4832,
+      "step": 180
+    },
+    {
+      "epoch": 0.013787098178651766,
+      "grad_norm": 4.71875,
+      "learning_rate": 4.99946612002253e-05,
+      "loss": 0.5325,
+      "step": 190
+    },
+    {
+      "epoch": 0.014512734924896596,
+      "grad_norm": 2.390625,
+      "learning_rate": 4.9993408944196676e-05,
+      "loss": 0.3253,
+      "step": 200
+    },
+    {
+      "epoch": 0.015238371671141426,
+      "grad_norm": 4.25,
+      "learning_rate": 4.9992024896072364e-05,
+      "loss": 0.3897,
+      "step": 210
+    },
+    {
+      "epoch": 0.015964008417386256,
+      "grad_norm": 5.65625,
+      "learning_rate": 4.999050906315055e-05,
+      "loss": 0.3642,
+      "step": 220
+    },
+    {
+      "epoch": 0.016689645163631087,
+      "grad_norm": 1.875,
+      "learning_rate": 4.998886145342434e-05,
+      "loss": 0.4811,
+      "step": 230
+    },
+    {
+      "epoch": 0.017415281909875915,
+      "grad_norm": 3.546875,
+      "learning_rate": 4.9987082075581684e-05,
+      "loss": 0.4725,
+      "step": 240
+    },
+    {
+      "epoch": 0.018140918656120747,
+      "grad_norm": 3.046875,
+      "learning_rate": 4.9985170939005386e-05,
+      "loss": 0.3427,
+      "step": 250
+    },
+    {
+      "epoch": 0.018866555402365575,
+      "grad_norm": 4.5625,
+      "learning_rate": 4.998312805377302e-05,
+      "loss": 0.3568,
+      "step": 260
+    },
+    {
+      "epoch": 0.019592192148610407,
+      "grad_norm": 4.5,
+      "learning_rate": 4.998095343065685e-05,
+      "loss": 0.3803,
+      "step": 270
+    },
+    {
+      "epoch": 0.020317828894855235,
+      "grad_norm": 3.984375,
+      "learning_rate": 4.997864708112384e-05,
+      "loss": 0.3002,
+      "step": 280
+    },
+    {
+      "epoch": 0.021043465641100066,
+      "grad_norm": 6.0625,
+      "learning_rate": 4.997620901733554e-05,
+      "loss": 0.3354,
+      "step": 290
+    },
+    {
+      "epoch": 0.021769102387344894,
+      "grad_norm": 2.703125,
+      "learning_rate": 4.997363925214803e-05,
+      "loss": 0.3285,
+      "step": 300
+    },
+    {
+      "epoch": 0.022494739133589726,
+      "grad_norm": 3.171875,
+      "learning_rate": 4.9970937799111896e-05,
+      "loss": 0.2985,
+      "step": 310
+    },
+    {
+      "epoch": 0.023220375879834554,
+      "grad_norm": 6.15625,
+      "learning_rate": 4.996810467247207e-05,
+      "loss": 0.3869,
+      "step": 320
+    },
+    {
+      "epoch": 0.023946012626079385,
+      "grad_norm": 2.03125,
+      "learning_rate": 4.9965139887167856e-05,
+      "loss": 0.2738,
+      "step": 330
+    },
+    {
+      "epoch": 0.024671649372324213,
+      "grad_norm": 2.9375,
+      "learning_rate": 4.996204345883278e-05,
+      "loss": 0.3396,
+      "step": 340
+    },
+    {
+      "epoch": 0.025397286118569045,
+      "grad_norm": 2.484375,
+      "learning_rate": 4.9958815403794546e-05,
+      "loss": 0.3736,
+      "step": 350
+    },
+    {
+      "epoch": 0.026122922864813873,
+      "grad_norm": 5.09375,
+      "learning_rate": 4.995545573907492e-05,
+      "loss": 0.3458,
+      "step": 360
+    },
+    {
+      "epoch": 0.026848559611058705,
+      "grad_norm": 4.875,
+      "learning_rate": 4.995196448238966e-05,
+      "loss": 0.3279,
+      "step": 370
+    },
+    {
+      "epoch": 0.027574196357303533,
+      "grad_norm": 4.90625,
+      "learning_rate": 4.9948341652148436e-05,
+      "loss": 0.2605,
+      "step": 380
+    },
+    {
+      "epoch": 0.028299833103548364,
+      "grad_norm": 5.40625,
+      "learning_rate": 4.994458726745468e-05,
+      "loss": 0.3662,
+      "step": 390
+    },
+    {
+      "epoch": 0.029025469849793192,
+      "grad_norm": 0.671875,
+      "learning_rate": 4.9940701348105554e-05,
+      "loss": 0.3122,
+      "step": 400
+    },
+    {
+      "epoch": 0.029751106596038024,
+      "grad_norm": 3.625,
+      "learning_rate": 4.99366839145918e-05,
+      "loss": 0.2122,
+      "step": 410
+    },
+    {
+      "epoch": 0.030476743342282852,
+      "grad_norm": 6.0625,
+      "learning_rate": 4.993253498809762e-05,
+      "loss": 0.3558,
+      "step": 420
+    },
+    {
+      "epoch": 0.031202380088527683,
+      "grad_norm": 5.15625,
+      "learning_rate": 4.9928254590500646e-05,
+      "loss": 0.3507,
+      "step": 430
+    },
+    {
+      "epoch": 0.03192801683477251,
+      "grad_norm": 5.3125,
+      "learning_rate": 4.9923842744371707e-05,
+      "loss": 0.283,
+      "step": 440
+    },
+    {
+      "epoch": 0.03265365358101734,
+      "grad_norm": 2.484375,
+      "learning_rate": 4.99192994729748e-05,
+      "loss": 0.2209,
+      "step": 450
+    },
+    {
+      "epoch": 0.033379290327262175,
+      "grad_norm": 4.125,
+      "learning_rate": 4.991462480026693e-05,
+      "loss": 0.3749,
+      "step": 460
+    },
+    {
+      "epoch": 0.034104927073507,
+      "grad_norm": 1.171875,
+      "learning_rate": 4.9909818750898e-05,
+      "loss": 0.2958,
+      "step": 470
+    },
+    {
+      "epoch": 0.03483056381975183,
+      "grad_norm": 5.90625,
+      "learning_rate": 4.990488135021065e-05,
+      "loss": 0.2689,
+      "step": 480
+    },
+    {
+      "epoch": 0.03555620056599666,
+      "grad_norm": 1.0390625,
+      "learning_rate": 4.989981262424017e-05,
+      "loss": 0.2934,
+      "step": 490
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 13781,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 10,
+  "total_flos": 0.0,
+  "train_batch_size": 5,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-490/training_args.bin b/checkpoint-490/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..4c28dcaa99e0497b1c861e1642ce3651d4277bea
--- /dev/null
+++ b/checkpoint-490/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9f4cfc15f45dfcabb458cfe7e31a5e2a32e96eacab3ae733ab7eefd0eca041b5
+size 4984
diff --git a/checkpoint-50/README.md b/checkpoint-50/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..64d55750c9c68c8ef0ccff2f65b1fd32a16578a9
--- /dev/null
+++ b/checkpoint-50/README.md
@@ -0,0 +1,202 @@
+---
+library_name: peft
+base_model: ../pretrained/MiniCPM-2B-dpo-bf16/
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.10.0
\ No newline at end of file
diff --git a/checkpoint-50/adapter_config.json b/checkpoint-50/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..d0eb9db7c6b0c827aba0e86e5057f651b0f868c9
--- /dev/null
+++ b/checkpoint-50/adapter_config.json
@@ -0,0 +1,29 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "../pretrained/MiniCPM-2B-dpo-bf16/",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": "gaussian",
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-50/adapter_model.safetensors b/checkpoint-50/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..7d1b2cc506406f52ecce7ea93d9cc36c4f721763
--- /dev/null
+++ b/checkpoint-50/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7e278e241f606975c6cb04531244a06e0ebc17c38ed79cc5b238ace1b02193ec
+size 5919456
diff --git a/checkpoint-50/optimizer.pt b/checkpoint-50/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..fa39a5537723073b250f73f22507865591b41fc3
--- /dev/null
+++ b/checkpoint-50/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ceac6c35e63227122aa27ad52daaf702fcdb230ed554ba5a8336a86696b29884
+size 11930938
diff --git a/checkpoint-50/rng_state_0.pth b/checkpoint-50/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..c22151a1209c798e2edd9f72d7c214a21b84a5e7
--- /dev/null
+++ b/checkpoint-50/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1ca2453cc0e6f21b27dd55161c5ceb9f93dba9a60fdcc2334b54014fdebd27f1
+size 15024
diff --git a/checkpoint-50/rng_state_1.pth b/checkpoint-50/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..125a8e581dc62840fccde0304035473fa67aea68
--- /dev/null
+++ b/checkpoint-50/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fccf4bd170cfbbcb8f5cb475846f3d9201f4812f364f4429f76430941f4474e7
+size 15024
diff --git a/checkpoint-50/rng_state_2.pth b/checkpoint-50/rng_state_2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..ae9d38fdb5e2b50484f98af2342b6994395c3c48
--- /dev/null
+++ b/checkpoint-50/rng_state_2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:44870a1cd57a925d8984c35d5ca35418b5efaf6a7e4ddb35fef82771a7a8657e
+size 15024
diff --git a/checkpoint-50/rng_state_3.pth b/checkpoint-50/rng_state_3.pth
new file mode 100644
index 0000000000000000000000000000000000000000..e4c8704fd79cbe7b2ab10d373a0dc386263bdad4
--- /dev/null
+++ b/checkpoint-50/rng_state_3.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c2db40412024102d43c49ec94e638c45b8466c0d3c5054616de33a12991ef0e0
+size 15024
diff --git a/checkpoint-50/scheduler.pt b/checkpoint-50/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..980304869213d2faf584d3741d164502ca176740
--- /dev/null
+++ b/checkpoint-50/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ca2ad0660f4430a149f84300447ae0a59e68e8b51799a2e3848afe158aff8281
+size 1064
diff --git a/checkpoint-50/trainer_state.json b/checkpoint-50/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..81a4922020382aa4d42e7f2ad2ce06a2d556de73
--- /dev/null
+++ b/checkpoint-50/trainer_state.json
@@ -0,0 +1,56 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.003628183731224149,
+  "eval_steps": 500,
+  "global_step": 50,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0007256367462448298,
+      "grad_norm": 15.6875,
+      "learning_rate": 5e-06,
+      "loss": 3.2562,
+      "step": 10
+    },
+    {
+      "epoch": 0.0014512734924896596,
+      "grad_norm": 17.5,
+      "learning_rate": 1e-05,
+      "loss": 2.9076,
+      "step": 20
+    },
+    {
+      "epoch": 0.0021769102387344894,
+      "grad_norm": 19.125,
+      "learning_rate": 1.5e-05,
+      "loss": 3.0281,
+      "step": 30
+    },
+    {
+      "epoch": 0.0029025469849793192,
+      "grad_norm": 17.875,
+      "learning_rate": 2e-05,
+      "loss": 2.6225,
+      "step": 40
+    },
+    {
+      "epoch": 0.003628183731224149,
+      "grad_norm": 13.9375,
+      "learning_rate": 2.5e-05,
+      "loss": 2.5137,
+      "step": 50
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 13781,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 10,
+  "total_flos": 0.0,
+  "train_batch_size": 5,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-50/training_args.bin b/checkpoint-50/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..4c28dcaa99e0497b1c861e1642ce3651d4277bea
--- /dev/null
+++ b/checkpoint-50/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9f4cfc15f45dfcabb458cfe7e31a5e2a32e96eacab3ae733ab7eefd0eca041b5
+size 4984
diff --git a/checkpoint-500/README.md b/checkpoint-500/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..64d55750c9c68c8ef0ccff2f65b1fd32a16578a9
--- /dev/null
+++ b/checkpoint-500/README.md
@@ -0,0 +1,202 @@
+---
+library_name: peft
+base_model: ../pretrained/MiniCPM-2B-dpo-bf16/
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.10.0
\ No newline at end of file
diff --git a/checkpoint-500/adapter_config.json b/checkpoint-500/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..d0eb9db7c6b0c827aba0e86e5057f651b0f868c9
--- /dev/null
+++ b/checkpoint-500/adapter_config.json
@@ -0,0 +1,29 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "../pretrained/MiniCPM-2B-dpo-bf16/",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": "gaussian",
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-500/adapter_model.safetensors b/checkpoint-500/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..1ecee56f3fa8989895e4105b24fb0a17229bd9a6
--- /dev/null
+++ b/checkpoint-500/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:accec9762c9bfaca53ce6d1516d52f491603542956444919287631a1296630a7
+size 5919456
diff --git a/checkpoint-500/optimizer.pt b/checkpoint-500/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a81a9d4ce867193b601e979932f532e756f7e93e
--- /dev/null
+++ b/checkpoint-500/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:762212f0a6cec6a1a97549feada03dd7cc289aa1df860a70ffa7904a7f340398
+size 11930938
diff --git a/checkpoint-500/rng_state_0.pth b/checkpoint-500/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..ecfd871d7ca3dac10b6fbf240674f00e3bbbb3e5
--- /dev/null
+++ b/checkpoint-500/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:61fcabab618aa920263cc05562edc63ab379bbc2af6214bd9b50f4d82d1baa88
+size 15024
diff --git a/checkpoint-500/rng_state_1.pth b/checkpoint-500/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..cf80463aba5b1dcde23e73ae610d777aeba81574
--- /dev/null
+++ b/checkpoint-500/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3bed45a2ce03911ebbd9f3617ca35a520640d1441b923c5de1a5b3d0899274d3
+size 15024
diff --git a/checkpoint-500/rng_state_2.pth b/checkpoint-500/rng_state_2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..0e060fff7b5c3f86838f3bca421e2c0eae7e88bb
--- /dev/null
+++ b/checkpoint-500/rng_state_2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:39b3770ffe7b1e1c70bb3442c8631010c8c7abec9f87214214c5cfb8e07e3ce6
+size 15024
diff --git a/checkpoint-500/rng_state_3.pth b/checkpoint-500/rng_state_3.pth
new file mode 100644
index 0000000000000000000000000000000000000000..b08e4ee1790ce402cb107a08f52a184200d23dab
--- /dev/null
+++ b/checkpoint-500/rng_state_3.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0242c29973a37a9bb180209978d26830ada301aa01aebf1057682da5a647fa69
+size 15024
diff --git a/checkpoint-500/scheduler.pt b/checkpoint-500/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..569c5eb10a361c7b731f145e136364941b0cbd39
--- /dev/null
+++ b/checkpoint-500/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7ba01092ed1259dc7db639b0740726831339bf1937007a1cfb36bf74b5d27448
+size 1064
diff --git a/checkpoint-500/trainer_state.json b/checkpoint-500/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..d2a794950d34616aed74b99a1cb0e47433be63a9
--- /dev/null
+++ b/checkpoint-500/trainer_state.json
@@ -0,0 +1,371 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.036281837312241494,
+  "eval_steps": 500,
+  "global_step": 500,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0007256367462448298,
+      "grad_norm": 15.6875,
+      "learning_rate": 5e-06,
+      "loss": 3.2562,
+      "step": 10
+    },
+    {
+      "epoch": 0.0014512734924896596,
+      "grad_norm": 17.5,
+      "learning_rate": 1e-05,
+      "loss": 2.9076,
+      "step": 20
+    },
+    {
+      "epoch": 0.0021769102387344894,
+      "grad_norm": 19.125,
+      "learning_rate": 1.5e-05,
+      "loss": 3.0281,
+      "step": 30
+    },
+    {
+      "epoch": 0.0029025469849793192,
+      "grad_norm": 17.875,
+      "learning_rate": 2e-05,
+      "loss": 2.6225,
+      "step": 40
+    },
+    {
+      "epoch": 0.003628183731224149,
+      "grad_norm": 13.9375,
+      "learning_rate": 2.5e-05,
+      "loss": 2.5137,
+      "step": 50
+    },
+    {
+      "epoch": 0.004353820477468979,
+      "grad_norm": 10.6875,
+      "learning_rate": 3e-05,
+      "loss": 1.994,
+      "step": 60
+    },
+    {
+      "epoch": 0.005079457223713809,
+      "grad_norm": 7.53125,
+      "learning_rate": 3.5e-05,
+      "loss": 1.5519,
+      "step": 70
+    },
+    {
+      "epoch": 0.0058050939699586385,
+      "grad_norm": 7.09375,
+      "learning_rate": 4e-05,
+      "loss": 1.2358,
+      "step": 80
+    },
+    {
+      "epoch": 0.006530730716203468,
+      "grad_norm": 8.0625,
+      "learning_rate": 4.5e-05,
+      "loss": 0.953,
+      "step": 90
+    },
+    {
+      "epoch": 0.007256367462448298,
+      "grad_norm": 6.6875,
+      "learning_rate": 5e-05,
+      "loss": 0.6256,
+      "step": 100
+    },
+    {
+      "epoch": 0.007982004208693128,
+      "grad_norm": 5.03125,
+      "learning_rate": 4.9999934086574596e-05,
+      "loss": 0.6601,
+      "step": 110
+    },
+    {
+      "epoch": 0.008707640954937958,
+      "grad_norm": 5.59375,
+      "learning_rate": 4.9999736346645943e-05,
+      "loss": 0.659,
+      "step": 120
+    },
+    {
+      "epoch": 0.009433277701182788,
+      "grad_norm": 3.953125,
+      "learning_rate": 4.999940678125673e-05,
+      "loss": 0.512,
+      "step": 130
+    },
+    {
+      "epoch": 0.010158914447427617,
+      "grad_norm": 5.46875,
+      "learning_rate": 4.9998945392144796e-05,
+      "loss": 0.5522,
+      "step": 140
+    },
+    {
+      "epoch": 0.010884551193672447,
+      "grad_norm": 3.96875,
+      "learning_rate": 4.999835218174307e-05,
+      "loss": 0.5532,
+      "step": 150
+    },
+    {
+      "epoch": 0.011610187939917277,
+      "grad_norm": 3.234375,
+      "learning_rate": 4.99976271531796e-05,
+      "loss": 0.5353,
+      "step": 160
+    },
+    {
+      "epoch": 0.012335824686162107,
+      "grad_norm": 4.5,
+      "learning_rate": 4.9996770310277506e-05,
+      "loss": 0.4439,
+      "step": 170
+    },
+    {
+      "epoch": 0.013061461432406937,
+      "grad_norm": 4.03125,
+      "learning_rate": 4.9995781657555e-05,
+      "loss": 0.4832,
+      "step": 180
+    },
+    {
+      "epoch": 0.013787098178651766,
+      "grad_norm": 4.71875,
+      "learning_rate": 4.99946612002253e-05,
+      "loss": 0.5325,
+      "step": 190
+    },
+    {
+      "epoch": 0.014512734924896596,
+      "grad_norm": 2.390625,
+      "learning_rate": 4.9993408944196676e-05,
+      "loss": 0.3253,
+      "step": 200
+    },
+    {
+      "epoch": 0.015238371671141426,
+      "grad_norm": 4.25,
+      "learning_rate": 4.9992024896072364e-05,
+      "loss": 0.3897,
+      "step": 210
+    },
+    {
+      "epoch": 0.015964008417386256,
+      "grad_norm": 5.65625,
+      "learning_rate": 4.999050906315055e-05,
+      "loss": 0.3642,
+      "step": 220
+    },
+    {
+      "epoch": 0.016689645163631087,
+      "grad_norm": 1.875,
+      "learning_rate": 4.998886145342434e-05,
+      "loss": 0.4811,
+      "step": 230
+    },
+    {
+      "epoch": 0.017415281909875915,
+      "grad_norm": 3.546875,
+      "learning_rate": 4.9987082075581684e-05,
+      "loss": 0.4725,
+      "step": 240
+    },
+    {
+      "epoch": 0.018140918656120747,
+      "grad_norm": 3.046875,
+      "learning_rate": 4.9985170939005386e-05,
+      "loss": 0.3427,
+      "step": 250
+    },
+    {
+      "epoch": 0.018866555402365575,
+      "grad_norm": 4.5625,
+      "learning_rate": 4.998312805377302e-05,
+      "loss": 0.3568,
+      "step": 260
+    },
+    {
+      "epoch": 0.019592192148610407,
+      "grad_norm": 4.5,
+      "learning_rate": 4.998095343065685e-05,
+      "loss": 0.3803,
+      "step": 270
+    },
+    {
+      "epoch": 0.020317828894855235,
+      "grad_norm": 3.984375,
+      "learning_rate": 4.997864708112384e-05,
+      "loss": 0.3002,
+      "step": 280
+    },
+    {
+      "epoch": 0.021043465641100066,
+      "grad_norm": 6.0625,
+      "learning_rate": 4.997620901733554e-05,
+      "loss": 0.3354,
+      "step": 290
+    },
+    {
+      "epoch": 0.021769102387344894,
+      "grad_norm": 2.703125,
+      "learning_rate": 4.997363925214803e-05,
+      "loss": 0.3285,
+      "step": 300
+    },
+    {
+      "epoch": 0.022494739133589726,
+      "grad_norm": 3.171875,
+      "learning_rate": 4.9970937799111896e-05,
+      "loss": 0.2985,
+      "step": 310
+    },
+    {
+      "epoch": 0.023220375879834554,
+      "grad_norm": 6.15625,
+      "learning_rate": 4.996810467247207e-05,
+      "loss": 0.3869,
+      "step": 320
+    },
+    {
+      "epoch": 0.023946012626079385,
+      "grad_norm": 2.03125,
+      "learning_rate": 4.9965139887167856e-05,
+      "loss": 0.2738,
+      "step": 330
+    },
+    {
+      "epoch": 0.024671649372324213,
+      "grad_norm": 2.9375,
+      "learning_rate": 4.996204345883278e-05,
+      "loss": 0.3396,
+      "step": 340
+    },
+    {
+      "epoch": 0.025397286118569045,
+      "grad_norm": 2.484375,
+      "learning_rate": 4.9958815403794546e-05,
+      "loss": 0.3736,
+      "step": 350
+    },
+    {
+      "epoch": 0.026122922864813873,
+      "grad_norm": 5.09375,
+      "learning_rate": 4.995545573907492e-05,
+      "loss": 0.3458,
+      "step": 360
+    },
+    {
+      "epoch": 0.026848559611058705,
+      "grad_norm": 4.875,
+      "learning_rate": 4.995196448238966e-05,
+      "loss": 0.3279,
+      "step": 370
+    },
+    {
+      "epoch": 0.027574196357303533,
+      "grad_norm": 4.90625,
+      "learning_rate": 4.9948341652148436e-05,
+      "loss": 0.2605,
+      "step": 380
+    },
+    {
+      "epoch": 0.028299833103548364,
+      "grad_norm": 5.40625,
+      "learning_rate": 4.994458726745468e-05,
+      "loss": 0.3662,
+      "step": 390
+    },
+    {
+      "epoch": 0.029025469849793192,
+      "grad_norm": 0.671875,
+      "learning_rate": 4.9940701348105554e-05,
+      "loss": 0.3122,
+      "step": 400
+    },
+    {
+      "epoch": 0.029751106596038024,
+      "grad_norm": 3.625,
+      "learning_rate": 4.99366839145918e-05,
+      "loss": 0.2122,
+      "step": 410
+    },
+    {
+      "epoch": 0.030476743342282852,
+      "grad_norm": 6.0625,
+      "learning_rate": 4.993253498809762e-05,
+      "loss": 0.3558,
+      "step": 420
+    },
+    {
+      "epoch": 0.031202380088527683,
+      "grad_norm": 5.15625,
+      "learning_rate": 4.9928254590500646e-05,
+      "loss": 0.3507,
+      "step": 430
+    },
+    {
+      "epoch": 0.03192801683477251,
+      "grad_norm": 5.3125,
+      "learning_rate": 4.9923842744371707e-05,
+      "loss": 0.283,
+      "step": 440
+    },
+    {
+      "epoch": 0.03265365358101734,
+      "grad_norm": 2.484375,
+      "learning_rate": 4.99192994729748e-05,
+      "loss": 0.2209,
+      "step": 450
+    },
+    {
+      "epoch": 0.033379290327262175,
+      "grad_norm": 4.125,
+      "learning_rate": 4.991462480026693e-05,
+      "loss": 0.3749,
+      "step": 460
+    },
+    {
+      "epoch": 0.034104927073507,
+      "grad_norm": 1.171875,
+      "learning_rate": 4.9909818750898e-05,
+      "loss": 0.2958,
+      "step": 470
+    },
+    {
+      "epoch": 0.03483056381975183,
+      "grad_norm": 5.90625,
+      "learning_rate": 4.990488135021065e-05,
+      "loss": 0.2689,
+      "step": 480
+    },
+    {
+      "epoch": 0.03555620056599666,
+      "grad_norm": 1.0390625,
+      "learning_rate": 4.989981262424017e-05,
+      "loss": 0.2934,
+      "step": 490
+    },
+    {
+      "epoch": 0.036281837312241494,
+      "grad_norm": 5.40625,
+      "learning_rate": 4.989461259971432e-05,
+      "loss": 0.2184,
+      "step": 500
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 13781,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 10,
+  "total_flos": 0.0,
+  "train_batch_size": 5,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-500/training_args.bin b/checkpoint-500/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..4c28dcaa99e0497b1c861e1642ce3651d4277bea
--- /dev/null
+++ b/checkpoint-500/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9f4cfc15f45dfcabb458cfe7e31a5e2a32e96eacab3ae733ab7eefd0eca041b5
+size 4984
diff --git a/checkpoint-60/README.md b/checkpoint-60/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..64d55750c9c68c8ef0ccff2f65b1fd32a16578a9
--- /dev/null
+++ b/checkpoint-60/README.md
@@ -0,0 +1,202 @@
+---
+library_name: peft
+base_model: ../pretrained/MiniCPM-2B-dpo-bf16/
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.10.0
\ No newline at end of file
diff --git a/checkpoint-60/adapter_config.json b/checkpoint-60/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..d0eb9db7c6b0c827aba0e86e5057f651b0f868c9
--- /dev/null
+++ b/checkpoint-60/adapter_config.json
@@ -0,0 +1,29 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "../pretrained/MiniCPM-2B-dpo-bf16/",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": "gaussian",
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-60/adapter_model.safetensors b/checkpoint-60/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..923e4443196a36f791cb9b39bb828359674ef884
--- /dev/null
+++ b/checkpoint-60/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7b3d2abaa059798beb2681ed9ed6952186f839ad3fda1086480ef8de4b3a562c
+size 5919456
diff --git a/checkpoint-60/optimizer.pt b/checkpoint-60/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6557966e18457eb9759c7d8d7e4915932c13e837
--- /dev/null
+++ b/checkpoint-60/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8aeb579b27ccc58f36ea9384c7ad3b69f5ba9d77768847716cb94d99c88e2b6a
+size 11930938
diff --git a/checkpoint-60/rng_state_0.pth b/checkpoint-60/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..2b20205969f8ceab0cefd399d3b970db10ce340f
--- /dev/null
+++ b/checkpoint-60/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6f1f1853bd0289e0c2e286eebf30fcab4a60b8920f48c47f77d26ce7d8bceb68
+size 15024
diff --git a/checkpoint-60/rng_state_1.pth b/checkpoint-60/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..2c1773fb49b04c4d9f7577c4db77aefc1b0088f6
--- /dev/null
+++ b/checkpoint-60/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c70ca697246aed9cd7431fef50ae27f427404f1a9c5b2b804ce1be7c6f1d21e9
+size 15024
diff --git a/checkpoint-60/rng_state_2.pth b/checkpoint-60/rng_state_2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..49e9cc7554ed5cdab4a255dc022f887b1f39056f
--- /dev/null
+++ b/checkpoint-60/rng_state_2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:aedcb19fea85bf51e8855f549e5c3c5a5674194d15e5bb6c9d9f240c26b75a27
+size 15024
diff --git a/checkpoint-60/rng_state_3.pth b/checkpoint-60/rng_state_3.pth
new file mode 100644
index 0000000000000000000000000000000000000000..1abec23db3478fa5c839adcffe62874068160577
--- /dev/null
+++ b/checkpoint-60/rng_state_3.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:20e5f3f26883631500b94d11d5efc3f8a6d767ce8d700d55ba632033c3586e02
+size 15024
diff --git a/checkpoint-60/scheduler.pt b/checkpoint-60/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..93dd845849536e1428070d6aa61cea5bf8dac1d5
--- /dev/null
+++ b/checkpoint-60/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:44be9c0041c6d3231255d714f395e1fe94715bcee1600354e362197a0d744384
+size 1064
diff --git a/checkpoint-60/trainer_state.json b/checkpoint-60/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..676c46930c5050554d1376f7c8dafe7a44661e92
--- /dev/null
+++ b/checkpoint-60/trainer_state.json
@@ -0,0 +1,63 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.004353820477468979,
+  "eval_steps": 500,
+  "global_step": 60,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0007256367462448298,
+      "grad_norm": 15.6875,
+      "learning_rate": 5e-06,
+      "loss": 3.2562,
+      "step": 10
+    },
+    {
+      "epoch": 0.0014512734924896596,
+      "grad_norm": 17.5,
+      "learning_rate": 1e-05,
+      "loss": 2.9076,
+      "step": 20
+    },
+    {
+      "epoch": 0.0021769102387344894,
+      "grad_norm": 19.125,
+      "learning_rate": 1.5e-05,
+      "loss": 3.0281,
+      "step": 30
+    },
+    {
+      "epoch": 0.0029025469849793192,
+      "grad_norm": 17.875,
+      "learning_rate": 2e-05,
+      "loss": 2.6225,
+      "step": 40
+    },
+    {
+      "epoch": 0.003628183731224149,
+      "grad_norm": 13.9375,
+      "learning_rate": 2.5e-05,
+      "loss": 2.5137,
+      "step": 50
+    },
+    {
+      "epoch": 0.004353820477468979,
+      "grad_norm": 10.6875,
+      "learning_rate": 3e-05,
+      "loss": 1.994,
+      "step": 60
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 13781,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 10,
+  "total_flos": 0.0,
+  "train_batch_size": 5,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-60/training_args.bin b/checkpoint-60/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..4c28dcaa99e0497b1c861e1642ce3651d4277bea
--- /dev/null
+++ b/checkpoint-60/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9f4cfc15f45dfcabb458cfe7e31a5e2a32e96eacab3ae733ab7eefd0eca041b5
+size 4984
diff --git a/checkpoint-70/README.md b/checkpoint-70/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..64d55750c9c68c8ef0ccff2f65b1fd32a16578a9
--- /dev/null
+++ b/checkpoint-70/README.md
@@ -0,0 +1,202 @@
+---
+library_name: peft
+base_model: ../pretrained/MiniCPM-2B-dpo-bf16/
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.10.0
\ No newline at end of file
diff --git a/checkpoint-70/adapter_config.json b/checkpoint-70/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..d0eb9db7c6b0c827aba0e86e5057f651b0f868c9
--- /dev/null
+++ b/checkpoint-70/adapter_config.json
@@ -0,0 +1,29 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "../pretrained/MiniCPM-2B-dpo-bf16/",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": "gaussian",
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-70/adapter_model.safetensors b/checkpoint-70/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..08781074f305f1823194ae4dd15a9de93f033b49
--- /dev/null
+++ b/checkpoint-70/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b04f4470ab3c4ebcd60a03450b03fda80bbca66c0273b13e268d2eac8237eb39
+size 5919456
diff --git a/checkpoint-70/optimizer.pt b/checkpoint-70/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..22e9a2d46dfbbcbd1a439ecb8996a45b45553b42
--- /dev/null
+++ b/checkpoint-70/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9784285de3291bff11e4224facfaa59117203f4be981aed3f33b133f87418796
+size 11930938
diff --git a/checkpoint-70/rng_state_0.pth b/checkpoint-70/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..b63151224da344101d7c5f9d3b321970ae59624c
--- /dev/null
+++ b/checkpoint-70/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3bd9a76af42011b67455136c010822d73652cc5a1906ab585a0dc85da705df4f
+size 15024
diff --git a/checkpoint-70/rng_state_1.pth b/checkpoint-70/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..7315b6da721c316e141e62dbe52f6e4f22167db4
--- /dev/null
+++ b/checkpoint-70/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:30a1fff25571e8a55aa610c430d18e6cd45f9c001032401ace33bd0acfca569c
+size 15024
diff --git a/checkpoint-70/rng_state_2.pth b/checkpoint-70/rng_state_2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..9881002aee7fa02076c4b828d615a04b6fd62976
--- /dev/null
+++ b/checkpoint-70/rng_state_2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b01196072c8cc109737dd5b3b63d9d7ca023ea19e3fc3cf6940d7b2ce8212c3d
+size 15024
diff --git a/checkpoint-70/rng_state_3.pth b/checkpoint-70/rng_state_3.pth
new file mode 100644
index 0000000000000000000000000000000000000000..a7559c9f3227d79c916332a1fb9fd2062183b44b
--- /dev/null
+++ b/checkpoint-70/rng_state_3.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:35448ff23a607170874ca44851806012e8ac2b41960a520b5afed0aaa3c68904
+size 15024
diff --git a/checkpoint-70/scheduler.pt b/checkpoint-70/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d95ca91e7fb4eee143d06cb46b0579b3bd72acfc
--- /dev/null
+++ b/checkpoint-70/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:77a48a953b9930e0770226be4a576decb03f1f88a955a0810c00f29dd7a6afd8
+size 1064
diff --git a/checkpoint-70/trainer_state.json b/checkpoint-70/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..6e4833793657f69dbe5f8e50644d90e7f65a7f6c
--- /dev/null
+++ b/checkpoint-70/trainer_state.json
@@ -0,0 +1,70 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.005079457223713809,
+  "eval_steps": 500,
+  "global_step": 70,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0007256367462448298,
+      "grad_norm": 15.6875,
+      "learning_rate": 5e-06,
+      "loss": 3.2562,
+      "step": 10
+    },
+    {
+      "epoch": 0.0014512734924896596,
+      "grad_norm": 17.5,
+      "learning_rate": 1e-05,
+      "loss": 2.9076,
+      "step": 20
+    },
+    {
+      "epoch": 0.0021769102387344894,
+      "grad_norm": 19.125,
+      "learning_rate": 1.5e-05,
+      "loss": 3.0281,
+      "step": 30
+    },
+    {
+      "epoch": 0.0029025469849793192,
+      "grad_norm": 17.875,
+      "learning_rate": 2e-05,
+      "loss": 2.6225,
+      "step": 40
+    },
+    {
+      "epoch": 0.003628183731224149,
+      "grad_norm": 13.9375,
+      "learning_rate": 2.5e-05,
+      "loss": 2.5137,
+      "step": 50
+    },
+    {
+      "epoch": 0.004353820477468979,
+      "grad_norm": 10.6875,
+      "learning_rate": 3e-05,
+      "loss": 1.994,
+      "step": 60
+    },
+    {
+      "epoch": 0.005079457223713809,
+      "grad_norm": 7.53125,
+      "learning_rate": 3.5e-05,
+      "loss": 1.5519,
+      "step": 70
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 13781,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 10,
+  "total_flos": 0.0,
+  "train_batch_size": 5,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-70/training_args.bin b/checkpoint-70/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..4c28dcaa99e0497b1c861e1642ce3651d4277bea
--- /dev/null
+++ b/checkpoint-70/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9f4cfc15f45dfcabb458cfe7e31a5e2a32e96eacab3ae733ab7eefd0eca041b5
+size 4984
diff --git a/checkpoint-80/README.md b/checkpoint-80/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..64d55750c9c68c8ef0ccff2f65b1fd32a16578a9
--- /dev/null
+++ b/checkpoint-80/README.md
@@ -0,0 +1,202 @@
+---
+library_name: peft
+base_model: ../pretrained/MiniCPM-2B-dpo-bf16/
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.10.0
\ No newline at end of file
diff --git a/checkpoint-80/adapter_config.json b/checkpoint-80/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..d0eb9db7c6b0c827aba0e86e5057f651b0f868c9
--- /dev/null
+++ b/checkpoint-80/adapter_config.json
@@ -0,0 +1,29 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "../pretrained/MiniCPM-2B-dpo-bf16/",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": "gaussian",
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-80/adapter_model.safetensors b/checkpoint-80/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..e8cc9e000f799ed78f5c500d4eb123af4c22700b
--- /dev/null
+++ b/checkpoint-80/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d9345ed4f5ac90038064bf316f74fd3baaa3993997b729357b7d40cffea51e42
+size 5919456
diff --git a/checkpoint-80/optimizer.pt b/checkpoint-80/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6d4a701e9bfc282217de89549532b72977b8fcd7
--- /dev/null
+++ b/checkpoint-80/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4a104617e97324f7db56b8c594a76475aea5a1b40b1273a6e20d57561fbc0eac
+size 11930938
diff --git a/checkpoint-80/rng_state_0.pth b/checkpoint-80/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..76720b9437dc08ee3e12964d6811f69c89fd7c79
--- /dev/null
+++ b/checkpoint-80/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:af950c724214e25a469ed811cc52a5d554e829ceb6c1cf7a03bdc24c86d6e0b9
+size 15024
diff --git a/checkpoint-80/rng_state_1.pth b/checkpoint-80/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..004394f1e8517ab729806995011b19aad2a89178
--- /dev/null
+++ b/checkpoint-80/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ed86885179845003190a32cdc33e19f0053060bbdfa9a5b23e5b48eff7e4e9c8
+size 15024
diff --git a/checkpoint-80/rng_state_2.pth b/checkpoint-80/rng_state_2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..de54d59c561ea09a5172f38ed86e037fdccbf896
--- /dev/null
+++ b/checkpoint-80/rng_state_2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:24aec4517e40fd3b85c1bd78e397be1f786eaa07c086070755f93d9be01a8484
+size 15024
diff --git a/checkpoint-80/rng_state_3.pth b/checkpoint-80/rng_state_3.pth
new file mode 100644
index 0000000000000000000000000000000000000000..30240adedb98a17f51025bcf6635dd0cf7b25d2d
--- /dev/null
+++ b/checkpoint-80/rng_state_3.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c01947a5eaede74837243ff3c9d13af7d57cff8f3a4d3c5f34561778bb348151
+size 15024
diff --git a/checkpoint-80/scheduler.pt b/checkpoint-80/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8ef3c03bb9b8ea158a005216bb7b8e81138e1f16
--- /dev/null
+++ b/checkpoint-80/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:430cc59dc6008ac325d1e81fc5b21cab448b30491fc94f6668866ca5ad44c8e9
+size 1064
diff --git a/checkpoint-80/trainer_state.json b/checkpoint-80/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..af240abaea786570f406e3ae76729fce6cfc5b9b
--- /dev/null
+++ b/checkpoint-80/trainer_state.json
@@ -0,0 +1,77 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.0058050939699586385,
+  "eval_steps": 500,
+  "global_step": 80,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0007256367462448298,
+      "grad_norm": 15.6875,
+      "learning_rate": 5e-06,
+      "loss": 3.2562,
+      "step": 10
+    },
+    {
+      "epoch": 0.0014512734924896596,
+      "grad_norm": 17.5,
+      "learning_rate": 1e-05,
+      "loss": 2.9076,
+      "step": 20
+    },
+    {
+      "epoch": 0.0021769102387344894,
+      "grad_norm": 19.125,
+      "learning_rate": 1.5e-05,
+      "loss": 3.0281,
+      "step": 30
+    },
+    {
+      "epoch": 0.0029025469849793192,
+      "grad_norm": 17.875,
+      "learning_rate": 2e-05,
+      "loss": 2.6225,
+      "step": 40
+    },
+    {
+      "epoch": 0.003628183731224149,
+      "grad_norm": 13.9375,
+      "learning_rate": 2.5e-05,
+      "loss": 2.5137,
+      "step": 50
+    },
+    {
+      "epoch": 0.004353820477468979,
+      "grad_norm": 10.6875,
+      "learning_rate": 3e-05,
+      "loss": 1.994,
+      "step": 60
+    },
+    {
+      "epoch": 0.005079457223713809,
+      "grad_norm": 7.53125,
+      "learning_rate": 3.5e-05,
+      "loss": 1.5519,
+      "step": 70
+    },
+    {
+      "epoch": 0.0058050939699586385,
+      "grad_norm": 7.09375,
+      "learning_rate": 4e-05,
+      "loss": 1.2358,
+      "step": 80
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 13781,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 10,
+  "total_flos": 0.0,
+  "train_batch_size": 5,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-80/training_args.bin b/checkpoint-80/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..4c28dcaa99e0497b1c861e1642ce3651d4277bea
--- /dev/null
+++ b/checkpoint-80/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9f4cfc15f45dfcabb458cfe7e31a5e2a32e96eacab3ae733ab7eefd0eca041b5
+size 4984
diff --git a/checkpoint-90/README.md b/checkpoint-90/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..64d55750c9c68c8ef0ccff2f65b1fd32a16578a9
--- /dev/null
+++ b/checkpoint-90/README.md
@@ -0,0 +1,202 @@
+---
+library_name: peft
+base_model: ../pretrained/MiniCPM-2B-dpo-bf16/
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.10.0
\ No newline at end of file
diff --git a/checkpoint-90/adapter_config.json b/checkpoint-90/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..d0eb9db7c6b0c827aba0e86e5057f651b0f868c9
--- /dev/null
+++ b/checkpoint-90/adapter_config.json
@@ -0,0 +1,29 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "../pretrained/MiniCPM-2B-dpo-bf16/",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": "gaussian",
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-90/adapter_model.safetensors b/checkpoint-90/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..335e7085d6775641b8e307c76b837311e2b111c9
--- /dev/null
+++ b/checkpoint-90/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ba2de69b75b405c22101606057de526418211c034b3b0abc52172bf6bb7f0edc
+size 5919456
diff --git a/checkpoint-90/optimizer.pt b/checkpoint-90/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..788ad4458481aad3ff771ceb8a4270514c0eadd1
--- /dev/null
+++ b/checkpoint-90/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5724a185e9d6a0915d59115331cf34f564e38032bef099a0ba5081cf119819ce
+size 11930938
diff --git a/checkpoint-90/rng_state_0.pth b/checkpoint-90/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..19720e2b18076fa2ead67de04ee9c96964f4ace1
--- /dev/null
+++ b/checkpoint-90/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3959da6672a208373721817cf0e3832571aa7268aa9e524fa17dfd5d50cbed40
+size 15024
diff --git a/checkpoint-90/rng_state_1.pth b/checkpoint-90/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..24d076615f4330fab138043f520f9f161babe2ab
--- /dev/null
+++ b/checkpoint-90/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3f3b85410c8672b9af1749593b68135a96800eca07b66320e42e9061f80da5f2
+size 15024
diff --git a/checkpoint-90/rng_state_2.pth b/checkpoint-90/rng_state_2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..4437274ab7ddcbd85c29a0856b3f9c375d14acac
--- /dev/null
+++ b/checkpoint-90/rng_state_2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ef04fc5cb5439ad39bd641e4aa7ed5be94ac766d2b3a825174e947dab98b4585
+size 15024
diff --git a/checkpoint-90/rng_state_3.pth b/checkpoint-90/rng_state_3.pth
new file mode 100644
index 0000000000000000000000000000000000000000..3efbeb495f36c856b57dd8fadae99936c837c806
--- /dev/null
+++ b/checkpoint-90/rng_state_3.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0b0ee1ee170078fd3882035945fc254ebb8766d78dfd4d44f2075cedab8774df
+size 15024
diff --git a/checkpoint-90/scheduler.pt b/checkpoint-90/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d902e668e2ed8a87ed002b01ab8e088303523877
--- /dev/null
+++ b/checkpoint-90/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:625419e765f1b653d99619ed711c48daf53d5dddda4d9549b45887b261cf8c2d
+size 1064
diff --git a/checkpoint-90/trainer_state.json b/checkpoint-90/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..c1117a67df6934a800a4527013d6163bed075b98
--- /dev/null
+++ b/checkpoint-90/trainer_state.json
@@ -0,0 +1,84 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.006530730716203468,
+  "eval_steps": 500,
+  "global_step": 90,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0007256367462448298,
+      "grad_norm": 15.6875,
+      "learning_rate": 5e-06,
+      "loss": 3.2562,
+      "step": 10
+    },
+    {
+      "epoch": 0.0014512734924896596,
+      "grad_norm": 17.5,
+      "learning_rate": 1e-05,
+      "loss": 2.9076,
+      "step": 20
+    },
+    {
+      "epoch": 0.0021769102387344894,
+      "grad_norm": 19.125,
+      "learning_rate": 1.5e-05,
+      "loss": 3.0281,
+      "step": 30
+    },
+    {
+      "epoch": 0.0029025469849793192,
+      "grad_norm": 17.875,
+      "learning_rate": 2e-05,
+      "loss": 2.6225,
+      "step": 40
+    },
+    {
+      "epoch": 0.003628183731224149,
+      "grad_norm": 13.9375,
+      "learning_rate": 2.5e-05,
+      "loss": 2.5137,
+      "step": 50
+    },
+    {
+      "epoch": 0.004353820477468979,
+      "grad_norm": 10.6875,
+      "learning_rate": 3e-05,
+      "loss": 1.994,
+      "step": 60
+    },
+    {
+      "epoch": 0.005079457223713809,
+      "grad_norm": 7.53125,
+      "learning_rate": 3.5e-05,
+      "loss": 1.5519,
+      "step": 70
+    },
+    {
+      "epoch": 0.0058050939699586385,
+      "grad_norm": 7.09375,
+      "learning_rate": 4e-05,
+      "loss": 1.2358,
+      "step": 80
+    },
+    {
+      "epoch": 0.006530730716203468,
+      "grad_norm": 8.0625,
+      "learning_rate": 4.5e-05,
+      "loss": 0.953,
+      "step": 90
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 13781,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 10,
+  "total_flos": 0.0,
+  "train_batch_size": 5,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-90/training_args.bin b/checkpoint-90/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..4c28dcaa99e0497b1c861e1642ce3651d4277bea
--- /dev/null
+++ b/checkpoint-90/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9f4cfc15f45dfcabb458cfe7e31a5e2a32e96eacab3ae733ab7eefd0eca041b5
+size 4984