diff --git a/checkpoint-10/README.md b/checkpoint-10/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..64d55750c9c68c8ef0ccff2f65b1fd32a16578a9
--- /dev/null
+++ b/checkpoint-10/README.md
@@ -0,0 +1,202 @@
+---
+library_name: peft
+base_model: ../pretrained/MiniCPM-2B-dpo-bf16/
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information is needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and BibTeX information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.10.0
\ No newline at end of file
diff --git a/checkpoint-10/adapter_config.json b/checkpoint-10/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..d0eb9db7c6b0c827aba0e86e5057f651b0f868c9
--- /dev/null
+++ b/checkpoint-10/adapter_config.json
@@ -0,0 +1,29 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "../pretrained/MiniCPM-2B-dpo-bf16/",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": "gaussian",
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-10/adapter_model.safetensors b/checkpoint-10/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..37101fe667e9996c09abda2c1865679d77cdb1aa
--- /dev/null
+++ b/checkpoint-10/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f4406ae821f305d34c957fbe86bbc2b490df6a944dcaf12d0f5067bfd6eb692d
+size 5919456
diff --git a/checkpoint-10/optimizer.pt b/checkpoint-10/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e6c85f48f24213b450845ae9ce939dc53f5dc55e
--- /dev/null
+++ b/checkpoint-10/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:182e3a17774463e0c6bd5a37fdd0e3c3019690160e9e56a5105f298808efdc4c
+size 11930938
diff --git a/checkpoint-10/rng_state_0.pth b/checkpoint-10/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..d07c0d30a3625cf05559328a4f294c03f120ae4e
--- /dev/null
+++ b/checkpoint-10/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:279c231f7db5849b53ea6f61278709c8be27bcc46fc1b36100377bf36c55cfb9
+size 15024
diff --git a/checkpoint-10/rng_state_1.pth b/checkpoint-10/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..75dabdf47ca16589484e7a8764746b8cd6ed5460
--- /dev/null
+++ b/checkpoint-10/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:35738ebb9e53709608b7f4feaf1edbde1a19901d813f15922153ded80ead6540
+size 15024
diff --git a/checkpoint-10/rng_state_2.pth b/checkpoint-10/rng_state_2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..ddccb6f4cdc16ba7966cfc15f73a942d772698dc
--- /dev/null
+++ b/checkpoint-10/rng_state_2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3f411b8c60d90c0733bb03c4955ea2e40ab35464f214cb47cc4d6d0eaa83bc79
+size 15024
diff --git a/checkpoint-10/rng_state_3.pth b/checkpoint-10/rng_state_3.pth
new file mode 100644
index 0000000000000000000000000000000000000000..a3043bb36c63411ce0f5a39028bda1d8676ba04d
--- /dev/null
+++ b/checkpoint-10/rng_state_3.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7224ff493b87486a3e2c3001115ad539913e8fe95cf25f4bcae3236f97e83f41
+size 15024
diff --git a/checkpoint-10/scheduler.pt b/checkpoint-10/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c34a498b4975a9f1d44b9f61007924fbd3ced8c9
--- /dev/null
+++ b/checkpoint-10/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f83a43a1702074407e12e3670badb2ee9d4306d850fa20aefab82c78c45597c3
+size 1064
diff --git a/checkpoint-10/trainer_state.json b/checkpoint-10/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..11849fe479b805324fe9f1cc94e4e8fcf53ccf39
--- /dev/null
+++ b/checkpoint-10/trainer_state.json
@@ -0,0 +1,28 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.0007256367462448298,
+  "eval_steps": 500,
+  "global_step": 10,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0007256367462448298,
+      "grad_norm": 12.0,
+      "learning_rate": 5e-05,
+      "loss": 3.0993,
+      "step": 10
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 13781,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 10,
+  "total_flos": 0.0,
+  "train_batch_size": 5,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-10/training_args.bin b/checkpoint-10/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..7d5e801f990cb70d84bd631ef6b3738864bdfd3d
--- /dev/null
+++ b/checkpoint-10/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3cf317b5a0322fdf5b8c610bfc4afb6ece34736fcb1bea30f92c1212b9b53d69
+size 4984
diff --git a/checkpoint-100/README.md b/checkpoint-100/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..64d55750c9c68c8ef0ccff2f65b1fd32a16578a9
--- /dev/null
+++ b/checkpoint-100/README.md
@@ -0,0 +1,202 @@
+---
+library_name: peft
+base_model: ../pretrained/MiniCPM-2B-dpo-bf16/
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information is needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and BibTeX information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.10.0
\ No newline at end of file
diff --git a/checkpoint-100/adapter_config.json b/checkpoint-100/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..d0eb9db7c6b0c827aba0e86e5057f651b0f868c9
--- /dev/null
+++ b/checkpoint-100/adapter_config.json
@@ -0,0 +1,29 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "../pretrained/MiniCPM-2B-dpo-bf16/",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": "gaussian",
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-100/adapter_model.safetensors b/checkpoint-100/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..b74af531369a1d58719c9a1c595bea5fa49d4571
--- /dev/null
+++ b/checkpoint-100/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e7bcb27035964bb245798a3a05ab2c41807599f7ef3993bf547a298d2a4abf1e
+size 5919456
diff --git a/checkpoint-100/optimizer.pt b/checkpoint-100/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..976c7498b499daaee5cdfdd165391e5f90d82e5d
--- /dev/null
+++ b/checkpoint-100/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4cb35f5dac123d790c75782fc60bc45b28afd76cdf742d7d98cceda5ae4d25a4
+size 11930938
diff --git a/checkpoint-100/rng_state_0.pth b/checkpoint-100/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..c8bbbc393d2f7f6d7f761800ae69c6c68a174255
--- /dev/null
+++ b/checkpoint-100/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:85f43e42ff30186bb51f3d90dcd7d261d6e09960636961fd696f9478303d1331
+size 15024
diff --git a/checkpoint-100/rng_state_1.pth b/checkpoint-100/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..edd22345065182eb183497bc022e0478fcff51b0
--- /dev/null
+++ b/checkpoint-100/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:daf210db9013b20416569b6811b878570fbbf461f867de41a8a69fd07f0d2c8c
+size 15024
diff --git a/checkpoint-100/rng_state_2.pth b/checkpoint-100/rng_state_2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..a0d578b7350bb04a417db4a4c480b09aca69ee1a
--- /dev/null
+++ b/checkpoint-100/rng_state_2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:132e0dad8f05cba3da38386b81951c801df7c5c2c1cf9e06b5d359b7b92422da
+size 15024
diff --git a/checkpoint-100/rng_state_3.pth b/checkpoint-100/rng_state_3.pth
new file mode 100644
index 0000000000000000000000000000000000000000..b148f15da5fe8c4b609ef13484f64240b435570a
--- /dev/null
+++ b/checkpoint-100/rng_state_3.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:441e2aab46e3935d5d49029fda3ebaf07053ac3a8e8a6eb7aca038ab1127bea1
+size 15024
diff --git a/checkpoint-100/scheduler.pt b/checkpoint-100/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..87e94225876cb9e878c3e22abce52c1dca5101fd
--- /dev/null
+++ b/checkpoint-100/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dc19241d2a41bda65817df0018620c7c8c7fc240c9e0ee55d613dab2e95b85b4
+size 1064
diff --git a/checkpoint-100/trainer_state.json b/checkpoint-100/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..9b364a4edd415608aca5994bcbe5d3257238705d
--- /dev/null
+++ b/checkpoint-100/trainer_state.json
@@ -0,0 +1,91 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.007256367462448298,
+  "eval_steps": 500,
+  "global_step": 100,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0007256367462448298,
+      "grad_norm": 12.0,
+      "learning_rate": 5e-05,
+      "loss": 3.0993,
+      "step": 10
+    },
+    {
+      "epoch": 0.0014512734924896596,
+      "grad_norm": 10.4375,
+      "learning_rate": 0.0001,
+      "loss": 2.208,
+      "step": 20
+    },
+    {
+      "epoch": 0.0021769102387344894,
+      "grad_norm": 6.5625,
+      "learning_rate": 0.00015,
+      "loss": 1.3285,
+      "step": 30
+    },
+    {
+      "epoch": 0.0029025469849793192,
+      "grad_norm": 6.21875,
+      "learning_rate": 0.0002,
+      "loss": 0.6895,
+      "step": 40
+    },
+    {
+      "epoch": 0.003628183731224149,
+      "grad_norm": 4.15625,
+      "learning_rate": 0.00025,
+      "loss": 0.714,
+      "step": 50
+    },
+    {
+      "epoch": 0.004353820477468979,
+      "grad_norm": 4.71875,
+      "learning_rate": 0.0003,
+      "loss": 0.4849,
+      "step": 60
+    },
+    {
+      "epoch": 0.005079457223713809,
+      "grad_norm": 3.828125,
+      "learning_rate": 0.00035,
+      "loss": 0.3671,
+      "step": 70
+    },
+    {
+      "epoch": 0.0058050939699586385,
+      "grad_norm": 4.03125,
+      "learning_rate": 0.0004,
+      "loss": 0.5693,
+      "step": 80
+    },
+    {
+      "epoch": 0.006530730716203468,
+      "grad_norm": 3.296875,
+      "learning_rate": 0.00045000000000000004,
+      "loss": 0.4133,
+      "step": 90
+    },
+    {
+      "epoch": 0.007256367462448298,
+      "grad_norm": 2.640625,
+      "learning_rate": 0.0005,
+      "loss": 0.2668,
+      "step": 100
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 13781,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 10,
+  "total_flos": 0.0,
+  "train_batch_size": 5,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-100/training_args.bin b/checkpoint-100/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..7d5e801f990cb70d84bd631ef6b3738864bdfd3d
--- /dev/null
+++ b/checkpoint-100/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3cf317b5a0322fdf5b8c610bfc4afb6ece34736fcb1bea30f92c1212b9b53d69
+size 4984
diff --git a/checkpoint-110/README.md b/checkpoint-110/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..64d55750c9c68c8ef0ccff2f65b1fd32a16578a9
--- /dev/null
+++ b/checkpoint-110/README.md
@@ -0,0 +1,202 @@
+---
+library_name: peft
+base_model: ../pretrained/MiniCPM-2B-dpo-bf16/
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information is needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and BibTeX information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.10.0
\ No newline at end of file
diff --git a/checkpoint-110/adapter_config.json b/checkpoint-110/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..d0eb9db7c6b0c827aba0e86e5057f651b0f868c9
--- /dev/null
+++ b/checkpoint-110/adapter_config.json
@@ -0,0 +1,29 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "../pretrained/MiniCPM-2B-dpo-bf16/",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": "gaussian",
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-110/adapter_model.safetensors b/checkpoint-110/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..44cde0e14c19142b23dc84fc18116a973be34ba1
--- /dev/null
+++ b/checkpoint-110/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0357a5bc50e5aa11056aca327190ef039232e17e2c30152c3f28f2ced07f80d7
+size 5919456
diff --git a/checkpoint-110/optimizer.pt b/checkpoint-110/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b2431b028c6d5c034ca04b485bb89d5b366775b8
--- /dev/null
+++ b/checkpoint-110/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1bb407bfe2800cc6fdca75862cafe072c48ae7906dc275dfa7e98ff5edaf0e08
+size 11930938
diff --git a/checkpoint-110/rng_state_0.pth b/checkpoint-110/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..ab1b3f8498e8893f53b0e7445652083013543648
--- /dev/null
+++ b/checkpoint-110/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ac95db18ae6a1e414f19563e15335ec1a3d44d5b26a3896a591a42bf53daac57
+size 15024
diff --git a/checkpoint-110/rng_state_1.pth b/checkpoint-110/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..7760ee8749e66a9d62b997c70fef033a376345a8
--- /dev/null
+++ b/checkpoint-110/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9ad2adf8c9d84012d5c08bc34b7d7b7bd8f571238b97deba7b563bc8579f284e
+size 15024
diff --git a/checkpoint-110/rng_state_2.pth b/checkpoint-110/rng_state_2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..94ca491bdda1c6621eb39af13f7cf2922e82a4dd
--- /dev/null
+++ b/checkpoint-110/rng_state_2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d02fc29e95ce367f0b8273bbbf6e41186c317282c9a486968d768ffcb716f8dd
+size 15024
diff --git a/checkpoint-110/rng_state_3.pth b/checkpoint-110/rng_state_3.pth
new file mode 100644
index 0000000000000000000000000000000000000000..3aea3e1a77a34852a8edf99d524471d3e209ba56
--- /dev/null
+++ b/checkpoint-110/rng_state_3.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:540b9cf222feb8019c875aee3fd37ce5b892ea395b93ddd0b75459462687e321
+size 15024
diff --git a/checkpoint-110/scheduler.pt b/checkpoint-110/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4d85b42865b3854e3a9062122b9d9ed6fe56c511
--- /dev/null
+++ b/checkpoint-110/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:74829487ac903b3ed35b75a02da621979cc3f366050c3017755d2bfa4119fb65
+size 1064
diff --git a/checkpoint-110/trainer_state.json b/checkpoint-110/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..950155d7a58efaeb7906a6f735197882a7dae50a
--- /dev/null
+++ b/checkpoint-110/trainer_state.json
@@ -0,0 +1,98 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.007982004208693128,
+  "eval_steps": 500,
+  "global_step": 110,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0007256367462448298,
+      "grad_norm": 12.0,
+      "learning_rate": 5e-05,
+      "loss": 3.0993,
+      "step": 10
+    },
+    {
+      "epoch": 0.0014512734924896596,
+      "grad_norm": 10.4375,
+      "learning_rate": 0.0001,
+      "loss": 2.208,
+      "step": 20
+    },
+    {
+      "epoch": 0.0021769102387344894,
+      "grad_norm": 6.5625,
+      "learning_rate": 0.00015,
+      "loss": 1.3285,
+      "step": 30
+    },
+    {
+      "epoch": 0.0029025469849793192,
+      "grad_norm": 6.21875,
+      "learning_rate": 0.0002,
+      "loss": 0.6895,
+      "step": 40
+    },
+    {
+      "epoch": 0.003628183731224149,
+      "grad_norm": 4.15625,
+      "learning_rate": 0.00025,
+      "loss": 0.714,
+      "step": 50
+    },
+    {
+      "epoch": 0.004353820477468979,
+      "grad_norm": 4.71875,
+      "learning_rate": 0.0003,
+      "loss": 0.4849,
+      "step": 60
+    },
+    {
+      "epoch": 0.005079457223713809,
+      "grad_norm": 3.828125,
+      "learning_rate": 0.00035,
+      "loss": 0.3671,
+      "step": 70
+    },
+    {
+      "epoch": 0.0058050939699586385,
+      "grad_norm": 4.03125,
+      "learning_rate": 0.0004,
+      "loss": 0.5693,
+      "step": 80
+    },
+    {
+      "epoch": 0.006530730716203468,
+      "grad_norm": 3.296875,
+      "learning_rate": 0.00045000000000000004,
+      "loss": 0.4133,
+      "step": 90
+    },
+    {
+      "epoch": 0.007256367462448298,
+      "grad_norm": 2.640625,
+      "learning_rate": 0.0005,
+      "loss": 0.2668,
+      "step": 100
+    },
+    {
+      "epoch": 0.007982004208693128,
+      "grad_norm": 1.5859375,
+      "learning_rate": 0.000499999340865746,
+      "loss": 0.2922,
+      "step": 110
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 13781,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 10,
+  "total_flos": 0.0,
+  "train_batch_size": 5,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-110/training_args.bin b/checkpoint-110/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..7d5e801f990cb70d84bd631ef6b3738864bdfd3d
--- /dev/null
+++ b/checkpoint-110/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3cf317b5a0322fdf5b8c610bfc4afb6ece34736fcb1bea30f92c1212b9b53d69
+size 4984
diff --git a/checkpoint-120/README.md b/checkpoint-120/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..64d55750c9c68c8ef0ccff2f65b1fd32a16578a9
--- /dev/null
+++ b/checkpoint-120/README.md
@@ -0,0 +1,202 @@
+---
+library_name: peft
+base_model: ../pretrained/MiniCPM-2B-dpo-bf16/
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.10.0
\ No newline at end of file
diff --git a/checkpoint-120/adapter_config.json b/checkpoint-120/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..d0eb9db7c6b0c827aba0e86e5057f651b0f868c9
--- /dev/null
+++ b/checkpoint-120/adapter_config.json
@@ -0,0 +1,29 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "../pretrained/MiniCPM-2B-dpo-bf16/",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": "gaussian",
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-120/adapter_model.safetensors b/checkpoint-120/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..4dcd3ee44b51a80847ee8aa81e8af661952b8211
--- /dev/null
+++ b/checkpoint-120/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3edbbf8dbcc57d82f4da1d0b33f9433b447ac70aab7cdfe73fd348417b6127c1
+size 5919456
diff --git a/checkpoint-120/optimizer.pt b/checkpoint-120/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d932d75bd744b5903d5dac5fad9052668745d1a9
--- /dev/null
+++ b/checkpoint-120/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8c2d97d46a86ec0f3e481bffb226c09dfb3c7a1af0595f32b22e685a0a0c3a54
+size 11930938
diff --git a/checkpoint-120/rng_state_0.pth b/checkpoint-120/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..3d502a9e5a520de440672e348bb1c204e25bdd4a
--- /dev/null
+++ b/checkpoint-120/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e010d5c5e5ff459e09cee093e035058bce80bd0e562b9008cf49e37a37c4a265
+size 15024
diff --git a/checkpoint-120/rng_state_1.pth b/checkpoint-120/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..97e170d86dde07d3718359b683368b6ef495a27a
--- /dev/null
+++ b/checkpoint-120/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8ceaec8c84867fda1405ca685c206ff5498d51b755970edb435f4777d1649c24
+size 15024
diff --git a/checkpoint-120/rng_state_2.pth b/checkpoint-120/rng_state_2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..1f4509894b817e8040026ae28468826ddeeaec22
--- /dev/null
+++ b/checkpoint-120/rng_state_2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:791eebd4302125380e3da4e87668a4bb1db8af54a2e9f9519cb225a5eefb78b6
+size 15024
diff --git a/checkpoint-120/rng_state_3.pth b/checkpoint-120/rng_state_3.pth
new file mode 100644
index 0000000000000000000000000000000000000000..73faf1586f171185dab1d897d8a662e1288539f7
--- /dev/null
+++ b/checkpoint-120/rng_state_3.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7b5e8577ac55da749e71c292571c66ba7068eaeeac8f69a2d9ecb004c4ea24df
+size 15024
diff --git a/checkpoint-120/scheduler.pt b/checkpoint-120/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6f0751e2734ecca15f6989c82d3931e947f5a533
--- /dev/null
+++ b/checkpoint-120/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ed7d425e862f330c41856fd5fe8baa40e1998a2b4162a2709a8d1aba7a072f7e
+size 1064
diff --git a/checkpoint-120/trainer_state.json b/checkpoint-120/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..7b33e315a7399a6dc172613294ba58eda099a7b2
--- /dev/null
+++ b/checkpoint-120/trainer_state.json
@@ -0,0 +1,105 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.008707640954937958,
+  "eval_steps": 500,
+  "global_step": 120,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0007256367462448298,
+      "grad_norm": 12.0,
+      "learning_rate": 5e-05,
+      "loss": 3.0993,
+      "step": 10
+    },
+    {
+      "epoch": 0.0014512734924896596,
+      "grad_norm": 10.4375,
+      "learning_rate": 0.0001,
+      "loss": 2.208,
+      "step": 20
+    },
+    {
+      "epoch": 0.0021769102387344894,
+      "grad_norm": 6.5625,
+      "learning_rate": 0.00015,
+      "loss": 1.3285,
+      "step": 30
+    },
+    {
+      "epoch": 0.0029025469849793192,
+      "grad_norm": 6.21875,
+      "learning_rate": 0.0002,
+      "loss": 0.6895,
+      "step": 40
+    },
+    {
+      "epoch": 0.003628183731224149,
+      "grad_norm": 4.15625,
+      "learning_rate": 0.00025,
+      "loss": 0.714,
+      "step": 50
+    },
+    {
+      "epoch": 0.004353820477468979,
+      "grad_norm": 4.71875,
+      "learning_rate": 0.0003,
+      "loss": 0.4849,
+      "step": 60
+    },
+    {
+      "epoch": 0.005079457223713809,
+      "grad_norm": 3.828125,
+      "learning_rate": 0.00035,
+      "loss": 0.3671,
+      "step": 70
+    },
+    {
+      "epoch": 0.0058050939699586385,
+      "grad_norm": 4.03125,
+      "learning_rate": 0.0004,
+      "loss": 0.5693,
+      "step": 80
+    },
+    {
+      "epoch": 0.006530730716203468,
+      "grad_norm": 3.296875,
+      "learning_rate": 0.00045000000000000004,
+      "loss": 0.4133,
+      "step": 90
+    },
+    {
+      "epoch": 0.007256367462448298,
+      "grad_norm": 2.640625,
+      "learning_rate": 0.0005,
+      "loss": 0.2668,
+      "step": 100
+    },
+    {
+      "epoch": 0.007982004208693128,
+      "grad_norm": 1.5859375,
+      "learning_rate": 0.000499999340865746,
+      "loss": 0.2922,
+      "step": 110
+    },
+    {
+      "epoch": 0.008707640954937958,
+      "grad_norm": 3.8125,
+      "learning_rate": 0.0004999973634664594,
+      "loss": 0.3996,
+      "step": 120
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 13781,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 10,
+  "total_flos": 0.0,
+  "train_batch_size": 5,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-120/training_args.bin b/checkpoint-120/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..7d5e801f990cb70d84bd631ef6b3738864bdfd3d
--- /dev/null
+++ b/checkpoint-120/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3cf317b5a0322fdf5b8c610bfc4afb6ece34736fcb1bea30f92c1212b9b53d69
+size 4984
diff --git a/checkpoint-130/README.md b/checkpoint-130/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..64d55750c9c68c8ef0ccff2f65b1fd32a16578a9
--- /dev/null
+++ b/checkpoint-130/README.md
@@ -0,0 +1,202 @@
+---
+library_name: peft
+base_model: ../pretrained/MiniCPM-2B-dpo-bf16/
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.10.0
\ No newline at end of file
diff --git a/checkpoint-130/adapter_config.json b/checkpoint-130/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..d0eb9db7c6b0c827aba0e86e5057f651b0f868c9
--- /dev/null
+++ b/checkpoint-130/adapter_config.json
@@ -0,0 +1,29 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "../pretrained/MiniCPM-2B-dpo-bf16/",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": "gaussian",
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-130/adapter_model.safetensors b/checkpoint-130/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..8fd96cbfe46d73cf895518a94ae5a6b49d35ec6c
--- /dev/null
+++ b/checkpoint-130/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7b8e088f34ddad755ae731fcfb964ac14021395873fd576c793309b41895a447
+size 5919456
diff --git a/checkpoint-130/optimizer.pt b/checkpoint-130/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3fae5d59d3f8fe8494d2874359b2e40fc3914124
--- /dev/null
+++ b/checkpoint-130/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7b0cd305b6dc9a4a71a0c0e01f6956397839e506c56dbc71ad6dcacaa60e54b6
+size 11930938
diff --git a/checkpoint-130/rng_state_0.pth b/checkpoint-130/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..8b936af33e9d245baa21fd749c30683b93cd7deb
--- /dev/null
+++ b/checkpoint-130/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7ced0c6ba13f477a0dbd44034592fe000f226e71898cbab5bf87ce59dc6bde36
+size 15024
diff --git a/checkpoint-130/rng_state_1.pth b/checkpoint-130/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..042841fdacad262623786e1a413c9ff2141e72dd
--- /dev/null
+++ b/checkpoint-130/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e97d793be909b79220b59b211d87fda9d35184d2305c00641e9b4531b73b8441
+size 15024
diff --git a/checkpoint-130/rng_state_2.pth b/checkpoint-130/rng_state_2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..a58abcc92adfb7f3e1dd84f85898b9b66ce6fd32
--- /dev/null
+++ b/checkpoint-130/rng_state_2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:60416c656b12aaecd01e32e964532f371c0a6b02a4b9b91ccfdc35d45dce0050
+size 15024
diff --git a/checkpoint-130/rng_state_3.pth b/checkpoint-130/rng_state_3.pth
new file mode 100644
index 0000000000000000000000000000000000000000..3bce0c189cda9aff58d716ffa2eca649bd8a6592
--- /dev/null
+++ b/checkpoint-130/rng_state_3.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:218ddffb5978f25094e6ad3cfbfc85ad7b807a183e3bc9f6f15bd471542d7273
+size 15024
diff --git a/checkpoint-130/scheduler.pt b/checkpoint-130/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..cde855fcbd7314180b62ebc49700f02fe1f703f7
--- /dev/null
+++ b/checkpoint-130/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:28e2fee165899e7e8256fbe1ade72b87b60a73a6f8ec65162793c9cfaf3c5dea
+size 1064
diff --git a/checkpoint-130/trainer_state.json b/checkpoint-130/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..c0c877abb8847ad74614d3add69554bdf747dd9e
--- /dev/null
+++ b/checkpoint-130/trainer_state.json
@@ -0,0 +1,112 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.009433277701182788,
+  "eval_steps": 500,
+  "global_step": 130,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0007256367462448298,
+      "grad_norm": 12.0,
+      "learning_rate": 5e-05,
+      "loss": 3.0993,
+      "step": 10
+    },
+    {
+      "epoch": 0.0014512734924896596,
+      "grad_norm": 10.4375,
+      "learning_rate": 0.0001,
+      "loss": 2.208,
+      "step": 20
+    },
+    {
+      "epoch": 0.0021769102387344894,
+      "grad_norm": 6.5625,
+      "learning_rate": 0.00015,
+      "loss": 1.3285,
+      "step": 30
+    },
+    {
+      "epoch": 0.0029025469849793192,
+      "grad_norm": 6.21875,
+      "learning_rate": 0.0002,
+      "loss": 0.6895,
+      "step": 40
+    },
+    {
+      "epoch": 0.003628183731224149,
+      "grad_norm": 4.15625,
+      "learning_rate": 0.00025,
+      "loss": 0.714,
+      "step": 50
+    },
+    {
+      "epoch": 0.004353820477468979,
+      "grad_norm": 4.71875,
+      "learning_rate": 0.0003,
+      "loss": 0.4849,
+      "step": 60
+    },
+    {
+      "epoch": 0.005079457223713809,
+      "grad_norm": 3.828125,
+      "learning_rate": 0.00035,
+      "loss": 0.3671,
+      "step": 70
+    },
+    {
+      "epoch": 0.0058050939699586385,
+      "grad_norm": 4.03125,
+      "learning_rate": 0.0004,
+      "loss": 0.5693,
+      "step": 80
+    },
+    {
+      "epoch": 0.006530730716203468,
+      "grad_norm": 3.296875,
+      "learning_rate": 0.00045000000000000004,
+      "loss": 0.4133,
+      "step": 90
+    },
+    {
+      "epoch": 0.007256367462448298,
+      "grad_norm": 2.640625,
+      "learning_rate": 0.0005,
+      "loss": 0.2668,
+      "step": 100
+    },
+    {
+      "epoch": 0.007982004208693128,
+      "grad_norm": 1.5859375,
+      "learning_rate": 0.000499999340865746,
+      "loss": 0.2922,
+      "step": 110
+    },
+    {
+      "epoch": 0.008707640954937958,
+      "grad_norm": 3.8125,
+      "learning_rate": 0.0004999973634664594,
+      "loss": 0.3996,
+      "step": 120
+    },
+    {
+      "epoch": 0.009433277701182788,
+      "grad_norm": 2.0625,
+      "learning_rate": 0.0004999940678125673,
+      "loss": 0.2841,
+      "step": 130
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 13781,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 10,
+  "total_flos": 0.0,
+  "train_batch_size": 5,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-130/training_args.bin b/checkpoint-130/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..7d5e801f990cb70d84bd631ef6b3738864bdfd3d
--- /dev/null
+++ b/checkpoint-130/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3cf317b5a0322fdf5b8c610bfc4afb6ece34736fcb1bea30f92c1212b9b53d69
+size 4984
diff --git a/checkpoint-140/README.md b/checkpoint-140/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..64d55750c9c68c8ef0ccff2f65b1fd32a16578a9
--- /dev/null
+++ b/checkpoint-140/README.md
@@ -0,0 +1,202 @@
+---
+library_name: peft
+base_model: ../pretrained/MiniCPM-2B-dpo-bf16/
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.10.0
\ No newline at end of file
diff --git a/checkpoint-140/adapter_config.json b/checkpoint-140/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..d0eb9db7c6b0c827aba0e86e5057f651b0f868c9
--- /dev/null
+++ b/checkpoint-140/adapter_config.json
@@ -0,0 +1,29 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "../pretrained/MiniCPM-2B-dpo-bf16/",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": "gaussian",
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-140/adapter_model.safetensors b/checkpoint-140/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..e0defaaff0654f6c4a0b672203282efeb31598d9
--- /dev/null
+++ b/checkpoint-140/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8e92ea3abc591dc3fd8db262b17a834a97f9b207639b8c8a4202cc830620ad31
+size 5919456
diff --git a/checkpoint-140/optimizer.pt b/checkpoint-140/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ec09f7e4e0a464b33060db9268a5cdabf0762273
--- /dev/null
+++ b/checkpoint-140/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a2655f85f4e3e2b20b017eb2bb5965d4d222442c92cc04d3028defc26b55eab7
+size 11930938
diff --git a/checkpoint-140/rng_state_0.pth b/checkpoint-140/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..4a45d49b702e38a5a5d428f1c4170f10ba23d86a
--- /dev/null
+++ b/checkpoint-140/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6ad67dbef2a21b26f3117ca45d621957bf72b1116535cf6e524b17661b94b1a9
+size 15024
diff --git a/checkpoint-140/rng_state_1.pth b/checkpoint-140/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..2c6cf166b16a44d3e92adf964c8fc8ac82daa466
--- /dev/null
+++ b/checkpoint-140/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ca3afd4f067268e4c6ff34242266c9e70bce106dd4d7365781bb893119a4033d
+size 15024
diff --git a/checkpoint-140/rng_state_2.pth b/checkpoint-140/rng_state_2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..6bc0e1fa490cb0406141b021536c5be4e11e69e0
--- /dev/null
+++ b/checkpoint-140/rng_state_2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f7a56e9bc058e763d68d477e80d923c2fe559a75d518ac8d5d693397a88304b3
+size 15024
diff --git a/checkpoint-140/rng_state_3.pth b/checkpoint-140/rng_state_3.pth
new file mode 100644
index 0000000000000000000000000000000000000000..19d3b9f2dd05718c2f4579d52dfb0ab3259e1869
--- /dev/null
+++ b/checkpoint-140/rng_state_3.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8aef494a54b19f2a6c92fb251d8acadbfc7c21bcba926f5a7f5fa134981bb678
+size 15024
diff --git a/checkpoint-140/scheduler.pt b/checkpoint-140/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8ea0553293c234b353ad578ce098df48337e985e
--- /dev/null
+++ b/checkpoint-140/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:30859c02f6b970089c798019b31d9595b893cd0ad7ef1e694d1a5869622b8738
+size 1064
diff --git a/checkpoint-140/trainer_state.json b/checkpoint-140/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..f43fb7bd3bce87dd697dce23024967eeba24f5e3
--- /dev/null
+++ b/checkpoint-140/trainer_state.json
@@ -0,0 +1,119 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.010158914447427617,
+  "eval_steps": 500,
+  "global_step": 140,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0007256367462448298,
+      "grad_norm": 12.0,
+      "learning_rate": 5e-05,
+      "loss": 3.0993,
+      "step": 10
+    },
+    {
+      "epoch": 0.0014512734924896596,
+      "grad_norm": 10.4375,
+      "learning_rate": 0.0001,
+      "loss": 2.208,
+      "step": 20
+    },
+    {
+      "epoch": 0.0021769102387344894,
+      "grad_norm": 6.5625,
+      "learning_rate": 0.00015,
+      "loss": 1.3285,
+      "step": 30
+    },
+    {
+      "epoch": 0.0029025469849793192,
+      "grad_norm": 6.21875,
+      "learning_rate": 0.0002,
+      "loss": 0.6895,
+      "step": 40
+    },
+    {
+      "epoch": 0.003628183731224149,
+      "grad_norm": 4.15625,
+      "learning_rate": 0.00025,
+      "loss": 0.714,
+      "step": 50
+    },
+    {
+      "epoch": 0.004353820477468979,
+      "grad_norm": 4.71875,
+      "learning_rate": 0.0003,
+      "loss": 0.4849,
+      "step": 60
+    },
+    {
+      "epoch": 0.005079457223713809,
+      "grad_norm": 3.828125,
+      "learning_rate": 0.00035,
+      "loss": 0.3671,
+      "step": 70
+    },
+    {
+      "epoch": 0.0058050939699586385,
+      "grad_norm": 4.03125,
+      "learning_rate": 0.0004,
+      "loss": 0.5693,
+      "step": 80
+    },
+    {
+      "epoch": 0.006530730716203468,
+      "grad_norm": 3.296875,
+      "learning_rate": 0.00045000000000000004,
+      "loss": 0.4133,
+      "step": 90
+    },
+    {
+      "epoch": 0.007256367462448298,
+      "grad_norm": 2.640625,
+      "learning_rate": 0.0005,
+      "loss": 0.2668,
+      "step": 100
+    },
+    {
+      "epoch": 0.007982004208693128,
+      "grad_norm": 1.5859375,
+      "learning_rate": 0.000499999340865746,
+      "loss": 0.2922,
+      "step": 110
+    },
+    {
+      "epoch": 0.008707640954937958,
+      "grad_norm": 3.8125,
+      "learning_rate": 0.0004999973634664594,
+      "loss": 0.3996,
+      "step": 120
+    },
+    {
+      "epoch": 0.009433277701182788,
+      "grad_norm": 2.0625,
+      "learning_rate": 0.0004999940678125673,
+      "loss": 0.2841,
+      "step": 130
+    },
+    {
+      "epoch": 0.010158914447427617,
+      "grad_norm": 3.390625,
+      "learning_rate": 0.000499989453921448,
+      "loss": 0.3003,
+      "step": 140
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 13781,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 10,
+  "total_flos": 0.0,
+  "train_batch_size": 5,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-140/training_args.bin b/checkpoint-140/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..7d5e801f990cb70d84bd631ef6b3738864bdfd3d
--- /dev/null
+++ b/checkpoint-140/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3cf317b5a0322fdf5b8c610bfc4afb6ece34736fcb1bea30f92c1212b9b53d69
+size 4984
diff --git a/checkpoint-150/README.md b/checkpoint-150/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..64d55750c9c68c8ef0ccff2f65b1fd32a16578a9
--- /dev/null
+++ b/checkpoint-150/README.md
@@ -0,0 +1,202 @@
+---
+library_name: peft
+base_model: ../pretrained/MiniCPM-2B-dpo-bf16/
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.10.0
\ No newline at end of file
diff --git a/checkpoint-150/adapter_config.json b/checkpoint-150/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..d0eb9db7c6b0c827aba0e86e5057f651b0f868c9
--- /dev/null
+++ b/checkpoint-150/adapter_config.json
@@ -0,0 +1,29 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "../pretrained/MiniCPM-2B-dpo-bf16/",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": "gaussian",
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-150/adapter_model.safetensors b/checkpoint-150/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..30d9784a239ae33d6754a7d1662d559c60f401b4
--- /dev/null
+++ b/checkpoint-150/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dda4a9c72634fd4e71da815395a7127ffd6b6a4cf472d57fb02f9d102866177f
+size 5919456
diff --git a/checkpoint-150/optimizer.pt b/checkpoint-150/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..19d20ed2e5372319d5bc7ad6629218531d1a0269
--- /dev/null
+++ b/checkpoint-150/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b75ce68a3a6892278035b4a46b87384dd6eb3f0270ba1243b5db34285ebf6b93
+size 11930938
diff --git a/checkpoint-150/rng_state_0.pth b/checkpoint-150/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..106b260c38aee4031442b01a0b98f48c8b303ec3
--- /dev/null
+++ b/checkpoint-150/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c342af2ce35811f7314d04dcf27fe047ef7a2c2c65a53827cf5bfa3bbef9abbb
+size 15024
diff --git a/checkpoint-150/rng_state_1.pth b/checkpoint-150/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..886c97fe5792aca73064ce33a7d132bf9491199f
--- /dev/null
+++ b/checkpoint-150/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:732cda9265841aab840d0742ab54e81d4890cc436da4ad72a7491a2de6e456cd
+size 15024
diff --git a/checkpoint-150/rng_state_2.pth b/checkpoint-150/rng_state_2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..64926e520c41683a9beadc0b8bc88f1985920d14
--- /dev/null
+++ b/checkpoint-150/rng_state_2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b9e93aaf91c3d45dc0a00b2862a0b23147bc87200884e67202507624081ba206
+size 15024
diff --git a/checkpoint-150/rng_state_3.pth b/checkpoint-150/rng_state_3.pth
new file mode 100644
index 0000000000000000000000000000000000000000..3f7bb92cc45f98dcd8c4767f5cedf6b4ddd4068f
--- /dev/null
+++ b/checkpoint-150/rng_state_3.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bde38e9c7ebd4dcc6310f8e51cdb47e2f01b8ae902f2ef5613c6f4a36b2b5231
+size 15024
diff --git a/checkpoint-150/scheduler.pt b/checkpoint-150/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..25ca28c6302d8e47a7043bca8e833450fa92ae52
--- /dev/null
+++ b/checkpoint-150/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:45654c01dd78f553696a346db1ede715fbcdf68458bb3234128c8ff7cd662376
+size 1064
diff --git a/checkpoint-150/trainer_state.json b/checkpoint-150/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..d1649b7b562e2c8b7c46ea57037d4e7491f8c3b9
--- /dev/null
+++ b/checkpoint-150/trainer_state.json
@@ -0,0 +1,126 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.010884551193672447,
+  "eval_steps": 500,
+  "global_step": 150,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0007256367462448298,
+      "grad_norm": 12.0,
+      "learning_rate": 5e-05,
+      "loss": 3.0993,
+      "step": 10
+    },
+    {
+      "epoch": 0.0014512734924896596,
+      "grad_norm": 10.4375,
+      "learning_rate": 0.0001,
+      "loss": 2.208,
+      "step": 20
+    },
+    {
+      "epoch": 0.0021769102387344894,
+      "grad_norm": 6.5625,
+      "learning_rate": 0.00015,
+      "loss": 1.3285,
+      "step": 30
+    },
+    {
+      "epoch": 0.0029025469849793192,
+      "grad_norm": 6.21875,
+      "learning_rate": 0.0002,
+      "loss": 0.6895,
+      "step": 40
+    },
+    {
+      "epoch": 0.003628183731224149,
+      "grad_norm": 4.15625,
+      "learning_rate": 0.00025,
+      "loss": 0.714,
+      "step": 50
+    },
+    {
+      "epoch": 0.004353820477468979,
+      "grad_norm": 4.71875,
+      "learning_rate": 0.0003,
+      "loss": 0.4849,
+      "step": 60
+    },
+    {
+      "epoch": 0.005079457223713809,
+      "grad_norm": 3.828125,
+      "learning_rate": 0.00035,
+      "loss": 0.3671,
+      "step": 70
+    },
+    {
+      "epoch": 0.0058050939699586385,
+      "grad_norm": 4.03125,
+      "learning_rate": 0.0004,
+      "loss": 0.5693,
+      "step": 80
+    },
+    {
+      "epoch": 0.006530730716203468,
+      "grad_norm": 3.296875,
+      "learning_rate": 0.00045000000000000004,
+      "loss": 0.4133,
+      "step": 90
+    },
+    {
+      "epoch": 0.007256367462448298,
+      "grad_norm": 2.640625,
+      "learning_rate": 0.0005,
+      "loss": 0.2668,
+      "step": 100
+    },
+    {
+      "epoch": 0.007982004208693128,
+      "grad_norm": 1.5859375,
+      "learning_rate": 0.000499999340865746,
+      "loss": 0.2922,
+      "step": 110
+    },
+    {
+      "epoch": 0.008707640954937958,
+      "grad_norm": 3.8125,
+      "learning_rate": 0.0004999973634664594,
+      "loss": 0.3996,
+      "step": 120
+    },
+    {
+      "epoch": 0.009433277701182788,
+      "grad_norm": 2.0625,
+      "learning_rate": 0.0004999940678125673,
+      "loss": 0.2841,
+      "step": 130
+    },
+    {
+      "epoch": 0.010158914447427617,
+      "grad_norm": 3.390625,
+      "learning_rate": 0.000499989453921448,
+      "loss": 0.3003,
+      "step": 140
+    },
+    {
+      "epoch": 0.010884551193672447,
+      "grad_norm": 1.65625,
+      "learning_rate": 0.0004999835218174307,
+      "loss": 0.2747,
+      "step": 150
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 13781,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 10,
+  "total_flos": 0.0,
+  "train_batch_size": 5,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-150/training_args.bin b/checkpoint-150/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..7d5e801f990cb70d84bd631ef6b3738864bdfd3d
--- /dev/null
+++ b/checkpoint-150/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3cf317b5a0322fdf5b8c610bfc4afb6ece34736fcb1bea30f92c1212b9b53d69
+size 4984
diff --git a/checkpoint-160/README.md b/checkpoint-160/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..64d55750c9c68c8ef0ccff2f65b1fd32a16578a9
--- /dev/null
+++ b/checkpoint-160/README.md
@@ -0,0 +1,202 @@
+---
+library_name: peft
+base_model: ../pretrained/MiniCPM-2B-dpo-bf16/
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.10.0
\ No newline at end of file
diff --git a/checkpoint-160/adapter_config.json b/checkpoint-160/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..d0eb9db7c6b0c827aba0e86e5057f651b0f868c9
--- /dev/null
+++ b/checkpoint-160/adapter_config.json
@@ -0,0 +1,29 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "../pretrained/MiniCPM-2B-dpo-bf16/",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": "gaussian",
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-160/adapter_model.safetensors b/checkpoint-160/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..d997e4b4dfec31503d06c63e6215a69d0f4ad105
--- /dev/null
+++ b/checkpoint-160/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cd75e3594b283b04e197ae7ce642510a613228d0d0442850d59a502f1e6a447d
+size 5919456
diff --git a/checkpoint-160/optimizer.pt b/checkpoint-160/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..cd9231298706c050ad5a14283587dc91ee02ef06
--- /dev/null
+++ b/checkpoint-160/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f99cd761c3d84d33563eee633d1d24b06f75d30a12f508e607134e2ea1dd629e
+size 11930938
diff --git a/checkpoint-160/rng_state_0.pth b/checkpoint-160/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..1a348a58aeccdd834c1f8384d2c678d70847862b
--- /dev/null
+++ b/checkpoint-160/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6ee31f68abe6b432686fdd272da2eeee5b73b4f8821bb04740f607c935a3aa05
+size 15024
diff --git a/checkpoint-160/rng_state_1.pth b/checkpoint-160/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..d240092b087cdd0689f6c165bac6ef292c75d586
--- /dev/null
+++ b/checkpoint-160/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9ccfaa2295f585cb34fb30444c879af586c3b440d8fe9e6b9aea87172ec7cd8b
+size 15024
diff --git a/checkpoint-160/rng_state_2.pth b/checkpoint-160/rng_state_2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..6775a2dea14ed6e60f977be88fb2499831856136
--- /dev/null
+++ b/checkpoint-160/rng_state_2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e45ead505fead6b579c877849f1ab0e0143b1148c8d7d9edeac5bc999e7e7f49
+size 15024
diff --git a/checkpoint-160/rng_state_3.pth b/checkpoint-160/rng_state_3.pth
new file mode 100644
index 0000000000000000000000000000000000000000..7acca8ad061b6f3e98c8b2ec7ab4b61ba316c53c
--- /dev/null
+++ b/checkpoint-160/rng_state_3.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a58731fe5c8e37c116d083c9cf2ea45cc7f0b235b603682161aea7c0598f46d9
+size 15024
diff --git a/checkpoint-160/scheduler.pt b/checkpoint-160/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b5cdf2c11b91dd7969b3682b0c6b8d702bf6ce6f
--- /dev/null
+++ b/checkpoint-160/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7c230b5eaa60495f70fe66ab028324a99ac497cf81b1f02563eca80902e2ed0d
+size 1064
diff --git a/checkpoint-160/trainer_state.json b/checkpoint-160/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..bafb7a20c38b171aaeb35ba1b7212c9335fa59fa
--- /dev/null
+++ b/checkpoint-160/trainer_state.json
@@ -0,0 +1,133 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.011610187939917277,
+  "eval_steps": 500,
+  "global_step": 160,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0007256367462448298,
+      "grad_norm": 12.0,
+      "learning_rate": 5e-05,
+      "loss": 3.0993,
+      "step": 10
+    },
+    {
+      "epoch": 0.0014512734924896596,
+      "grad_norm": 10.4375,
+      "learning_rate": 0.0001,
+      "loss": 2.208,
+      "step": 20
+    },
+    {
+      "epoch": 0.0021769102387344894,
+      "grad_norm": 6.5625,
+      "learning_rate": 0.00015,
+      "loss": 1.3285,
+      "step": 30
+    },
+    {
+      "epoch": 0.0029025469849793192,
+      "grad_norm": 6.21875,
+      "learning_rate": 0.0002,
+      "loss": 0.6895,
+      "step": 40
+    },
+    {
+      "epoch": 0.003628183731224149,
+      "grad_norm": 4.15625,
+      "learning_rate": 0.00025,
+      "loss": 0.714,
+      "step": 50
+    },
+    {
+      "epoch": 0.004353820477468979,
+      "grad_norm": 4.71875,
+      "learning_rate": 0.0003,
+      "loss": 0.4849,
+      "step": 60
+    },
+    {
+      "epoch": 0.005079457223713809,
+      "grad_norm": 3.828125,
+      "learning_rate": 0.00035,
+      "loss": 0.3671,
+      "step": 70
+    },
+    {
+      "epoch": 0.0058050939699586385,
+      "grad_norm": 4.03125,
+      "learning_rate": 0.0004,
+      "loss": 0.5693,
+      "step": 80
+    },
+    {
+      "epoch": 0.006530730716203468,
+      "grad_norm": 3.296875,
+      "learning_rate": 0.00045000000000000004,
+      "loss": 0.4133,
+      "step": 90
+    },
+    {
+      "epoch": 0.007256367462448298,
+      "grad_norm": 2.640625,
+      "learning_rate": 0.0005,
+      "loss": 0.2668,
+      "step": 100
+    },
+    {
+      "epoch": 0.007982004208693128,
+      "grad_norm": 1.5859375,
+      "learning_rate": 0.000499999340865746,
+      "loss": 0.2922,
+      "step": 110
+    },
+    {
+      "epoch": 0.008707640954937958,
+      "grad_norm": 3.8125,
+      "learning_rate": 0.0004999973634664594,
+      "loss": 0.3996,
+      "step": 120
+    },
+    {
+      "epoch": 0.009433277701182788,
+      "grad_norm": 2.0625,
+      "learning_rate": 0.0004999940678125673,
+      "loss": 0.2841,
+      "step": 130
+    },
+    {
+      "epoch": 0.010158914447427617,
+      "grad_norm": 3.390625,
+      "learning_rate": 0.000499989453921448,
+      "loss": 0.3003,
+      "step": 140
+    },
+    {
+      "epoch": 0.010884551193672447,
+      "grad_norm": 1.65625,
+      "learning_rate": 0.0004999835218174307,
+      "loss": 0.2747,
+      "step": 150
+    },
+    {
+      "epoch": 0.011610187939917277,
+      "grad_norm": 1.671875,
+      "learning_rate": 0.000499976271531796,
+      "loss": 0.37,
+      "step": 160
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 13781,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 10,
+  "total_flos": 0.0,
+  "train_batch_size": 5,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-160/training_args.bin b/checkpoint-160/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..7d5e801f990cb70d84bd631ef6b3738864bdfd3d
--- /dev/null
+++ b/checkpoint-160/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3cf317b5a0322fdf5b8c610bfc4afb6ece34736fcb1bea30f92c1212b9b53d69
+size 4984
diff --git a/checkpoint-170/README.md b/checkpoint-170/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..64d55750c9c68c8ef0ccff2f65b1fd32a16578a9
--- /dev/null
+++ b/checkpoint-170/README.md
@@ -0,0 +1,202 @@
+---
+library_name: peft
+base_model: ../pretrained/MiniCPM-2B-dpo-bf16/
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.10.0
\ No newline at end of file
diff --git a/checkpoint-170/adapter_config.json b/checkpoint-170/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..d0eb9db7c6b0c827aba0e86e5057f651b0f868c9
--- /dev/null
+++ b/checkpoint-170/adapter_config.json
@@ -0,0 +1,29 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "../pretrained/MiniCPM-2B-dpo-bf16/",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": "gaussian",
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-170/adapter_model.safetensors b/checkpoint-170/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..7a8c6a3549dc0409680f3cd970fde532c5f5b7fb
--- /dev/null
+++ b/checkpoint-170/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dbdedac24c25ec5bbfee6a2b81a4d55850ce4d03be6262015092c3f59cef55f2
+size 5919456
diff --git a/checkpoint-170/optimizer.pt b/checkpoint-170/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f7157cc0fff50d4f9ff7ace4d852b5e3e4951196
--- /dev/null
+++ b/checkpoint-170/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:58e1d1af4c3458132cfb73203b05b2b313bbf6a489821ceb78474afc8b732b2b
+size 11930938
diff --git a/checkpoint-170/rng_state_0.pth b/checkpoint-170/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..0190fba96c029f1f316a97d8b807b161fc35b029
--- /dev/null
+++ b/checkpoint-170/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:04b538b0af287645a40ce0df2860d4d1942ce7e4e6877b83687385b189a6e760
+size 15024
diff --git a/checkpoint-170/rng_state_1.pth b/checkpoint-170/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..b268b03046e21e3683e2e0a4036ab6791a429e18
--- /dev/null
+++ b/checkpoint-170/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:aab51a83d1aba072b3826961dc0d5c41018143d192a7b20b35cb398bb6e6a07d
+size 15024
diff --git a/checkpoint-170/rng_state_2.pth b/checkpoint-170/rng_state_2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..94467028ff324a304799c5d14be05069f7000437
--- /dev/null
+++ b/checkpoint-170/rng_state_2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fa8def4f7d0cade6a61e0faf6026d6fbf8f9a96f925d11de9c3e2566bd370cf6
+size 15024
diff --git a/checkpoint-170/rng_state_3.pth b/checkpoint-170/rng_state_3.pth
new file mode 100644
index 0000000000000000000000000000000000000000..3a9cac704de366bbfd6be2c1775d76cc7a42733e
--- /dev/null
+++ b/checkpoint-170/rng_state_3.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bce69a0041979668f11d6085ed3478968bd2dc3d7aeed480f09e380cf7f58cfc
+size 15024
diff --git a/checkpoint-170/scheduler.pt b/checkpoint-170/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..44a190969689858b6278b12246c43c784366cbe9
--- /dev/null
+++ b/checkpoint-170/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:76693fcd5ba2c99826a680e80920f52de5e4c2ab57afef9add078b5cca2d8042
+size 1064
diff --git a/checkpoint-170/trainer_state.json b/checkpoint-170/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..ef840a03745076c4f6569638d8c985c29ec6f8f8
--- /dev/null
+++ b/checkpoint-170/trainer_state.json
@@ -0,0 +1,140 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.012335824686162107,
+  "eval_steps": 500,
+  "global_step": 170,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0007256367462448298,
+      "grad_norm": 12.0,
+      "learning_rate": 5e-05,
+      "loss": 3.0993,
+      "step": 10
+    },
+    {
+      "epoch": 0.0014512734924896596,
+      "grad_norm": 10.4375,
+      "learning_rate": 0.0001,
+      "loss": 2.208,
+      "step": 20
+    },
+    {
+      "epoch": 0.0021769102387344894,
+      "grad_norm": 6.5625,
+      "learning_rate": 0.00015,
+      "loss": 1.3285,
+      "step": 30
+    },
+    {
+      "epoch": 0.0029025469849793192,
+      "grad_norm": 6.21875,
+      "learning_rate": 0.0002,
+      "loss": 0.6895,
+      "step": 40
+    },
+    {
+      "epoch": 0.003628183731224149,
+      "grad_norm": 4.15625,
+      "learning_rate": 0.00025,
+      "loss": 0.714,
+      "step": 50
+    },
+    {
+      "epoch": 0.004353820477468979,
+      "grad_norm": 4.71875,
+      "learning_rate": 0.0003,
+      "loss": 0.4849,
+      "step": 60
+    },
+    {
+      "epoch": 0.005079457223713809,
+      "grad_norm": 3.828125,
+      "learning_rate": 0.00035,
+      "loss": 0.3671,
+      "step": 70
+    },
+    {
+      "epoch": 0.0058050939699586385,
+      "grad_norm": 4.03125,
+      "learning_rate": 0.0004,
+      "loss": 0.5693,
+      "step": 80
+    },
+    {
+      "epoch": 0.006530730716203468,
+      "grad_norm": 3.296875,
+      "learning_rate": 0.00045000000000000004,
+      "loss": 0.4133,
+      "step": 90
+    },
+    {
+      "epoch": 0.007256367462448298,
+      "grad_norm": 2.640625,
+      "learning_rate": 0.0005,
+      "loss": 0.2668,
+      "step": 100
+    },
+    {
+      "epoch": 0.007982004208693128,
+      "grad_norm": 1.5859375,
+      "learning_rate": 0.000499999340865746,
+      "loss": 0.2922,
+      "step": 110
+    },
+    {
+      "epoch": 0.008707640954937958,
+      "grad_norm": 3.8125,
+      "learning_rate": 0.0004999973634664594,
+      "loss": 0.3996,
+      "step": 120
+    },
+    {
+      "epoch": 0.009433277701182788,
+      "grad_norm": 2.0625,
+      "learning_rate": 0.0004999940678125673,
+      "loss": 0.2841,
+      "step": 130
+    },
+    {
+      "epoch": 0.010158914447427617,
+      "grad_norm": 3.390625,
+      "learning_rate": 0.000499989453921448,
+      "loss": 0.3003,
+      "step": 140
+    },
+    {
+      "epoch": 0.010884551193672447,
+      "grad_norm": 1.65625,
+      "learning_rate": 0.0004999835218174307,
+      "loss": 0.2747,
+      "step": 150
+    },
+    {
+      "epoch": 0.011610187939917277,
+      "grad_norm": 1.671875,
+      "learning_rate": 0.000499976271531796,
+      "loss": 0.37,
+      "step": 160
+    },
+    {
+      "epoch": 0.012335824686162107,
+      "grad_norm": 1.0390625,
+      "learning_rate": 0.000499967703102775,
+      "loss": 0.2163,
+      "step": 170
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 13781,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 10,
+  "total_flos": 0.0,
+  "train_batch_size": 5,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-170/training_args.bin b/checkpoint-170/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..7d5e801f990cb70d84bd631ef6b3738864bdfd3d
--- /dev/null
+++ b/checkpoint-170/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3cf317b5a0322fdf5b8c610bfc4afb6ece34736fcb1bea30f92c1212b9b53d69
+size 4984
diff --git a/checkpoint-180/README.md b/checkpoint-180/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..64d55750c9c68c8ef0ccff2f65b1fd32a16578a9
--- /dev/null
+++ b/checkpoint-180/README.md
@@ -0,0 +1,202 @@
+---
+library_name: peft
+base_model: ../pretrained/MiniCPM-2B-dpo-bf16/
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.10.0
\ No newline at end of file
diff --git a/checkpoint-180/adapter_config.json b/checkpoint-180/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..d0eb9db7c6b0c827aba0e86e5057f651b0f868c9
--- /dev/null
+++ b/checkpoint-180/adapter_config.json
@@ -0,0 +1,29 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "../pretrained/MiniCPM-2B-dpo-bf16/",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": "gaussian",
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-180/adapter_model.safetensors b/checkpoint-180/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..d1733b814a48a808c5ecadebe049390a2ccc9f15
--- /dev/null
+++ b/checkpoint-180/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6b582236868c1541aa9557424e64bbd6a55eae671d23f495dacce3cf50ec617e
+size 5919456
diff --git a/checkpoint-180/optimizer.pt b/checkpoint-180/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b764e79d464b8be03ec143709071d491a082b7e5
--- /dev/null
+++ b/checkpoint-180/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:01beab0aa0afcd04385e1bb5f472ca5ac0b734b100e60e28becd05fb5887c470
+size 11930938
diff --git a/checkpoint-180/rng_state_0.pth b/checkpoint-180/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..a4bf3c82228f9034814d7c954087afe9a1fa5b5f
--- /dev/null
+++ b/checkpoint-180/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:391491e98ac692b7e5835106d7f5cbacd49cdd07a44cc27d7b58742c4dc066be
+size 15024
diff --git a/checkpoint-180/rng_state_1.pth b/checkpoint-180/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..e752b39c1c9894ab1a1d41365fe600feb5bd143c
--- /dev/null
+++ b/checkpoint-180/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d0d1421c313b5149e2c40765ea51d8b558dc49bf8f3196a5026730b9ddc0b1ef
+size 15024
diff --git a/checkpoint-180/rng_state_2.pth b/checkpoint-180/rng_state_2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..882ddf2069a3f27d9a74bdc6aea9b1266bbbb173
--- /dev/null
+++ b/checkpoint-180/rng_state_2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4d9ec9798d1f0478430ddf469073d6b795a3038d3094c7819b37dd37b6e57e60
+size 15024
diff --git a/checkpoint-180/rng_state_3.pth b/checkpoint-180/rng_state_3.pth
new file mode 100644
index 0000000000000000000000000000000000000000..7be2c9933f6912330499d4df75c20c65d8289227
--- /dev/null
+++ b/checkpoint-180/rng_state_3.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a0c2257b0791cfa018dcbff0ed5913d5cd0cf1f42b4364228919a2f3bfbe68d1
+size 15024
diff --git a/checkpoint-180/scheduler.pt b/checkpoint-180/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a52cd3aef55d42255d45171222b755262fd5c541
--- /dev/null
+++ b/checkpoint-180/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9c97cae5b4051546ae0f2aa09550f8b8ab1b812cb8a96e710b21958cd43ec2af
+size 1064
diff --git a/checkpoint-180/trainer_state.json b/checkpoint-180/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..385f92b5f091c6d4ab119a873f3c551bf2df7272
--- /dev/null
+++ b/checkpoint-180/trainer_state.json
@@ -0,0 +1,147 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.013061461432406937,
+  "eval_steps": 500,
+  "global_step": 180,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0007256367462448298,
+      "grad_norm": 12.0,
+      "learning_rate": 5e-05,
+      "loss": 3.0993,
+      "step": 10
+    },
+    {
+      "epoch": 0.0014512734924896596,
+      "grad_norm": 10.4375,
+      "learning_rate": 0.0001,
+      "loss": 2.208,
+      "step": 20
+    },
+    {
+      "epoch": 0.0021769102387344894,
+      "grad_norm": 6.5625,
+      "learning_rate": 0.00015,
+      "loss": 1.3285,
+      "step": 30
+    },
+    {
+      "epoch": 0.0029025469849793192,
+      "grad_norm": 6.21875,
+      "learning_rate": 0.0002,
+      "loss": 0.6895,
+      "step": 40
+    },
+    {
+      "epoch": 0.003628183731224149,
+      "grad_norm": 4.15625,
+      "learning_rate": 0.00025,
+      "loss": 0.714,
+      "step": 50
+    },
+    {
+      "epoch": 0.004353820477468979,
+      "grad_norm": 4.71875,
+      "learning_rate": 0.0003,
+      "loss": 0.4849,
+      "step": 60
+    },
+    {
+      "epoch": 0.005079457223713809,
+      "grad_norm": 3.828125,
+      "learning_rate": 0.00035,
+      "loss": 0.3671,
+      "step": 70
+    },
+    {
+      "epoch": 0.0058050939699586385,
+      "grad_norm": 4.03125,
+      "learning_rate": 0.0004,
+      "loss": 0.5693,
+      "step": 80
+    },
+    {
+      "epoch": 0.006530730716203468,
+      "grad_norm": 3.296875,
+      "learning_rate": 0.00045000000000000004,
+      "loss": 0.4133,
+      "step": 90
+    },
+    {
+      "epoch": 0.007256367462448298,
+      "grad_norm": 2.640625,
+      "learning_rate": 0.0005,
+      "loss": 0.2668,
+      "step": 100
+    },
+    {
+      "epoch": 0.007982004208693128,
+      "grad_norm": 1.5859375,
+      "learning_rate": 0.000499999340865746,
+      "loss": 0.2922,
+      "step": 110
+    },
+    {
+      "epoch": 0.008707640954937958,
+      "grad_norm": 3.8125,
+      "learning_rate": 0.0004999973634664594,
+      "loss": 0.3996,
+      "step": 120
+    },
+    {
+      "epoch": 0.009433277701182788,
+      "grad_norm": 2.0625,
+      "learning_rate": 0.0004999940678125673,
+      "loss": 0.2841,
+      "step": 130
+    },
+    {
+      "epoch": 0.010158914447427617,
+      "grad_norm": 3.390625,
+      "learning_rate": 0.000499989453921448,
+      "loss": 0.3003,
+      "step": 140
+    },
+    {
+      "epoch": 0.010884551193672447,
+      "grad_norm": 1.65625,
+      "learning_rate": 0.0004999835218174307,
+      "loss": 0.2747,
+      "step": 150
+    },
+    {
+      "epoch": 0.011610187939917277,
+      "grad_norm": 1.671875,
+      "learning_rate": 0.000499976271531796,
+      "loss": 0.37,
+      "step": 160
+    },
+    {
+      "epoch": 0.012335824686162107,
+      "grad_norm": 1.0390625,
+      "learning_rate": 0.000499967703102775,
+      "loss": 0.2163,
+      "step": 170
+    },
+    {
+      "epoch": 0.013061461432406937,
+      "grad_norm": 2.546875,
+      "learning_rate": 0.00049995781657555,
+      "loss": 0.3652,
+      "step": 180
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 13781,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 10,
+  "total_flos": 0.0,
+  "train_batch_size": 5,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-180/training_args.bin b/checkpoint-180/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..7d5e801f990cb70d84bd631ef6b3738864bdfd3d
--- /dev/null
+++ b/checkpoint-180/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3cf317b5a0322fdf5b8c610bfc4afb6ece34736fcb1bea30f92c1212b9b53d69
+size 4984
diff --git a/checkpoint-190/README.md b/checkpoint-190/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..64d55750c9c68c8ef0ccff2f65b1fd32a16578a9
--- /dev/null
+++ b/checkpoint-190/README.md
@@ -0,0 +1,202 @@
+---
+library_name: peft
+base_model: ../pretrained/MiniCPM-2B-dpo-bf16/
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.10.0
\ No newline at end of file
diff --git a/checkpoint-190/adapter_config.json b/checkpoint-190/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..d0eb9db7c6b0c827aba0e86e5057f651b0f868c9
--- /dev/null
+++ b/checkpoint-190/adapter_config.json
@@ -0,0 +1,29 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "../pretrained/MiniCPM-2B-dpo-bf16/",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": "gaussian",
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-190/adapter_model.safetensors b/checkpoint-190/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..f4c79ac3a76461eae34251a2fba5ad1c20a95392
--- /dev/null
+++ b/checkpoint-190/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:35c42102b327dfc4867dc229c63620343509df87242d987856a6bcc88a8fb765
+size 5919456
diff --git a/checkpoint-190/optimizer.pt b/checkpoint-190/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f51f0111b03df833de52e0e26716dd9d89390ea6
--- /dev/null
+++ b/checkpoint-190/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fabd6487842ec7091d617f93f7362c69b7a5644fc07c1f8b4d51458dc362366f
+size 11930938
diff --git a/checkpoint-190/rng_state_0.pth b/checkpoint-190/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..e0c0211e449e0fe4cae423b501f0a6079a134f3f
--- /dev/null
+++ b/checkpoint-190/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:634187bc0806802ba140fc58ef1da9ad47cab37b0a88b924abc908f1a6e48084
+size 15024
diff --git a/checkpoint-190/rng_state_1.pth b/checkpoint-190/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..db22be4a1419388cfcaec36e89d291d8473c2383
--- /dev/null
+++ b/checkpoint-190/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bc7b535a8dc270694543ed47c3a9d756c8e9e91eaeaf7606c23e1f1871e9f459
+size 15024
diff --git a/checkpoint-190/rng_state_2.pth b/checkpoint-190/rng_state_2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..88e4ce07c281508ccd300c390a5fd12b5ea5e41f
--- /dev/null
+++ b/checkpoint-190/rng_state_2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cdefdbea22c530e923aa2c703921cde9f526199183b066c1f92041ae90ee88cd
+size 15024
diff --git a/checkpoint-190/rng_state_3.pth b/checkpoint-190/rng_state_3.pth
new file mode 100644
index 0000000000000000000000000000000000000000..d03fce08df96a573b7d5dcf7a498f6765950b5aa
--- /dev/null
+++ b/checkpoint-190/rng_state_3.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0a5cf3867ec2aff94e354518b57eb2ae50d1a25d4c71ee748970519e8e123f82
+size 15024
diff --git a/checkpoint-190/scheduler.pt b/checkpoint-190/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..772524c3cdfa6475905664c6d2a7ff6e6fd9e54a
--- /dev/null
+++ b/checkpoint-190/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:37483329099dd7ca8f16fa7cca91529da11d3b5911f149213582a18fcf20c0ee
+size 1064
diff --git a/checkpoint-190/trainer_state.json b/checkpoint-190/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..4c2cd924502bae78ba9d7df5d73e6bac77f3e268
--- /dev/null
+++ b/checkpoint-190/trainer_state.json
@@ -0,0 +1,154 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.013787098178651766,
+  "eval_steps": 500,
+  "global_step": 190,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0007256367462448298,
+      "grad_norm": 12.0,
+      "learning_rate": 5e-05,
+      "loss": 3.0993,
+      "step": 10
+    },
+    {
+      "epoch": 0.0014512734924896596,
+      "grad_norm": 10.4375,
+      "learning_rate": 0.0001,
+      "loss": 2.208,
+      "step": 20
+    },
+    {
+      "epoch": 0.0021769102387344894,
+      "grad_norm": 6.5625,
+      "learning_rate": 0.00015,
+      "loss": 1.3285,
+      "step": 30
+    },
+    {
+      "epoch": 0.0029025469849793192,
+      "grad_norm": 6.21875,
+      "learning_rate": 0.0002,
+      "loss": 0.6895,
+      "step": 40
+    },
+    {
+      "epoch": 0.003628183731224149,
+      "grad_norm": 4.15625,
+      "learning_rate": 0.00025,
+      "loss": 0.714,
+      "step": 50
+    },
+    {
+      "epoch": 0.004353820477468979,
+      "grad_norm": 4.71875,
+      "learning_rate": 0.0003,
+      "loss": 0.4849,
+      "step": 60
+    },
+    {
+      "epoch": 0.005079457223713809,
+      "grad_norm": 3.828125,
+      "learning_rate": 0.00035,
+      "loss": 0.3671,
+      "step": 70
+    },
+    {
+      "epoch": 0.0058050939699586385,
+      "grad_norm": 4.03125,
+      "learning_rate": 0.0004,
+      "loss": 0.5693,
+      "step": 80
+    },
+    {
+      "epoch": 0.006530730716203468,
+      "grad_norm": 3.296875,
+      "learning_rate": 0.00045000000000000004,
+      "loss": 0.4133,
+      "step": 90
+    },
+    {
+      "epoch": 0.007256367462448298,
+      "grad_norm": 2.640625,
+      "learning_rate": 0.0005,
+      "loss": 0.2668,
+      "step": 100
+    },
+    {
+      "epoch": 0.007982004208693128,
+      "grad_norm": 1.5859375,
+      "learning_rate": 0.000499999340865746,
+      "loss": 0.2922,
+      "step": 110
+    },
+    {
+      "epoch": 0.008707640954937958,
+      "grad_norm": 3.8125,
+      "learning_rate": 0.0004999973634664594,
+      "loss": 0.3996,
+      "step": 120
+    },
+    {
+      "epoch": 0.009433277701182788,
+      "grad_norm": 2.0625,
+      "learning_rate": 0.0004999940678125673,
+      "loss": 0.2841,
+      "step": 130
+    },
+    {
+      "epoch": 0.010158914447427617,
+      "grad_norm": 3.390625,
+      "learning_rate": 0.000499989453921448,
+      "loss": 0.3003,
+      "step": 140
+    },
+    {
+      "epoch": 0.010884551193672447,
+      "grad_norm": 1.65625,
+      "learning_rate": 0.0004999835218174307,
+      "loss": 0.2747,
+      "step": 150
+    },
+    {
+      "epoch": 0.011610187939917277,
+      "grad_norm": 1.671875,
+      "learning_rate": 0.000499976271531796,
+      "loss": 0.37,
+      "step": 160
+    },
+    {
+      "epoch": 0.012335824686162107,
+      "grad_norm": 1.0390625,
+      "learning_rate": 0.000499967703102775,
+      "loss": 0.2163,
+      "step": 170
+    },
+    {
+      "epoch": 0.013061461432406937,
+      "grad_norm": 2.546875,
+      "learning_rate": 0.00049995781657555,
+      "loss": 0.3652,
+      "step": 180
+    },
+    {
+      "epoch": 0.013787098178651766,
+      "grad_norm": 3.71875,
+      "learning_rate": 0.000499946612002253,
+      "loss": 0.3177,
+      "step": 190
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 13781,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 10,
+  "total_flos": 0.0,
+  "train_batch_size": 5,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-190/training_args.bin b/checkpoint-190/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..7d5e801f990cb70d84bd631ef6b3738864bdfd3d
--- /dev/null
+++ b/checkpoint-190/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3cf317b5a0322fdf5b8c610bfc4afb6ece34736fcb1bea30f92c1212b9b53d69
+size 4984
diff --git a/checkpoint-20/README.md b/checkpoint-20/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..64d55750c9c68c8ef0ccff2f65b1fd32a16578a9
--- /dev/null
+++ b/checkpoint-20/README.md
@@ -0,0 +1,202 @@
+---
+library_name: peft
+base_model: ../pretrained/MiniCPM-2B-dpo-bf16/
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.10.0
\ No newline at end of file
diff --git a/checkpoint-20/adapter_config.json b/checkpoint-20/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..d0eb9db7c6b0c827aba0e86e5057f651b0f868c9
--- /dev/null
+++ b/checkpoint-20/adapter_config.json
@@ -0,0 +1,29 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "../pretrained/MiniCPM-2B-dpo-bf16/",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": "gaussian",
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-20/adapter_model.safetensors b/checkpoint-20/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..913573a89fab61fc962d96e5257c36509fa8488b
--- /dev/null
+++ b/checkpoint-20/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f90b24819dc00c62487794464aec4c9ec8bd38b2d55aaf37145d1a076ad54e00
+size 5919456
diff --git a/checkpoint-20/optimizer.pt b/checkpoint-20/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2375cbf194d038f85809ef645852fb643e820e39
--- /dev/null
+++ b/checkpoint-20/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fd8819432fcfda6f5ffbead645e6e062787a2196a373611fb1441f08a8245d10
+size 11930938
diff --git a/checkpoint-20/rng_state_0.pth b/checkpoint-20/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..27c26966ba11b1537e03fb58b5745c13bda8e07c
--- /dev/null
+++ b/checkpoint-20/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3f1260d1909498eca402d05c2a419952d47a5ad70175dcbdd8b00c91a5fc45fa
+size 15024
diff --git a/checkpoint-20/rng_state_1.pth b/checkpoint-20/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..db68c71e26cb6f3a051cebb674e1c59e4a372b54
--- /dev/null
+++ b/checkpoint-20/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:db4473e907d1644f0438ef7f0c7704d02103e85b9719510e499dcd424dd7a698
+size 15024
diff --git a/checkpoint-20/rng_state_2.pth b/checkpoint-20/rng_state_2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..4793fad96abae27f6b655664bfc74d07201da74e
--- /dev/null
+++ b/checkpoint-20/rng_state_2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bc5c885ac98cfdee09d2cbd67013c6d68e4008188e8818b09c31ef431a1d7eb8
+size 15024
diff --git a/checkpoint-20/rng_state_3.pth b/checkpoint-20/rng_state_3.pth
new file mode 100644
index 0000000000000000000000000000000000000000..989dad2ada4a51ebd64cb87b3e27f27217ee5d56
--- /dev/null
+++ b/checkpoint-20/rng_state_3.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f1e02830b4a3aa3e8be30cc4b5cdb6d781ffb6ba2f9b7fc7de429e8f14ff9eb2
+size 15024
diff --git a/checkpoint-20/scheduler.pt b/checkpoint-20/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c8406d79f6ff5aa52f1241dedf5b336f61773ac8
--- /dev/null
+++ b/checkpoint-20/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2ee32020f194b618f6cc6dcf9128ef33a79923274f87f7a23a4c468a1f49a0c1
+size 1064
diff --git a/checkpoint-20/trainer_state.json b/checkpoint-20/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..34094bd333d4732152d500cf866a856d13410f23
--- /dev/null
+++ b/checkpoint-20/trainer_state.json
@@ -0,0 +1,35 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.0014512734924896596,
+  "eval_steps": 500,
+  "global_step": 20,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0007256367462448298,
+      "grad_norm": 12.0,
+      "learning_rate": 5e-05,
+      "loss": 3.0993,
+      "step": 10
+    },
+    {
+      "epoch": 0.0014512734924896596,
+      "grad_norm": 10.4375,
+      "learning_rate": 0.0001,
+      "loss": 2.208,
+      "step": 20
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 13781,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 10,
+  "total_flos": 0.0,
+  "train_batch_size": 5,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-20/training_args.bin b/checkpoint-20/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..7d5e801f990cb70d84bd631ef6b3738864bdfd3d
--- /dev/null
+++ b/checkpoint-20/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3cf317b5a0322fdf5b8c610bfc4afb6ece34736fcb1bea30f92c1212b9b53d69
+size 4984
diff --git a/checkpoint-200/README.md b/checkpoint-200/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..64d55750c9c68c8ef0ccff2f65b1fd32a16578a9
--- /dev/null
+++ b/checkpoint-200/README.md
@@ -0,0 +1,202 @@
+---
+library_name: peft
+base_model: ../pretrained/MiniCPM-2B-dpo-bf16/
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.10.0
\ No newline at end of file
diff --git a/checkpoint-200/adapter_config.json b/checkpoint-200/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..d0eb9db7c6b0c827aba0e86e5057f651b0f868c9
--- /dev/null
+++ b/checkpoint-200/adapter_config.json
@@ -0,0 +1,29 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "../pretrained/MiniCPM-2B-dpo-bf16/",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": "gaussian",
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-200/adapter_model.safetensors b/checkpoint-200/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..a911c73e5ece4d49eff94a76870a02a04b3e0e85
--- /dev/null
+++ b/checkpoint-200/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:52fe7fc87b19d829e5b3d4112386435bd254f8a65cee40f74c8fe7f846f84d73
+size 5919456
diff --git a/checkpoint-200/optimizer.pt b/checkpoint-200/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f27649a5517f58d005ec5759f20a6f2252d5ec46
--- /dev/null
+++ b/checkpoint-200/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5b4e2e88761cf7d17c3acc189c684840b6af529a6a4010058e27e60e39bdf838
+size 11930938
diff --git a/checkpoint-200/rng_state_0.pth b/checkpoint-200/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..cbe6ed9bd7015ba53a00eac5dbf6c207bee426f9
--- /dev/null
+++ b/checkpoint-200/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9f1cba52d741c919af5485dbd6f2f7f1d3e0a85d6360bd235ada5b0b422cbb85
+size 15024
diff --git a/checkpoint-200/rng_state_1.pth b/checkpoint-200/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..5e368466a71fee96b42b881f21bf712a4da6d76f
--- /dev/null
+++ b/checkpoint-200/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e6df0bf21b793eda3c40de2d2c78534d169305cdd2f29bdcd9cb73fac0665821
+size 15024
diff --git a/checkpoint-200/rng_state_2.pth b/checkpoint-200/rng_state_2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..5efcda4ce23425b42660c0142199f06e24c4ecb6
--- /dev/null
+++ b/checkpoint-200/rng_state_2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6248867138fb350cb0f44cdac9e1a95bb8b6784149c2399be64db251dc3dd906
+size 15024
diff --git a/checkpoint-200/rng_state_3.pth b/checkpoint-200/rng_state_3.pth
new file mode 100644
index 0000000000000000000000000000000000000000..e92c71a16815cfe7ecbfb3568fc623dee858bc62
--- /dev/null
+++ b/checkpoint-200/rng_state_3.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:eef7858543a2b266a803e1226a5565f91deb128b83aa5d1d3a7d6881fecffcd3
+size 15024
diff --git a/checkpoint-200/scheduler.pt b/checkpoint-200/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..585a9813b4db512aa83f34b151ccb574ba560544
--- /dev/null
+++ b/checkpoint-200/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6f09555764aebc8513c7bfea5cecca6fd6ad4bb7a7acf11b114512be838008e2
+size 1064
diff --git a/checkpoint-200/trainer_state.json b/checkpoint-200/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..81ffb3b5cf8bca49d5d635a193cd37c62793f5d4
--- /dev/null
+++ b/checkpoint-200/trainer_state.json
@@ -0,0 +1,161 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.014512734924896596,
+  "eval_steps": 500,
+  "global_step": 200,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0007256367462448298,
+      "grad_norm": 12.0,
+      "learning_rate": 5e-05,
+      "loss": 3.0993,
+      "step": 10
+    },
+    {
+      "epoch": 0.0014512734924896596,
+      "grad_norm": 10.4375,
+      "learning_rate": 0.0001,
+      "loss": 2.208,
+      "step": 20
+    },
+    {
+      "epoch": 0.0021769102387344894,
+      "grad_norm": 6.5625,
+      "learning_rate": 0.00015,
+      "loss": 1.3285,
+      "step": 30
+    },
+    {
+      "epoch": 0.0029025469849793192,
+      "grad_norm": 6.21875,
+      "learning_rate": 0.0002,
+      "loss": 0.6895,
+      "step": 40
+    },
+    {
+      "epoch": 0.003628183731224149,
+      "grad_norm": 4.15625,
+      "learning_rate": 0.00025,
+      "loss": 0.714,
+      "step": 50
+    },
+    {
+      "epoch": 0.004353820477468979,
+      "grad_norm": 4.71875,
+      "learning_rate": 0.0003,
+      "loss": 0.4849,
+      "step": 60
+    },
+    {
+      "epoch": 0.005079457223713809,
+      "grad_norm": 3.828125,
+      "learning_rate": 0.00035,
+      "loss": 0.3671,
+      "step": 70
+    },
+    {
+      "epoch": 0.0058050939699586385,
+      "grad_norm": 4.03125,
+      "learning_rate": 0.0004,
+      "loss": 0.5693,
+      "step": 80
+    },
+    {
+      "epoch": 0.006530730716203468,
+      "grad_norm": 3.296875,
+      "learning_rate": 0.00045000000000000004,
+      "loss": 0.4133,
+      "step": 90
+    },
+    {
+      "epoch": 0.007256367462448298,
+      "grad_norm": 2.640625,
+      "learning_rate": 0.0005,
+      "loss": 0.2668,
+      "step": 100
+    },
+    {
+      "epoch": 0.007982004208693128,
+      "grad_norm": 1.5859375,
+      "learning_rate": 0.000499999340865746,
+      "loss": 0.2922,
+      "step": 110
+    },
+    {
+      "epoch": 0.008707640954937958,
+      "grad_norm": 3.8125,
+      "learning_rate": 0.0004999973634664594,
+      "loss": 0.3996,
+      "step": 120
+    },
+    {
+      "epoch": 0.009433277701182788,
+      "grad_norm": 2.0625,
+      "learning_rate": 0.0004999940678125673,
+      "loss": 0.2841,
+      "step": 130
+    },
+    {
+      "epoch": 0.010158914447427617,
+      "grad_norm": 3.390625,
+      "learning_rate": 0.000499989453921448,
+      "loss": 0.3003,
+      "step": 140
+    },
+    {
+      "epoch": 0.010884551193672447,
+      "grad_norm": 1.65625,
+      "learning_rate": 0.0004999835218174307,
+      "loss": 0.2747,
+      "step": 150
+    },
+    {
+      "epoch": 0.011610187939917277,
+      "grad_norm": 1.671875,
+      "learning_rate": 0.000499976271531796,
+      "loss": 0.37,
+      "step": 160
+    },
+    {
+      "epoch": 0.012335824686162107,
+      "grad_norm": 1.0390625,
+      "learning_rate": 0.000499967703102775,
+      "loss": 0.2163,
+      "step": 170
+    },
+    {
+      "epoch": 0.013061461432406937,
+      "grad_norm": 2.546875,
+      "learning_rate": 0.00049995781657555,
+      "loss": 0.3652,
+      "step": 180
+    },
+    {
+      "epoch": 0.013787098178651766,
+      "grad_norm": 3.71875,
+      "learning_rate": 0.000499946612002253,
+      "loss": 0.3177,
+      "step": 190
+    },
+    {
+      "epoch": 0.014512734924896596,
+      "grad_norm": 2.6875,
+      "learning_rate": 0.0004999340894419668,
+      "loss": 0.2043,
+      "step": 200
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 13781,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 10,
+  "total_flos": 0.0,
+  "train_batch_size": 5,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-200/training_args.bin b/checkpoint-200/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..7d5e801f990cb70d84bd631ef6b3738864bdfd3d
--- /dev/null
+++ b/checkpoint-200/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3cf317b5a0322fdf5b8c610bfc4afb6ece34736fcb1bea30f92c1212b9b53d69
+size 4984
diff --git a/checkpoint-210/README.md b/checkpoint-210/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..64d55750c9c68c8ef0ccff2f65b1fd32a16578a9
--- /dev/null
+++ b/checkpoint-210/README.md
@@ -0,0 +1,202 @@
+---
+library_name: peft
+base_model: ../pretrained/MiniCPM-2B-dpo-bf16/
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.10.0
\ No newline at end of file
diff --git a/checkpoint-210/adapter_config.json b/checkpoint-210/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..d0eb9db7c6b0c827aba0e86e5057f651b0f868c9
--- /dev/null
+++ b/checkpoint-210/adapter_config.json
@@ -0,0 +1,29 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "../pretrained/MiniCPM-2B-dpo-bf16/",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": "gaussian",
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-210/adapter_model.safetensors b/checkpoint-210/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..312525dd92c587270852799adc13d71d4a960da6
--- /dev/null
+++ b/checkpoint-210/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:99ea239f6eef5f63a78cd0b2169ed53a7836cd0ed8acc7c57ec76da2ce2399cc
+size 5919456
diff --git a/checkpoint-210/optimizer.pt b/checkpoint-210/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..102f180a2f9f39fb54d5bc892936fca1e3f6d5ab
--- /dev/null
+++ b/checkpoint-210/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:70cddbeaa4da15423edb79e1b8f8b4beca9bd1aa4dd4766ea6f8317835935711
+size 11930938
diff --git a/checkpoint-210/rng_state_0.pth b/checkpoint-210/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..b50684fe0b3ee395929d563c6d176ea95f130e03
--- /dev/null
+++ b/checkpoint-210/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e0758b52eaf66a9d3841ff67a6781c30823a456df25f8d8dce667c6e30632205
+size 15024
diff --git a/checkpoint-210/rng_state_1.pth b/checkpoint-210/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..a59ad732531e10e93f77e6abe03bfa81c396d95c
--- /dev/null
+++ b/checkpoint-210/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2e2a0a6d685c08e2c32c6dcdc27b6a2ecc5d4df518a6f8cb4363bb120173c519
+size 15024
diff --git a/checkpoint-210/rng_state_2.pth b/checkpoint-210/rng_state_2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..431b9592b6ebe9e50e467b14e03f71b9b0348a95
--- /dev/null
+++ b/checkpoint-210/rng_state_2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:063e66491e187afaf0b8ca5021700297f4aff6c6851e81f4c8a2b7755b680287
+size 15024
diff --git a/checkpoint-210/rng_state_3.pth b/checkpoint-210/rng_state_3.pth
new file mode 100644
index 0000000000000000000000000000000000000000..273cf65890be5f6484d1de3986ab4871c1b30e52
--- /dev/null
+++ b/checkpoint-210/rng_state_3.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2b36d1281b90013ae9d40beccbd5b80c807353ef2f41f404c56dcd2be42e1497
+size 15024
diff --git a/checkpoint-210/scheduler.pt b/checkpoint-210/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3d8a4ea46db83014eb9821ebd6c1d5da09ce960a
--- /dev/null
+++ b/checkpoint-210/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e0e6ec46acb410df3eeaac43aacb065af3ae45b54be2e7940e4383b02ddca18c
+size 1064
diff --git a/checkpoint-210/trainer_state.json b/checkpoint-210/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..f9261d31d40cdeeae523b5f4cf6d47704796bf03
--- /dev/null
+++ b/checkpoint-210/trainer_state.json
@@ -0,0 +1,168 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.015238371671141426,
+  "eval_steps": 500,
+  "global_step": 210,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0007256367462448298,
+      "grad_norm": 12.0,
+      "learning_rate": 5e-05,
+      "loss": 3.0993,
+      "step": 10
+    },
+    {
+      "epoch": 0.0014512734924896596,
+      "grad_norm": 10.4375,
+      "learning_rate": 0.0001,
+      "loss": 2.208,
+      "step": 20
+    },
+    {
+      "epoch": 0.0021769102387344894,
+      "grad_norm": 6.5625,
+      "learning_rate": 0.00015,
+      "loss": 1.3285,
+      "step": 30
+    },
+    {
+      "epoch": 0.0029025469849793192,
+      "grad_norm": 6.21875,
+      "learning_rate": 0.0002,
+      "loss": 0.6895,
+      "step": 40
+    },
+    {
+      "epoch": 0.003628183731224149,
+      "grad_norm": 4.15625,
+      "learning_rate": 0.00025,
+      "loss": 0.714,
+      "step": 50
+    },
+    {
+      "epoch": 0.004353820477468979,
+      "grad_norm": 4.71875,
+      "learning_rate": 0.0003,
+      "loss": 0.4849,
+      "step": 60
+    },
+    {
+      "epoch": 0.005079457223713809,
+      "grad_norm": 3.828125,
+      "learning_rate": 0.00035,
+      "loss": 0.3671,
+      "step": 70
+    },
+    {
+      "epoch": 0.0058050939699586385,
+      "grad_norm": 4.03125,
+      "learning_rate": 0.0004,
+      "loss": 0.5693,
+      "step": 80
+    },
+    {
+      "epoch": 0.006530730716203468,
+      "grad_norm": 3.296875,
+      "learning_rate": 0.00045000000000000004,
+      "loss": 0.4133,
+      "step": 90
+    },
+    {
+      "epoch": 0.007256367462448298,
+      "grad_norm": 2.640625,
+      "learning_rate": 0.0005,
+      "loss": 0.2668,
+      "step": 100
+    },
+    {
+      "epoch": 0.007982004208693128,
+      "grad_norm": 1.5859375,
+      "learning_rate": 0.000499999340865746,
+      "loss": 0.2922,
+      "step": 110
+    },
+    {
+      "epoch": 0.008707640954937958,
+      "grad_norm": 3.8125,
+      "learning_rate": 0.0004999973634664594,
+      "loss": 0.3996,
+      "step": 120
+    },
+    {
+      "epoch": 0.009433277701182788,
+      "grad_norm": 2.0625,
+      "learning_rate": 0.0004999940678125673,
+      "loss": 0.2841,
+      "step": 130
+    },
+    {
+      "epoch": 0.010158914447427617,
+      "grad_norm": 3.390625,
+      "learning_rate": 0.000499989453921448,
+      "loss": 0.3003,
+      "step": 140
+    },
+    {
+      "epoch": 0.010884551193672447,
+      "grad_norm": 1.65625,
+      "learning_rate": 0.0004999835218174307,
+      "loss": 0.2747,
+      "step": 150
+    },
+    {
+      "epoch": 0.011610187939917277,
+      "grad_norm": 1.671875,
+      "learning_rate": 0.000499976271531796,
+      "loss": 0.37,
+      "step": 160
+    },
+    {
+      "epoch": 0.012335824686162107,
+      "grad_norm": 1.0390625,
+      "learning_rate": 0.000499967703102775,
+      "loss": 0.2163,
+      "step": 170
+    },
+    {
+      "epoch": 0.013061461432406937,
+      "grad_norm": 2.546875,
+      "learning_rate": 0.00049995781657555,
+      "loss": 0.3652,
+      "step": 180
+    },
+    {
+      "epoch": 0.013787098178651766,
+      "grad_norm": 3.71875,
+      "learning_rate": 0.000499946612002253,
+      "loss": 0.3177,
+      "step": 190
+    },
+    {
+      "epoch": 0.014512734924896596,
+      "grad_norm": 2.6875,
+      "learning_rate": 0.0004999340894419668,
+      "loss": 0.2043,
+      "step": 200
+    },
+    {
+      "epoch": 0.015238371671141426,
+      "grad_norm": 0.5,
+      "learning_rate": 0.0004999202489607236,
+      "loss": 0.2865,
+      "step": 210
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 13781,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 10,
+  "total_flos": 0.0,
+  "train_batch_size": 5,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-210/training_args.bin b/checkpoint-210/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..7d5e801f990cb70d84bd631ef6b3738864bdfd3d
--- /dev/null
+++ b/checkpoint-210/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3cf317b5a0322fdf5b8c610bfc4afb6ece34736fcb1bea30f92c1212b9b53d69
+size 4984
diff --git a/checkpoint-220/README.md b/checkpoint-220/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..64d55750c9c68c8ef0ccff2f65b1fd32a16578a9
--- /dev/null
+++ b/checkpoint-220/README.md
@@ -0,0 +1,202 @@
+---
+library_name: peft
+base_model: ../pretrained/MiniCPM-2B-dpo-bf16/
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.10.0
\ No newline at end of file
diff --git a/checkpoint-220/adapter_config.json b/checkpoint-220/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..d0eb9db7c6b0c827aba0e86e5057f651b0f868c9
--- /dev/null
+++ b/checkpoint-220/adapter_config.json
@@ -0,0 +1,29 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "../pretrained/MiniCPM-2B-dpo-bf16/",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": "gaussian",
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-220/adapter_model.safetensors b/checkpoint-220/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..c7e7dc76fdcb4322649182ce245664aca822854d
--- /dev/null
+++ b/checkpoint-220/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7e494bc82e90d73e11faef53e926d1a9b9b1d875278f065a1de26b07b34e83b5
+size 5919456
diff --git a/checkpoint-220/optimizer.pt b/checkpoint-220/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a71b45db27251dc758a80886f421960a49517585
--- /dev/null
+++ b/checkpoint-220/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:094ec206422062dc543ee2a6e88aec8d56c890c33d7f0cf6eb32bc01c3d35396
+size 11930938
diff --git a/checkpoint-220/rng_state_0.pth b/checkpoint-220/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..cd86dbca0e6b3468b401c07f26c1eb3fba27b6ba
--- /dev/null
+++ b/checkpoint-220/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:318482ffbb3682b198e21782725194b8e4a47144cd19447822b92c9741381e64
+size 15024
diff --git a/checkpoint-220/rng_state_1.pth b/checkpoint-220/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..98838ae1c3d248b66950bc4d113e7a7fd867389d
--- /dev/null
+++ b/checkpoint-220/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6649fb78ede3c075b0e7d5b7f1a9e91be8ec03fa87f75712323f2711c7a7e883
+size 15024
diff --git a/checkpoint-220/rng_state_2.pth b/checkpoint-220/rng_state_2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..b6864283abb094df0fb052b1924e96aff0a106c8
--- /dev/null
+++ b/checkpoint-220/rng_state_2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6d147c99bfe79a2f8bcaf0eef24750e3d64ada4e06a65d7908b38b89af99b831
+size 15024
diff --git a/checkpoint-220/rng_state_3.pth b/checkpoint-220/rng_state_3.pth
new file mode 100644
index 0000000000000000000000000000000000000000..c634d3a8e7a21b8baf51a0f3da6a906007acd1cc
--- /dev/null
+++ b/checkpoint-220/rng_state_3.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f788cb78149d5b33e0c5eac77d7eea5e6e80a94245857d9ff13fa1685c3dfe3d
+size 15024
diff --git a/checkpoint-220/scheduler.pt b/checkpoint-220/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..269a94935565008de015c41661a2e10334fa3ce9
--- /dev/null
+++ b/checkpoint-220/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:69b9cdf7921c6c309955792124fa6f66f5daa0fa1f569d8a697ad9cb04351f2c
+size 1064
diff --git a/checkpoint-220/trainer_state.json b/checkpoint-220/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..ba0039dc254c347f13b8c79ad4f6e6a89eb98fa6
--- /dev/null
+++ b/checkpoint-220/trainer_state.json
@@ -0,0 +1,175 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.015964008417386256,
+  "eval_steps": 500,
+  "global_step": 220,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0007256367462448298,
+      "grad_norm": 12.0,
+      "learning_rate": 5e-05,
+      "loss": 3.0993,
+      "step": 10
+    },
+    {
+      "epoch": 0.0014512734924896596,
+      "grad_norm": 10.4375,
+      "learning_rate": 0.0001,
+      "loss": 2.208,
+      "step": 20
+    },
+    {
+      "epoch": 0.0021769102387344894,
+      "grad_norm": 6.5625,
+      "learning_rate": 0.00015,
+      "loss": 1.3285,
+      "step": 30
+    },
+    {
+      "epoch": 0.0029025469849793192,
+      "grad_norm": 6.21875,
+      "learning_rate": 0.0002,
+      "loss": 0.6895,
+      "step": 40
+    },
+    {
+      "epoch": 0.003628183731224149,
+      "grad_norm": 4.15625,
+      "learning_rate": 0.00025,
+      "loss": 0.714,
+      "step": 50
+    },
+    {
+      "epoch": 0.004353820477468979,
+      "grad_norm": 4.71875,
+      "learning_rate": 0.0003,
+      "loss": 0.4849,
+      "step": 60
+    },
+    {
+      "epoch": 0.005079457223713809,
+      "grad_norm": 3.828125,
+      "learning_rate": 0.00035,
+      "loss": 0.3671,
+      "step": 70
+    },
+    {
+      "epoch": 0.0058050939699586385,
+      "grad_norm": 4.03125,
+      "learning_rate": 0.0004,
+      "loss": 0.5693,
+      "step": 80
+    },
+    {
+      "epoch": 0.006530730716203468,
+      "grad_norm": 3.296875,
+      "learning_rate": 0.00045000000000000004,
+      "loss": 0.4133,
+      "step": 90
+    },
+    {
+      "epoch": 0.007256367462448298,
+      "grad_norm": 2.640625,
+      "learning_rate": 0.0005,
+      "loss": 0.2668,
+      "step": 100
+    },
+    {
+      "epoch": 0.007982004208693128,
+      "grad_norm": 1.5859375,
+      "learning_rate": 0.000499999340865746,
+      "loss": 0.2922,
+      "step": 110
+    },
+    {
+      "epoch": 0.008707640954937958,
+      "grad_norm": 3.8125,
+      "learning_rate": 0.0004999973634664594,
+      "loss": 0.3996,
+      "step": 120
+    },
+    {
+      "epoch": 0.009433277701182788,
+      "grad_norm": 2.0625,
+      "learning_rate": 0.0004999940678125673,
+      "loss": 0.2841,
+      "step": 130
+    },
+    {
+      "epoch": 0.010158914447427617,
+      "grad_norm": 3.390625,
+      "learning_rate": 0.000499989453921448,
+      "loss": 0.3003,
+      "step": 140
+    },
+    {
+      "epoch": 0.010884551193672447,
+      "grad_norm": 1.65625,
+      "learning_rate": 0.0004999835218174307,
+      "loss": 0.2747,
+      "step": 150
+    },
+    {
+      "epoch": 0.011610187939917277,
+      "grad_norm": 1.671875,
+      "learning_rate": 0.000499976271531796,
+      "loss": 0.37,
+      "step": 160
+    },
+    {
+      "epoch": 0.012335824686162107,
+      "grad_norm": 1.0390625,
+      "learning_rate": 0.000499967703102775,
+      "loss": 0.2163,
+      "step": 170
+    },
+    {
+      "epoch": 0.013061461432406937,
+      "grad_norm": 2.546875,
+      "learning_rate": 0.00049995781657555,
+      "loss": 0.3652,
+      "step": 180
+    },
+    {
+      "epoch": 0.013787098178651766,
+      "grad_norm": 3.71875,
+      "learning_rate": 0.000499946612002253,
+      "loss": 0.3177,
+      "step": 190
+    },
+    {
+      "epoch": 0.014512734924896596,
+      "grad_norm": 2.6875,
+      "learning_rate": 0.0004999340894419668,
+      "loss": 0.2043,
+      "step": 200
+    },
+    {
+      "epoch": 0.015238371671141426,
+      "grad_norm": 0.5,
+      "learning_rate": 0.0004999202489607236,
+      "loss": 0.2865,
+      "step": 210
+    },
+    {
+      "epoch": 0.015964008417386256,
+      "grad_norm": 3.015625,
+      "learning_rate": 0.0004999050906315055,
+      "loss": 0.2039,
+      "step": 220
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 13781,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 10,
+  "total_flos": 0.0,
+  "train_batch_size": 5,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-220/training_args.bin b/checkpoint-220/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..7d5e801f990cb70d84bd631ef6b3738864bdfd3d
--- /dev/null
+++ b/checkpoint-220/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3cf317b5a0322fdf5b8c610bfc4afb6ece34736fcb1bea30f92c1212b9b53d69
+size 4984
diff --git a/checkpoint-230/README.md b/checkpoint-230/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..64d55750c9c68c8ef0ccff2f65b1fd32a16578a9
--- /dev/null
+++ b/checkpoint-230/README.md
@@ -0,0 +1,202 @@
+---
+library_name: peft
+base_model: ../pretrained/MiniCPM-2B-dpo-bf16/
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.10.0
\ No newline at end of file
diff --git a/checkpoint-230/adapter_config.json b/checkpoint-230/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..d0eb9db7c6b0c827aba0e86e5057f651b0f868c9
--- /dev/null
+++ b/checkpoint-230/adapter_config.json
@@ -0,0 +1,29 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "../pretrained/MiniCPM-2B-dpo-bf16/",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": "gaussian",
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-230/adapter_model.safetensors b/checkpoint-230/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..d388d6c3932e2cf8ab69149140ee3c993456636f
--- /dev/null
+++ b/checkpoint-230/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:411f13cc54382a236e492814b22fda37c6a16ca0e27cdef0a4407f482cd752fc
+size 5919456
diff --git a/checkpoint-230/optimizer.pt b/checkpoint-230/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7f03acb0e95b8c02b97e4a08dde0325d8cc1a482
--- /dev/null
+++ b/checkpoint-230/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:28d1a829d0aa8f8168e983ea90e125d24e10e6df2ebf1deec12ae6520d7fae3a
+size 11930938
diff --git a/checkpoint-230/rng_state_0.pth b/checkpoint-230/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..c68750cce886cb775d35c5fccd5db3c3555363d5
--- /dev/null
+++ b/checkpoint-230/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f31b6861ef360b1f8f7ba30f91848c17eae922199d9a925854d74c60b6bcbe39
+size 15024
diff --git a/checkpoint-230/rng_state_1.pth b/checkpoint-230/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..2ea9f284de25615ee3ead2b59b67ea7ad7fa4367
--- /dev/null
+++ b/checkpoint-230/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f1f2affe943ec611905d18783b558f75ebb7f769d0bd8a15a6ac3fa317b59f80
+size 15024
diff --git a/checkpoint-230/rng_state_2.pth b/checkpoint-230/rng_state_2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..50afeddfc9d7a5cf759ddf0111437dcdf87fbc70
--- /dev/null
+++ b/checkpoint-230/rng_state_2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4b9ad4f98473121dbffd26bf93eda8b37788dcf87d0c62ec4896117a4b45e596
+size 15024
diff --git a/checkpoint-230/rng_state_3.pth b/checkpoint-230/rng_state_3.pth
new file mode 100644
index 0000000000000000000000000000000000000000..cb088a7b95cc138773e6992c0e5150ff97b25138
--- /dev/null
+++ b/checkpoint-230/rng_state_3.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9d28066825e942cf92e047e62f797743944810e681a7cf5f6984ee017f90bb42
+size 15024
diff --git a/checkpoint-230/scheduler.pt b/checkpoint-230/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b72a92c7717b2cf510920d64ca580b0bec21b638
--- /dev/null
+++ b/checkpoint-230/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:18491b2f8f88a264cf848ca4ccfb08b27fa9fb27ca0cf6cd8e63917394b19ade
+size 1064
diff --git a/checkpoint-230/trainer_state.json b/checkpoint-230/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..5c51db37745b324c1e7d94e6ff72a7e822988a78
--- /dev/null
+++ b/checkpoint-230/trainer_state.json
@@ -0,0 +1,182 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.016689645163631087,
+  "eval_steps": 500,
+  "global_step": 230,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0007256367462448298,
+      "grad_norm": 12.0,
+      "learning_rate": 5e-05,
+      "loss": 3.0993,
+      "step": 10
+    },
+    {
+      "epoch": 0.0014512734924896596,
+      "grad_norm": 10.4375,
+      "learning_rate": 0.0001,
+      "loss": 2.208,
+      "step": 20
+    },
+    {
+      "epoch": 0.0021769102387344894,
+      "grad_norm": 6.5625,
+      "learning_rate": 0.00015,
+      "loss": 1.3285,
+      "step": 30
+    },
+    {
+      "epoch": 0.0029025469849793192,
+      "grad_norm": 6.21875,
+      "learning_rate": 0.0002,
+      "loss": 0.6895,
+      "step": 40
+    },
+    {
+      "epoch": 0.003628183731224149,
+      "grad_norm": 4.15625,
+      "learning_rate": 0.00025,
+      "loss": 0.714,
+      "step": 50
+    },
+    {
+      "epoch": 0.004353820477468979,
+      "grad_norm": 4.71875,
+      "learning_rate": 0.0003,
+      "loss": 0.4849,
+      "step": 60
+    },
+    {
+      "epoch": 0.005079457223713809,
+      "grad_norm": 3.828125,
+      "learning_rate": 0.00035,
+      "loss": 0.3671,
+      "step": 70
+    },
+    {
+      "epoch": 0.0058050939699586385,
+      "grad_norm": 4.03125,
+      "learning_rate": 0.0004,
+      "loss": 0.5693,
+      "step": 80
+    },
+    {
+      "epoch": 0.006530730716203468,
+      "grad_norm": 3.296875,
+      "learning_rate": 0.00045000000000000004,
+      "loss": 0.4133,
+      "step": 90
+    },
+    {
+      "epoch": 0.007256367462448298,
+      "grad_norm": 2.640625,
+      "learning_rate": 0.0005,
+      "loss": 0.2668,
+      "step": 100
+    },
+    {
+      "epoch": 0.007982004208693128,
+      "grad_norm": 1.5859375,
+      "learning_rate": 0.000499999340865746,
+      "loss": 0.2922,
+      "step": 110
+    },
+    {
+      "epoch": 0.008707640954937958,
+      "grad_norm": 3.8125,
+      "learning_rate": 0.0004999973634664594,
+      "loss": 0.3996,
+      "step": 120
+    },
+    {
+      "epoch": 0.009433277701182788,
+      "grad_norm": 2.0625,
+      "learning_rate": 0.0004999940678125673,
+      "loss": 0.2841,
+      "step": 130
+    },
+    {
+      "epoch": 0.010158914447427617,
+      "grad_norm": 3.390625,
+      "learning_rate": 0.000499989453921448,
+      "loss": 0.3003,
+      "step": 140
+    },
+    {
+      "epoch": 0.010884551193672447,
+      "grad_norm": 1.65625,
+      "learning_rate": 0.0004999835218174307,
+      "loss": 0.2747,
+      "step": 150
+    },
+    {
+      "epoch": 0.011610187939917277,
+      "grad_norm": 1.671875,
+      "learning_rate": 0.000499976271531796,
+      "loss": 0.37,
+      "step": 160
+    },
+    {
+      "epoch": 0.012335824686162107,
+      "grad_norm": 1.0390625,
+      "learning_rate": 0.000499967703102775,
+      "loss": 0.2163,
+      "step": 170
+    },
+    {
+      "epoch": 0.013061461432406937,
+      "grad_norm": 2.546875,
+      "learning_rate": 0.00049995781657555,
+      "loss": 0.3652,
+      "step": 180
+    },
+    {
+      "epoch": 0.013787098178651766,
+      "grad_norm": 3.71875,
+      "learning_rate": 0.000499946612002253,
+      "loss": 0.3177,
+      "step": 190
+    },
+    {
+      "epoch": 0.014512734924896596,
+      "grad_norm": 2.6875,
+      "learning_rate": 0.0004999340894419668,
+      "loss": 0.2043,
+      "step": 200
+    },
+    {
+      "epoch": 0.015238371671141426,
+      "grad_norm": 0.5,
+      "learning_rate": 0.0004999202489607236,
+      "loss": 0.2865,
+      "step": 210
+    },
+    {
+      "epoch": 0.015964008417386256,
+      "grad_norm": 3.015625,
+      "learning_rate": 0.0004999050906315055,
+      "loss": 0.2039,
+      "step": 220
+    },
+    {
+      "epoch": 0.016689645163631087,
+      "grad_norm": 0.99609375,
+      "learning_rate": 0.0004998886145342434,
+      "loss": 0.3509,
+      "step": 230
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 13781,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 10,
+  "total_flos": 0.0,
+  "train_batch_size": 5,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-230/training_args.bin b/checkpoint-230/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..7d5e801f990cb70d84bd631ef6b3738864bdfd3d
--- /dev/null
+++ b/checkpoint-230/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3cf317b5a0322fdf5b8c610bfc4afb6ece34736fcb1bea30f92c1212b9b53d69
+size 4984
diff --git a/checkpoint-240/README.md b/checkpoint-240/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..64d55750c9c68c8ef0ccff2f65b1fd32a16578a9
--- /dev/null
+++ b/checkpoint-240/README.md
@@ -0,0 +1,202 @@
+---
+library_name: peft
+base_model: ../pretrained/MiniCPM-2B-dpo-bf16/
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.10.0
\ No newline at end of file
diff --git a/checkpoint-240/adapter_config.json b/checkpoint-240/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..d0eb9db7c6b0c827aba0e86e5057f651b0f868c9
--- /dev/null
+++ b/checkpoint-240/adapter_config.json
@@ -0,0 +1,29 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "../pretrained/MiniCPM-2B-dpo-bf16/",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": "gaussian",
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-240/adapter_model.safetensors b/checkpoint-240/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..30bd83acb487943afc0dfd7802a132c382490b23
--- /dev/null
+++ b/checkpoint-240/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c5038cfe700f7ac3f2e1505982bb097d3840529a09edfc1c616a6bf1e393a6df
+size 5919456
diff --git a/checkpoint-240/optimizer.pt b/checkpoint-240/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..331c6afa7533e2a2838faf289471fcb927a4e0be
--- /dev/null
+++ b/checkpoint-240/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:80bbe1063be7539d53eeb10ac581308ac3000b9589a4d38bf2dbf9240f65ed35
+size 11930938
diff --git a/checkpoint-240/rng_state_0.pth b/checkpoint-240/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..4d57f2e90384717bcce48b8519a5714de45f7b7b
--- /dev/null
+++ b/checkpoint-240/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:58745d05e18e051e7d277a98eeb9a9590b39e8372075309b4931494def212d10
+size 15024
diff --git a/checkpoint-240/rng_state_1.pth b/checkpoint-240/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..576a70cfb7dbe639a00d7aa56a9e2934152d9cbf
--- /dev/null
+++ b/checkpoint-240/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4c851cc404347ac489037c5246981664e3f033add977dafdd7d13ec0631bdd8a
+size 15024
diff --git a/checkpoint-240/rng_state_2.pth b/checkpoint-240/rng_state_2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..9f2b9f2f88b4f81ef38beea9a7923a15c9673dbb
--- /dev/null
+++ b/checkpoint-240/rng_state_2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:acd66bcb4d4726b8ec9452a28d4fb5372003a38e8426423bf6bd28b672b0fdc1
+size 15024
diff --git a/checkpoint-240/rng_state_3.pth b/checkpoint-240/rng_state_3.pth
new file mode 100644
index 0000000000000000000000000000000000000000..59179dc5ec4127589ae34dd3afb2222941d351f4
--- /dev/null
+++ b/checkpoint-240/rng_state_3.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1e91642e29f2f8b79a095419d1856de6be2673f1d24bbdd61708991800d148b6
+size 15024
diff --git a/checkpoint-240/scheduler.pt b/checkpoint-240/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..424d1fde9e6a7ba47dcb9ba6d98f0cb927c9da20
--- /dev/null
+++ b/checkpoint-240/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3a103c50390a302379ad115ad3acb331799a0c70a4792c310d4893608eba142a
+size 1064
diff --git a/checkpoint-240/trainer_state.json b/checkpoint-240/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..3a8c3af3977b61e4d7e025fe5ea55c2994c843d8
--- /dev/null
+++ b/checkpoint-240/trainer_state.json
@@ -0,0 +1,189 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.017415281909875915,
+  "eval_steps": 500,
+  "global_step": 240,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0007256367462448298,
+      "grad_norm": 12.0,
+      "learning_rate": 5e-05,
+      "loss": 3.0993,
+      "step": 10
+    },
+    {
+      "epoch": 0.0014512734924896596,
+      "grad_norm": 10.4375,
+      "learning_rate": 0.0001,
+      "loss": 2.208,
+      "step": 20
+    },
+    {
+      "epoch": 0.0021769102387344894,
+      "grad_norm": 6.5625,
+      "learning_rate": 0.00015,
+      "loss": 1.3285,
+      "step": 30
+    },
+    {
+      "epoch": 0.0029025469849793192,
+      "grad_norm": 6.21875,
+      "learning_rate": 0.0002,
+      "loss": 0.6895,
+      "step": 40
+    },
+    {
+      "epoch": 0.003628183731224149,
+      "grad_norm": 4.15625,
+      "learning_rate": 0.00025,
+      "loss": 0.714,
+      "step": 50
+    },
+    {
+      "epoch": 0.004353820477468979,
+      "grad_norm": 4.71875,
+      "learning_rate": 0.0003,
+      "loss": 0.4849,
+      "step": 60
+    },
+    {
+      "epoch": 0.005079457223713809,
+      "grad_norm": 3.828125,
+      "learning_rate": 0.00035,
+      "loss": 0.3671,
+      "step": 70
+    },
+    {
+      "epoch": 0.0058050939699586385,
+      "grad_norm": 4.03125,
+      "learning_rate": 0.0004,
+      "loss": 0.5693,
+      "step": 80
+    },
+    {
+      "epoch": 0.006530730716203468,
+      "grad_norm": 3.296875,
+      "learning_rate": 0.00045000000000000004,
+      "loss": 0.4133,
+      "step": 90
+    },
+    {
+      "epoch": 0.007256367462448298,
+      "grad_norm": 2.640625,
+      "learning_rate": 0.0005,
+      "loss": 0.2668,
+      "step": 100
+    },
+    {
+      "epoch": 0.007982004208693128,
+      "grad_norm": 1.5859375,
+      "learning_rate": 0.000499999340865746,
+      "loss": 0.2922,
+      "step": 110
+    },
+    {
+      "epoch": 0.008707640954937958,
+      "grad_norm": 3.8125,
+      "learning_rate": 0.0004999973634664594,
+      "loss": 0.3996,
+      "step": 120
+    },
+    {
+      "epoch": 0.009433277701182788,
+      "grad_norm": 2.0625,
+      "learning_rate": 0.0004999940678125673,
+      "loss": 0.2841,
+      "step": 130
+    },
+    {
+      "epoch": 0.010158914447427617,
+      "grad_norm": 3.390625,
+      "learning_rate": 0.000499989453921448,
+      "loss": 0.3003,
+      "step": 140
+    },
+    {
+      "epoch": 0.010884551193672447,
+      "grad_norm": 1.65625,
+      "learning_rate": 0.0004999835218174307,
+      "loss": 0.2747,
+      "step": 150
+    },
+    {
+      "epoch": 0.011610187939917277,
+      "grad_norm": 1.671875,
+      "learning_rate": 0.000499976271531796,
+      "loss": 0.37,
+      "step": 160
+    },
+    {
+      "epoch": 0.012335824686162107,
+      "grad_norm": 1.0390625,
+      "learning_rate": 0.000499967703102775,
+      "loss": 0.2163,
+      "step": 170
+    },
+    {
+      "epoch": 0.013061461432406937,
+      "grad_norm": 2.546875,
+      "learning_rate": 0.00049995781657555,
+      "loss": 0.3652,
+      "step": 180
+    },
+    {
+      "epoch": 0.013787098178651766,
+      "grad_norm": 3.71875,
+      "learning_rate": 0.000499946612002253,
+      "loss": 0.3177,
+      "step": 190
+    },
+    {
+      "epoch": 0.014512734924896596,
+      "grad_norm": 2.6875,
+      "learning_rate": 0.0004999340894419668,
+      "loss": 0.2043,
+      "step": 200
+    },
+    {
+      "epoch": 0.015238371671141426,
+      "grad_norm": 0.5,
+      "learning_rate": 0.0004999202489607236,
+      "loss": 0.2865,
+      "step": 210
+    },
+    {
+      "epoch": 0.015964008417386256,
+      "grad_norm": 3.015625,
+      "learning_rate": 0.0004999050906315055,
+      "loss": 0.2039,
+      "step": 220
+    },
+    {
+      "epoch": 0.016689645163631087,
+      "grad_norm": 0.99609375,
+      "learning_rate": 0.0004998886145342434,
+      "loss": 0.3509,
+      "step": 230
+    },
+    {
+      "epoch": 0.017415281909875915,
+      "grad_norm": 1.34375,
+      "learning_rate": 0.0004998708207558168,
+      "loss": 0.3208,
+      "step": 240
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 13781,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 10,
+  "total_flos": 0.0,
+  "train_batch_size": 5,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-240/training_args.bin b/checkpoint-240/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..7d5e801f990cb70d84bd631ef6b3738864bdfd3d
--- /dev/null
+++ b/checkpoint-240/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3cf317b5a0322fdf5b8c610bfc4afb6ece34736fcb1bea30f92c1212b9b53d69
+size 4984
diff --git a/checkpoint-250/README.md b/checkpoint-250/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..64d55750c9c68c8ef0ccff2f65b1fd32a16578a9
--- /dev/null
+++ b/checkpoint-250/README.md
@@ -0,0 +1,202 @@
+---
+library_name: peft
+base_model: ../pretrained/MiniCPM-2B-dpo-bf16/
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.10.0
\ No newline at end of file
diff --git a/checkpoint-250/adapter_config.json b/checkpoint-250/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..d0eb9db7c6b0c827aba0e86e5057f651b0f868c9
--- /dev/null
+++ b/checkpoint-250/adapter_config.json
@@ -0,0 +1,29 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "../pretrained/MiniCPM-2B-dpo-bf16/",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": "gaussian",
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-250/adapter_model.safetensors b/checkpoint-250/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..2ccfa1f737e5975da877ef90fa09b092d1cca93f
--- /dev/null
+++ b/checkpoint-250/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:301450d1a00642ef4a347f7c10ff0dc4b2f90b4d4b65f464f82ab3b4a5ec35c6
+size 5919456
diff --git a/checkpoint-250/optimizer.pt b/checkpoint-250/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ec15a4651a45fdf7227d4da517e42193f80d16fd
--- /dev/null
+++ b/checkpoint-250/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f4463e5af37ba64709eead0eadee759b28b0d941fe163d4f4d52df9cf9335e67
+size 11930938
diff --git a/checkpoint-250/rng_state_0.pth b/checkpoint-250/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..4713b7423fa71ff9c6811be8f082773be8e24c4b
--- /dev/null
+++ b/checkpoint-250/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:aee86a78259b6e02d1aff507611b7e0efb47c407b206da6ae187c3a6ef6ffb70
+size 15024
diff --git a/checkpoint-250/rng_state_1.pth b/checkpoint-250/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..988502460d1de09dce1295edb30d97fa555b1f39
--- /dev/null
+++ b/checkpoint-250/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f91fe3b0ad6571acecf97acf3b34552653da1cdb437cc92743635b1cb6378718
+size 15024
diff --git a/checkpoint-250/rng_state_2.pth b/checkpoint-250/rng_state_2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..c5792f5fb28e71ad132ce8c5ffaf1303bfa7c21d
--- /dev/null
+++ b/checkpoint-250/rng_state_2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e72b70555625db2b4d0bb696afee6e3868cb327a5a00571b3b276989ebb8be1c
+size 15024
diff --git a/checkpoint-250/rng_state_3.pth b/checkpoint-250/rng_state_3.pth
new file mode 100644
index 0000000000000000000000000000000000000000..927b7132d1f4bccee146819f32c23eb0588cd2ca
--- /dev/null
+++ b/checkpoint-250/rng_state_3.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b05109309e52a227468d27e588e1b56c09dd9cd76c6417ad7e44f3e9f4ab243e
+size 15024
diff --git a/checkpoint-250/scheduler.pt b/checkpoint-250/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..0dc6b7709c592cc957eb43630e3e70f87a838b97
--- /dev/null
+++ b/checkpoint-250/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:35f3fca1ce040d844c4c770a92f77313b42e6e0809d91dfe39550e05db407921
+size 1064
diff --git a/checkpoint-250/trainer_state.json b/checkpoint-250/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..32cc41dd96b47a12bb922362e9954564b6b8dd59
--- /dev/null
+++ b/checkpoint-250/trainer_state.json
@@ -0,0 +1,196 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.018140918656120747,
+  "eval_steps": 500,
+  "global_step": 250,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0007256367462448298,
+      "grad_norm": 12.0,
+      "learning_rate": 5e-05,
+      "loss": 3.0993,
+      "step": 10
+    },
+    {
+      "epoch": 0.0014512734924896596,
+      "grad_norm": 10.4375,
+      "learning_rate": 0.0001,
+      "loss": 2.208,
+      "step": 20
+    },
+    {
+      "epoch": 0.0021769102387344894,
+      "grad_norm": 6.5625,
+      "learning_rate": 0.00015,
+      "loss": 1.3285,
+      "step": 30
+    },
+    {
+      "epoch": 0.0029025469849793192,
+      "grad_norm": 6.21875,
+      "learning_rate": 0.0002,
+      "loss": 0.6895,
+      "step": 40
+    },
+    {
+      "epoch": 0.003628183731224149,
+      "grad_norm": 4.15625,
+      "learning_rate": 0.00025,
+      "loss": 0.714,
+      "step": 50
+    },
+    {
+      "epoch": 0.004353820477468979,
+      "grad_norm": 4.71875,
+      "learning_rate": 0.0003,
+      "loss": 0.4849,
+      "step": 60
+    },
+    {
+      "epoch": 0.005079457223713809,
+      "grad_norm": 3.828125,
+      "learning_rate": 0.00035,
+      "loss": 0.3671,
+      "step": 70
+    },
+    {
+      "epoch": 0.0058050939699586385,
+      "grad_norm": 4.03125,
+      "learning_rate": 0.0004,
+      "loss": 0.5693,
+      "step": 80
+    },
+    {
+      "epoch": 0.006530730716203468,
+      "grad_norm": 3.296875,
+      "learning_rate": 0.00045000000000000004,
+      "loss": 0.4133,
+      "step": 90
+    },
+    {
+      "epoch": 0.007256367462448298,
+      "grad_norm": 2.640625,
+      "learning_rate": 0.0005,
+      "loss": 0.2668,
+      "step": 100
+    },
+    {
+      "epoch": 0.007982004208693128,
+      "grad_norm": 1.5859375,
+      "learning_rate": 0.000499999340865746,
+      "loss": 0.2922,
+      "step": 110
+    },
+    {
+      "epoch": 0.008707640954937958,
+      "grad_norm": 3.8125,
+      "learning_rate": 0.0004999973634664594,
+      "loss": 0.3996,
+      "step": 120
+    },
+    {
+      "epoch": 0.009433277701182788,
+      "grad_norm": 2.0625,
+      "learning_rate": 0.0004999940678125673,
+      "loss": 0.2841,
+      "step": 130
+    },
+    {
+      "epoch": 0.010158914447427617,
+      "grad_norm": 3.390625,
+      "learning_rate": 0.000499989453921448,
+      "loss": 0.3003,
+      "step": 140
+    },
+    {
+      "epoch": 0.010884551193672447,
+      "grad_norm": 1.65625,
+      "learning_rate": 0.0004999835218174307,
+      "loss": 0.2747,
+      "step": 150
+    },
+    {
+      "epoch": 0.011610187939917277,
+      "grad_norm": 1.671875,
+      "learning_rate": 0.000499976271531796,
+      "loss": 0.37,
+      "step": 160
+    },
+    {
+      "epoch": 0.012335824686162107,
+      "grad_norm": 1.0390625,
+      "learning_rate": 0.000499967703102775,
+      "loss": 0.2163,
+      "step": 170
+    },
+    {
+      "epoch": 0.013061461432406937,
+      "grad_norm": 2.546875,
+      "learning_rate": 0.00049995781657555,
+      "loss": 0.3652,
+      "step": 180
+    },
+    {
+      "epoch": 0.013787098178651766,
+      "grad_norm": 3.71875,
+      "learning_rate": 0.000499946612002253,
+      "loss": 0.3177,
+      "step": 190
+    },
+    {
+      "epoch": 0.014512734924896596,
+      "grad_norm": 2.6875,
+      "learning_rate": 0.0004999340894419668,
+      "loss": 0.2043,
+      "step": 200
+    },
+    {
+      "epoch": 0.015238371671141426,
+      "grad_norm": 0.5,
+      "learning_rate": 0.0004999202489607236,
+      "loss": 0.2865,
+      "step": 210
+    },
+    {
+      "epoch": 0.015964008417386256,
+      "grad_norm": 3.015625,
+      "learning_rate": 0.0004999050906315055,
+      "loss": 0.2039,
+      "step": 220
+    },
+    {
+      "epoch": 0.016689645163631087,
+      "grad_norm": 0.99609375,
+      "learning_rate": 0.0004998886145342434,
+      "loss": 0.3509,
+      "step": 230
+    },
+    {
+      "epoch": 0.017415281909875915,
+      "grad_norm": 1.34375,
+      "learning_rate": 0.0004998708207558168,
+      "loss": 0.3208,
+      "step": 240
+    },
+    {
+      "epoch": 0.018140918656120747,
+      "grad_norm": 2.015625,
+      "learning_rate": 0.0004998517093900539,
+      "loss": 0.2307,
+      "step": 250
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 13781,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 10,
+  "total_flos": 0.0,
+  "train_batch_size": 5,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-250/training_args.bin b/checkpoint-250/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..7d5e801f990cb70d84bd631ef6b3738864bdfd3d
--- /dev/null
+++ b/checkpoint-250/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3cf317b5a0322fdf5b8c610bfc4afb6ece34736fcb1bea30f92c1212b9b53d69
+size 4984
diff --git a/checkpoint-260/README.md b/checkpoint-260/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..64d55750c9c68c8ef0ccff2f65b1fd32a16578a9
--- /dev/null
+++ b/checkpoint-260/README.md
@@ -0,0 +1,202 @@
+---
+library_name: peft
+base_model: ../pretrained/MiniCPM-2B-dpo-bf16/
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.10.0
\ No newline at end of file
diff --git a/checkpoint-260/adapter_config.json b/checkpoint-260/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..d0eb9db7c6b0c827aba0e86e5057f651b0f868c9
--- /dev/null
+++ b/checkpoint-260/adapter_config.json
@@ -0,0 +1,29 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "../pretrained/MiniCPM-2B-dpo-bf16/",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": "gaussian",
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-260/adapter_model.safetensors b/checkpoint-260/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..3af8cda06aeacd1123fc604ed13b4274e33256fd
--- /dev/null
+++ b/checkpoint-260/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:29000eca8b6f46976005aad34a041082cb5f61fa8a54b32a4c48b0094eacc909
+size 5919456
diff --git a/checkpoint-260/optimizer.pt b/checkpoint-260/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d872c4df3f1e4412628e6adb91c572dce1fd1853
--- /dev/null
+++ b/checkpoint-260/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:18c5c9d5a0759506cbafc03ff94f4d6712a92377680291d0e8ccf701f825504f
+size 11930938
diff --git a/checkpoint-260/rng_state_0.pth b/checkpoint-260/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..5986159d9814c60e6e2c7b9845b8a11b6c9eacc4
--- /dev/null
+++ b/checkpoint-260/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:19ab1100726ef208c33b7e659f3340a533b61f069b5fa9eb6355bac9eaa6c876
+size 15024
diff --git a/checkpoint-260/rng_state_1.pth b/checkpoint-260/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..94024e13f19412b7f0f443b53bc1c14c189b7f6f
--- /dev/null
+++ b/checkpoint-260/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a660538aa0ca7e33f80069de79c46bca026a35c420ff5d83cfef38640eadb1c3
+size 15024
diff --git a/checkpoint-260/rng_state_2.pth b/checkpoint-260/rng_state_2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..c2d39895499f444760f7eeb20e7cb96d49db4c69
--- /dev/null
+++ b/checkpoint-260/rng_state_2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ed0d5232504ff0a49b85a6a76bab1542fdb729b1c77d3ec26fe42bde3d7656b4
+size 15024
diff --git a/checkpoint-260/rng_state_3.pth b/checkpoint-260/rng_state_3.pth
new file mode 100644
index 0000000000000000000000000000000000000000..ac757b8dda1aa5532598acd28192377f1f03d79f
--- /dev/null
+++ b/checkpoint-260/rng_state_3.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ae8f2759bd590d484c8441009c0eb397ac428cc143357d9d070f513c126b232b
+size 15024
diff --git a/checkpoint-260/scheduler.pt b/checkpoint-260/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5f9daab19bf9e7eb44f861f4f86ac489f23791f3
--- /dev/null
+++ b/checkpoint-260/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7cb63cbf8c0dda27fb3e5efea17d3726fb6b6ea77e5063673a9601a85b22d871
+size 1064
diff --git a/checkpoint-260/trainer_state.json b/checkpoint-260/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..02885cb1cfa94b8eeaf5b313d49461a14f5999fa
--- /dev/null
+++ b/checkpoint-260/trainer_state.json
@@ -0,0 +1,203 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.018866555402365575,
+  "eval_steps": 500,
+  "global_step": 260,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0007256367462448298,
+      "grad_norm": 12.0,
+      "learning_rate": 5e-05,
+      "loss": 3.0993,
+      "step": 10
+    },
+    {
+      "epoch": 0.0014512734924896596,
+      "grad_norm": 10.4375,
+      "learning_rate": 0.0001,
+      "loss": 2.208,
+      "step": 20
+    },
+    {
+      "epoch": 0.0021769102387344894,
+      "grad_norm": 6.5625,
+      "learning_rate": 0.00015,
+      "loss": 1.3285,
+      "step": 30
+    },
+    {
+      "epoch": 0.0029025469849793192,
+      "grad_norm": 6.21875,
+      "learning_rate": 0.0002,
+      "loss": 0.6895,
+      "step": 40
+    },
+    {
+      "epoch": 0.003628183731224149,
+      "grad_norm": 4.15625,
+      "learning_rate": 0.00025,
+      "loss": 0.714,
+      "step": 50
+    },
+    {
+      "epoch": 0.004353820477468979,
+      "grad_norm": 4.71875,
+      "learning_rate": 0.0003,
+      "loss": 0.4849,
+      "step": 60
+    },
+    {
+      "epoch": 0.005079457223713809,
+      "grad_norm": 3.828125,
+      "learning_rate": 0.00035,
+      "loss": 0.3671,
+      "step": 70
+    },
+    {
+      "epoch": 0.0058050939699586385,
+      "grad_norm": 4.03125,
+      "learning_rate": 0.0004,
+      "loss": 0.5693,
+      "step": 80
+    },
+    {
+      "epoch": 0.006530730716203468,
+      "grad_norm": 3.296875,
+      "learning_rate": 0.00045000000000000004,
+      "loss": 0.4133,
+      "step": 90
+    },
+    {
+      "epoch": 0.007256367462448298,
+      "grad_norm": 2.640625,
+      "learning_rate": 0.0005,
+      "loss": 0.2668,
+      "step": 100
+    },
+    {
+      "epoch": 0.007982004208693128,
+      "grad_norm": 1.5859375,
+      "learning_rate": 0.000499999340865746,
+      "loss": 0.2922,
+      "step": 110
+    },
+    {
+      "epoch": 0.008707640954937958,
+      "grad_norm": 3.8125,
+      "learning_rate": 0.0004999973634664594,
+      "loss": 0.3996,
+      "step": 120
+    },
+    {
+      "epoch": 0.009433277701182788,
+      "grad_norm": 2.0625,
+      "learning_rate": 0.0004999940678125673,
+      "loss": 0.2841,
+      "step": 130
+    },
+    {
+      "epoch": 0.010158914447427617,
+      "grad_norm": 3.390625,
+      "learning_rate": 0.000499989453921448,
+      "loss": 0.3003,
+      "step": 140
+    },
+    {
+      "epoch": 0.010884551193672447,
+      "grad_norm": 1.65625,
+      "learning_rate": 0.0004999835218174307,
+      "loss": 0.2747,
+      "step": 150
+    },
+    {
+      "epoch": 0.011610187939917277,
+      "grad_norm": 1.671875,
+      "learning_rate": 0.000499976271531796,
+      "loss": 0.37,
+      "step": 160
+    },
+    {
+      "epoch": 0.012335824686162107,
+      "grad_norm": 1.0390625,
+      "learning_rate": 0.000499967703102775,
+      "loss": 0.2163,
+      "step": 170
+    },
+    {
+      "epoch": 0.013061461432406937,
+      "grad_norm": 2.546875,
+      "learning_rate": 0.00049995781657555,
+      "loss": 0.3652,
+      "step": 180
+    },
+    {
+      "epoch": 0.013787098178651766,
+      "grad_norm": 3.71875,
+      "learning_rate": 0.000499946612002253,
+      "loss": 0.3177,
+      "step": 190
+    },
+    {
+      "epoch": 0.014512734924896596,
+      "grad_norm": 2.6875,
+      "learning_rate": 0.0004999340894419668,
+      "loss": 0.2043,
+      "step": 200
+    },
+    {
+      "epoch": 0.015238371671141426,
+      "grad_norm": 0.5,
+      "learning_rate": 0.0004999202489607236,
+      "loss": 0.2865,
+      "step": 210
+    },
+    {
+      "epoch": 0.015964008417386256,
+      "grad_norm": 3.015625,
+      "learning_rate": 0.0004999050906315055,
+      "loss": 0.2039,
+      "step": 220
+    },
+    {
+      "epoch": 0.016689645163631087,
+      "grad_norm": 0.99609375,
+      "learning_rate": 0.0004998886145342434,
+      "loss": 0.3509,
+      "step": 230
+    },
+    {
+      "epoch": 0.017415281909875915,
+      "grad_norm": 1.34375,
+      "learning_rate": 0.0004998708207558168,
+      "loss": 0.3208,
+      "step": 240
+    },
+    {
+      "epoch": 0.018140918656120747,
+      "grad_norm": 2.015625,
+      "learning_rate": 0.0004998517093900539,
+      "loss": 0.2307,
+      "step": 250
+    },
+    {
+      "epoch": 0.018866555402365575,
+      "grad_norm": 1.0703125,
+      "learning_rate": 0.0004998312805377302,
+      "loss": 0.2232,
+      "step": 260
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 13781,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 10,
+  "total_flos": 0.0,
+  "train_batch_size": 5,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-260/training_args.bin b/checkpoint-260/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..7d5e801f990cb70d84bd631ef6b3738864bdfd3d
--- /dev/null
+++ b/checkpoint-260/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3cf317b5a0322fdf5b8c610bfc4afb6ece34736fcb1bea30f92c1212b9b53d69
+size 4984
diff --git a/checkpoint-270/README.md b/checkpoint-270/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..64d55750c9c68c8ef0ccff2f65b1fd32a16578a9
--- /dev/null
+++ b/checkpoint-270/README.md
@@ -0,0 +1,202 @@
+---
+library_name: peft
+base_model: ../pretrained/MiniCPM-2B-dpo-bf16/
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.10.0
\ No newline at end of file
diff --git a/checkpoint-270/adapter_config.json b/checkpoint-270/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..d0eb9db7c6b0c827aba0e86e5057f651b0f868c9
--- /dev/null
+++ b/checkpoint-270/adapter_config.json
@@ -0,0 +1,29 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "../pretrained/MiniCPM-2B-dpo-bf16/",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": "gaussian",
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-270/adapter_model.safetensors b/checkpoint-270/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..bd8caa162a3c7178265e8a595672e828046422c9
--- /dev/null
+++ b/checkpoint-270/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:73a56fb0c71ed87e987a23e19915e3483aedd48876eb0378b00aefa55e712b57
+size 5919456
diff --git a/checkpoint-270/optimizer.pt b/checkpoint-270/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..073719a6fbaffee411cef91e80d629a9ab5745e4
--- /dev/null
+++ b/checkpoint-270/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b7de37fc431c8cbb2cff13a6d10673ed85825f1dfb8451432a9423d1dc55df49
+size 11930938
diff --git a/checkpoint-270/rng_state_0.pth b/checkpoint-270/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..f05233abd9598e87fb57c48529e24e3ea8b9517c
--- /dev/null
+++ b/checkpoint-270/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9030cf84602b044b4abc21ad0343a30164c20e803ab04dd65f33d7b34a84a29a
+size 15024
diff --git a/checkpoint-270/rng_state_1.pth b/checkpoint-270/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..f19cc0e17ace2b704c08fe29a33c539f9465caa0
--- /dev/null
+++ b/checkpoint-270/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9565a4c8b0ca8999336de0db1c94d5bdfeb3570932d1a55ca515f5adc74c30a3
+size 15024
diff --git a/checkpoint-270/rng_state_2.pth b/checkpoint-270/rng_state_2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..705738e753a2818baa6f933831ca78f054c406f7
--- /dev/null
+++ b/checkpoint-270/rng_state_2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dfa790f05b0001741c55759d1b35bd8b4c645222299ae14beb864348afe0f49a
+size 15024
diff --git a/checkpoint-270/rng_state_3.pth b/checkpoint-270/rng_state_3.pth
new file mode 100644
index 0000000000000000000000000000000000000000..7b771036b0768a2aac8a9322e11698a375ac633c
--- /dev/null
+++ b/checkpoint-270/rng_state_3.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5821590bbf0fe5f12ee494d5c97256f4c278acecd59663a42ff89e3cda932497
+size 15024
diff --git a/checkpoint-270/scheduler.pt b/checkpoint-270/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c964f3c6a6ea7475964aaa1f41c2ee6e74f5ee58
--- /dev/null
+++ b/checkpoint-270/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:081b1acbf1eef67d740cc6fd507aacb66286a94df8e553df24ee372e73c18da7
+size 1064
diff --git a/checkpoint-270/trainer_state.json b/checkpoint-270/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..d40e1967e2b51c1017bf1b11ae6090dd774a8825
--- /dev/null
+++ b/checkpoint-270/trainer_state.json
@@ -0,0 +1,210 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.019592192148610407,
+  "eval_steps": 500,
+  "global_step": 270,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0007256367462448298,
+      "grad_norm": 12.0,
+      "learning_rate": 5e-05,
+      "loss": 3.0993,
+      "step": 10
+    },
+    {
+      "epoch": 0.0014512734924896596,
+      "grad_norm": 10.4375,
+      "learning_rate": 0.0001,
+      "loss": 2.208,
+      "step": 20
+    },
+    {
+      "epoch": 0.0021769102387344894,
+      "grad_norm": 6.5625,
+      "learning_rate": 0.00015,
+      "loss": 1.3285,
+      "step": 30
+    },
+    {
+      "epoch": 0.0029025469849793192,
+      "grad_norm": 6.21875,
+      "learning_rate": 0.0002,
+      "loss": 0.6895,
+      "step": 40
+    },
+    {
+      "epoch": 0.003628183731224149,
+      "grad_norm": 4.15625,
+      "learning_rate": 0.00025,
+      "loss": 0.714,
+      "step": 50
+    },
+    {
+      "epoch": 0.004353820477468979,
+      "grad_norm": 4.71875,
+      "learning_rate": 0.0003,
+      "loss": 0.4849,
+      "step": 60
+    },
+    {
+      "epoch": 0.005079457223713809,
+      "grad_norm": 3.828125,
+      "learning_rate": 0.00035,
+      "loss": 0.3671,
+      "step": 70
+    },
+    {
+      "epoch": 0.0058050939699586385,
+      "grad_norm": 4.03125,
+      "learning_rate": 0.0004,
+      "loss": 0.5693,
+      "step": 80
+    },
+    {
+      "epoch": 0.006530730716203468,
+      "grad_norm": 3.296875,
+      "learning_rate": 0.00045000000000000004,
+      "loss": 0.4133,
+      "step": 90
+    },
+    {
+      "epoch": 0.007256367462448298,
+      "grad_norm": 2.640625,
+      "learning_rate": 0.0005,
+      "loss": 0.2668,
+      "step": 100
+    },
+    {
+      "epoch": 0.007982004208693128,
+      "grad_norm": 1.5859375,
+      "learning_rate": 0.000499999340865746,
+      "loss": 0.2922,
+      "step": 110
+    },
+    {
+      "epoch": 0.008707640954937958,
+      "grad_norm": 3.8125,
+      "learning_rate": 0.0004999973634664594,
+      "loss": 0.3996,
+      "step": 120
+    },
+    {
+      "epoch": 0.009433277701182788,
+      "grad_norm": 2.0625,
+      "learning_rate": 0.0004999940678125673,
+      "loss": 0.2841,
+      "step": 130
+    },
+    {
+      "epoch": 0.010158914447427617,
+      "grad_norm": 3.390625,
+      "learning_rate": 0.000499989453921448,
+      "loss": 0.3003,
+      "step": 140
+    },
+    {
+      "epoch": 0.010884551193672447,
+      "grad_norm": 1.65625,
+      "learning_rate": 0.0004999835218174307,
+      "loss": 0.2747,
+      "step": 150
+    },
+    {
+      "epoch": 0.011610187939917277,
+      "grad_norm": 1.671875,
+      "learning_rate": 0.000499976271531796,
+      "loss": 0.37,
+      "step": 160
+    },
+    {
+      "epoch": 0.012335824686162107,
+      "grad_norm": 1.0390625,
+      "learning_rate": 0.000499967703102775,
+      "loss": 0.2163,
+      "step": 170
+    },
+    {
+      "epoch": 0.013061461432406937,
+      "grad_norm": 2.546875,
+      "learning_rate": 0.00049995781657555,
+      "loss": 0.3652,
+      "step": 180
+    },
+    {
+      "epoch": 0.013787098178651766,
+      "grad_norm": 3.71875,
+      "learning_rate": 0.000499946612002253,
+      "loss": 0.3177,
+      "step": 190
+    },
+    {
+      "epoch": 0.014512734924896596,
+      "grad_norm": 2.6875,
+      "learning_rate": 0.0004999340894419668,
+      "loss": 0.2043,
+      "step": 200
+    },
+    {
+      "epoch": 0.015238371671141426,
+      "grad_norm": 0.5,
+      "learning_rate": 0.0004999202489607236,
+      "loss": 0.2865,
+      "step": 210
+    },
+    {
+      "epoch": 0.015964008417386256,
+      "grad_norm": 3.015625,
+      "learning_rate": 0.0004999050906315055,
+      "loss": 0.2039,
+      "step": 220
+    },
+    {
+      "epoch": 0.016689645163631087,
+      "grad_norm": 0.99609375,
+      "learning_rate": 0.0004998886145342434,
+      "loss": 0.3509,
+      "step": 230
+    },
+    {
+      "epoch": 0.017415281909875915,
+      "grad_norm": 1.34375,
+      "learning_rate": 0.0004998708207558168,
+      "loss": 0.3208,
+      "step": 240
+    },
+    {
+      "epoch": 0.018140918656120747,
+      "grad_norm": 2.015625,
+      "learning_rate": 0.0004998517093900539,
+      "loss": 0.2307,
+      "step": 250
+    },
+    {
+      "epoch": 0.018866555402365575,
+      "grad_norm": 1.0703125,
+      "learning_rate": 0.0004998312805377302,
+      "loss": 0.2232,
+      "step": 260
+    },
+    {
+      "epoch": 0.019592192148610407,
+      "grad_norm": 1.7265625,
+      "learning_rate": 0.0004998095343065685,
+      "loss": 0.2587,
+      "step": 270
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 13781,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 10,
+  "total_flos": 0.0,
+  "train_batch_size": 5,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-270/training_args.bin b/checkpoint-270/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..7d5e801f990cb70d84bd631ef6b3738864bdfd3d
--- /dev/null
+++ b/checkpoint-270/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3cf317b5a0322fdf5b8c610bfc4afb6ece34736fcb1bea30f92c1212b9b53d69
+size 4984
diff --git a/checkpoint-280/README.md b/checkpoint-280/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..64d55750c9c68c8ef0ccff2f65b1fd32a16578a9
--- /dev/null
+++ b/checkpoint-280/README.md
@@ -0,0 +1,202 @@
+---
+library_name: peft
+base_model: ../pretrained/MiniCPM-2B-dpo-bf16/
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.10.0
\ No newline at end of file
diff --git a/checkpoint-280/adapter_config.json b/checkpoint-280/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..d0eb9db7c6b0c827aba0e86e5057f651b0f868c9
--- /dev/null
+++ b/checkpoint-280/adapter_config.json
@@ -0,0 +1,29 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "../pretrained/MiniCPM-2B-dpo-bf16/",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": "gaussian",
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-280/adapter_model.safetensors b/checkpoint-280/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..df12fca26f19c3433ec1a29ce906a02ad3898f26
--- /dev/null
+++ b/checkpoint-280/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fc8798995b04e4cab63504243e2c21f22309fc519bf62c9db56816d5c46d2620
+size 5919456
diff --git a/checkpoint-280/optimizer.pt b/checkpoint-280/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ef30c2fd4914094e2b10380c68dedcb42bf409b7
--- /dev/null
+++ b/checkpoint-280/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d99748a577db7511f849038f672cb342656238f95a3e1510e6f8e6126a5703f9
+size 11930938
diff --git a/checkpoint-280/rng_state_0.pth b/checkpoint-280/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..cd1fce629acf11a75ada7411064808a959e6d14e
--- /dev/null
+++ b/checkpoint-280/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f6446078e83bac279eab0ee7d6a22ca16b5aad48614d84b0ceabb5981efe81b1
+size 15024
diff --git a/checkpoint-280/rng_state_1.pth b/checkpoint-280/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..1cf7cf5a283eef502d048c98433bcf2585d24816
--- /dev/null
+++ b/checkpoint-280/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b9887930987b82bd1aba09b72af6b46e3fd0bd6b94b09e189c9a8b690282cb47
+size 15024
diff --git a/checkpoint-280/rng_state_2.pth b/checkpoint-280/rng_state_2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..4f4bdd0be477a59a0b1d5aa3dc566258061c76b1
--- /dev/null
+++ b/checkpoint-280/rng_state_2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5f32c141fb3bd2e9dd486ef3b92d1447f6e803722470c82bae776bfe7efaeaf2
+size 15024
diff --git a/checkpoint-280/rng_state_3.pth b/checkpoint-280/rng_state_3.pth
new file mode 100644
index 0000000000000000000000000000000000000000..d182f550c0ef271622c08a54484dacbc3d5aaef2
--- /dev/null
+++ b/checkpoint-280/rng_state_3.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9ce98d0a1ee786c8ef3bf8704ad72999394c76b8ab6b488bf171176bdcba6805
+size 15024
diff --git a/checkpoint-280/scheduler.pt b/checkpoint-280/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c84cb297d88955a387fbafe953ef6695a1a8998e
--- /dev/null
+++ b/checkpoint-280/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5aba200a61f61f1537e122c9145e58ffb301319be89cf2fba4c9c9eeed528a5c
+size 1064
diff --git a/checkpoint-280/trainer_state.json b/checkpoint-280/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..917369600e8ab539a4e9fa6b7e2f3e6547737082
--- /dev/null
+++ b/checkpoint-280/trainer_state.json
@@ -0,0 +1,217 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.020317828894855235,
+  "eval_steps": 500,
+  "global_step": 280,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0007256367462448298,
+      "grad_norm": 12.0,
+      "learning_rate": 5e-05,
+      "loss": 3.0993,
+      "step": 10
+    },
+    {
+      "epoch": 0.0014512734924896596,
+      "grad_norm": 10.4375,
+      "learning_rate": 0.0001,
+      "loss": 2.208,
+      "step": 20
+    },
+    {
+      "epoch": 0.0021769102387344894,
+      "grad_norm": 6.5625,
+      "learning_rate": 0.00015,
+      "loss": 1.3285,
+      "step": 30
+    },
+    {
+      "epoch": 0.0029025469849793192,
+      "grad_norm": 6.21875,
+      "learning_rate": 0.0002,
+      "loss": 0.6895,
+      "step": 40
+    },
+    {
+      "epoch": 0.003628183731224149,
+      "grad_norm": 4.15625,
+      "learning_rate": 0.00025,
+      "loss": 0.714,
+      "step": 50
+    },
+    {
+      "epoch": 0.004353820477468979,
+      "grad_norm": 4.71875,
+      "learning_rate": 0.0003,
+      "loss": 0.4849,
+      "step": 60
+    },
+    {
+      "epoch": 0.005079457223713809,
+      "grad_norm": 3.828125,
+      "learning_rate": 0.00035,
+      "loss": 0.3671,
+      "step": 70
+    },
+    {
+      "epoch": 0.0058050939699586385,
+      "grad_norm": 4.03125,
+      "learning_rate": 0.0004,
+      "loss": 0.5693,
+      "step": 80
+    },
+    {
+      "epoch": 0.006530730716203468,
+      "grad_norm": 3.296875,
+      "learning_rate": 0.00045000000000000004,
+      "loss": 0.4133,
+      "step": 90
+    },
+    {
+      "epoch": 0.007256367462448298,
+      "grad_norm": 2.640625,
+      "learning_rate": 0.0005,
+      "loss": 0.2668,
+      "step": 100
+    },
+    {
+      "epoch": 0.007982004208693128,
+      "grad_norm": 1.5859375,
+      "learning_rate": 0.000499999340865746,
+      "loss": 0.2922,
+      "step": 110
+    },
+    {
+      "epoch": 0.008707640954937958,
+      "grad_norm": 3.8125,
+      "learning_rate": 0.0004999973634664594,
+      "loss": 0.3996,
+      "step": 120
+    },
+    {
+      "epoch": 0.009433277701182788,
+      "grad_norm": 2.0625,
+      "learning_rate": 0.0004999940678125673,
+      "loss": 0.2841,
+      "step": 130
+    },
+    {
+      "epoch": 0.010158914447427617,
+      "grad_norm": 3.390625,
+      "learning_rate": 0.000499989453921448,
+      "loss": 0.3003,
+      "step": 140
+    },
+    {
+      "epoch": 0.010884551193672447,
+      "grad_norm": 1.65625,
+      "learning_rate": 0.0004999835218174307,
+      "loss": 0.2747,
+      "step": 150
+    },
+    {
+      "epoch": 0.011610187939917277,
+      "grad_norm": 1.671875,
+      "learning_rate": 0.000499976271531796,
+      "loss": 0.37,
+      "step": 160
+    },
+    {
+      "epoch": 0.012335824686162107,
+      "grad_norm": 1.0390625,
+      "learning_rate": 0.000499967703102775,
+      "loss": 0.2163,
+      "step": 170
+    },
+    {
+      "epoch": 0.013061461432406937,
+      "grad_norm": 2.546875,
+      "learning_rate": 0.00049995781657555,
+      "loss": 0.3652,
+      "step": 180
+    },
+    {
+      "epoch": 0.013787098178651766,
+      "grad_norm": 3.71875,
+      "learning_rate": 0.000499946612002253,
+      "loss": 0.3177,
+      "step": 190
+    },
+    {
+      "epoch": 0.014512734924896596,
+      "grad_norm": 2.6875,
+      "learning_rate": 0.0004999340894419668,
+      "loss": 0.2043,
+      "step": 200
+    },
+    {
+      "epoch": 0.015238371671141426,
+      "grad_norm": 0.5,
+      "learning_rate": 0.0004999202489607236,
+      "loss": 0.2865,
+      "step": 210
+    },
+    {
+      "epoch": 0.015964008417386256,
+      "grad_norm": 3.015625,
+      "learning_rate": 0.0004999050906315055,
+      "loss": 0.2039,
+      "step": 220
+    },
+    {
+      "epoch": 0.016689645163631087,
+      "grad_norm": 0.99609375,
+      "learning_rate": 0.0004998886145342434,
+      "loss": 0.3509,
+      "step": 230
+    },
+    {
+      "epoch": 0.017415281909875915,
+      "grad_norm": 1.34375,
+      "learning_rate": 0.0004998708207558168,
+      "loss": 0.3208,
+      "step": 240
+    },
+    {
+      "epoch": 0.018140918656120747,
+      "grad_norm": 2.015625,
+      "learning_rate": 0.0004998517093900539,
+      "loss": 0.2307,
+      "step": 250
+    },
+    {
+      "epoch": 0.018866555402365575,
+      "grad_norm": 1.0703125,
+      "learning_rate": 0.0004998312805377302,
+      "loss": 0.2232,
+      "step": 260
+    },
+    {
+      "epoch": 0.019592192148610407,
+      "grad_norm": 1.7265625,
+      "learning_rate": 0.0004998095343065685,
+      "loss": 0.2587,
+      "step": 270
+    },
+    {
+      "epoch": 0.020317828894855235,
+      "grad_norm": 1.21875,
+      "learning_rate": 0.0004997864708112384,
+      "loss": 0.2175,
+      "step": 280
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 13781,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 10,
+  "total_flos": 0.0,
+  "train_batch_size": 5,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-280/training_args.bin b/checkpoint-280/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..7d5e801f990cb70d84bd631ef6b3738864bdfd3d
--- /dev/null
+++ b/checkpoint-280/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3cf317b5a0322fdf5b8c610bfc4afb6ece34736fcb1bea30f92c1212b9b53d69
+size 4984
diff --git a/checkpoint-290/README.md b/checkpoint-290/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..64d55750c9c68c8ef0ccff2f65b1fd32a16578a9
--- /dev/null
+++ b/checkpoint-290/README.md
@@ -0,0 +1,202 @@
+---
+library_name: peft
+base_model: ../pretrained/MiniCPM-2B-dpo-bf16/
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.10.0
\ No newline at end of file
diff --git a/checkpoint-290/adapter_config.json b/checkpoint-290/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..d0eb9db7c6b0c827aba0e86e5057f651b0f868c9
--- /dev/null
+++ b/checkpoint-290/adapter_config.json
@@ -0,0 +1,29 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "../pretrained/MiniCPM-2B-dpo-bf16/",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": "gaussian",
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-290/adapter_model.safetensors b/checkpoint-290/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..b00aeb8c1cee5edc18c831dbf40e50c730096644
--- /dev/null
+++ b/checkpoint-290/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a2453cb86e5465b2d9eef93022843f24de5a6bcebbdec95c2ec3e0f3f65e8b74
+size 5919456
diff --git a/checkpoint-290/optimizer.pt b/checkpoint-290/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..28deee2198c67886e7e4c11928147f9201979b06
--- /dev/null
+++ b/checkpoint-290/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d01b3c7576859df986d76acc259613b1a521f9db353a72f9efa331d6fc71b9a4
+size 11930938
diff --git a/checkpoint-290/rng_state_0.pth b/checkpoint-290/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..6142001eee84d6c48f80ffd3eedba90655428239
--- /dev/null
+++ b/checkpoint-290/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:10ecf202489cdffda7040c277e81be6d180addb568115e94b3291f496d26b1c2
+size 15024
diff --git a/checkpoint-290/rng_state_1.pth b/checkpoint-290/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..21df1ceab9a496591f3ec6c06290e5f1bcd0a695
--- /dev/null
+++ b/checkpoint-290/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3ad7fd7bfb4cb1cf9da691ddec7b2f8dd580d0d8eba960dee03b17c3c70b5966
+size 15024
diff --git a/checkpoint-290/rng_state_2.pth b/checkpoint-290/rng_state_2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..4fa86dc44761d2e9858557cf0ffea31e134836fc
--- /dev/null
+++ b/checkpoint-290/rng_state_2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ea8c68c0f18cc1ad18cc3e64329385de34fffb30cc23949ad29050aa152ddd21
+size 15024
diff --git a/checkpoint-290/rng_state_3.pth b/checkpoint-290/rng_state_3.pth
new file mode 100644
index 0000000000000000000000000000000000000000..d4bb0dbace4cb08643e074b55baac30dbac47009
--- /dev/null
+++ b/checkpoint-290/rng_state_3.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ee60dc50d220aef980fc46b6da6b6d934b450850569607e58ca21bc4e13b47e5
+size 15024
diff --git a/checkpoint-290/scheduler.pt b/checkpoint-290/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f3327ff8bd63969250b5f34d7565fce30e38446a
--- /dev/null
+++ b/checkpoint-290/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e3c5009df5d0887a314fcfb0d1632ebc9e225d0bd9ce03cdcd3860fc95be2718
+size 1064
diff --git a/checkpoint-290/trainer_state.json b/checkpoint-290/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..7e25e188dcace15096d180dccd4050c4414c6f12
--- /dev/null
+++ b/checkpoint-290/trainer_state.json
@@ -0,0 +1,224 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.021043465641100066,
+  "eval_steps": 500,
+  "global_step": 290,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0007256367462448298,
+      "grad_norm": 12.0,
+      "learning_rate": 5e-05,
+      "loss": 3.0993,
+      "step": 10
+    },
+    {
+      "epoch": 0.0014512734924896596,
+      "grad_norm": 10.4375,
+      "learning_rate": 0.0001,
+      "loss": 2.208,
+      "step": 20
+    },
+    {
+      "epoch": 0.0021769102387344894,
+      "grad_norm": 6.5625,
+      "learning_rate": 0.00015,
+      "loss": 1.3285,
+      "step": 30
+    },
+    {
+      "epoch": 0.0029025469849793192,
+      "grad_norm": 6.21875,
+      "learning_rate": 0.0002,
+      "loss": 0.6895,
+      "step": 40
+    },
+    {
+      "epoch": 0.003628183731224149,
+      "grad_norm": 4.15625,
+      "learning_rate": 0.00025,
+      "loss": 0.714,
+      "step": 50
+    },
+    {
+      "epoch": 0.004353820477468979,
+      "grad_norm": 4.71875,
+      "learning_rate": 0.0003,
+      "loss": 0.4849,
+      "step": 60
+    },
+    {
+      "epoch": 0.005079457223713809,
+      "grad_norm": 3.828125,
+      "learning_rate": 0.00035,
+      "loss": 0.3671,
+      "step": 70
+    },
+    {
+      "epoch": 0.0058050939699586385,
+      "grad_norm": 4.03125,
+      "learning_rate": 0.0004,
+      "loss": 0.5693,
+      "step": 80
+    },
+    {
+      "epoch": 0.006530730716203468,
+      "grad_norm": 3.296875,
+      "learning_rate": 0.00045000000000000004,
+      "loss": 0.4133,
+      "step": 90
+    },
+    {
+      "epoch": 0.007256367462448298,
+      "grad_norm": 2.640625,
+      "learning_rate": 0.0005,
+      "loss": 0.2668,
+      "step": 100
+    },
+    {
+      "epoch": 0.007982004208693128,
+      "grad_norm": 1.5859375,
+      "learning_rate": 0.000499999340865746,
+      "loss": 0.2922,
+      "step": 110
+    },
+    {
+      "epoch": 0.008707640954937958,
+      "grad_norm": 3.8125,
+      "learning_rate": 0.0004999973634664594,
+      "loss": 0.3996,
+      "step": 120
+    },
+    {
+      "epoch": 0.009433277701182788,
+      "grad_norm": 2.0625,
+      "learning_rate": 0.0004999940678125673,
+      "loss": 0.2841,
+      "step": 130
+    },
+    {
+      "epoch": 0.010158914447427617,
+      "grad_norm": 3.390625,
+      "learning_rate": 0.000499989453921448,
+      "loss": 0.3003,
+      "step": 140
+    },
+    {
+      "epoch": 0.010884551193672447,
+      "grad_norm": 1.65625,
+      "learning_rate": 0.0004999835218174307,
+      "loss": 0.2747,
+      "step": 150
+    },
+    {
+      "epoch": 0.011610187939917277,
+      "grad_norm": 1.671875,
+      "learning_rate": 0.000499976271531796,
+      "loss": 0.37,
+      "step": 160
+    },
+    {
+      "epoch": 0.012335824686162107,
+      "grad_norm": 1.0390625,
+      "learning_rate": 0.000499967703102775,
+      "loss": 0.2163,
+      "step": 170
+    },
+    {
+      "epoch": 0.013061461432406937,
+      "grad_norm": 2.546875,
+      "learning_rate": 0.00049995781657555,
+      "loss": 0.3652,
+      "step": 180
+    },
+    {
+      "epoch": 0.013787098178651766,
+      "grad_norm": 3.71875,
+      "learning_rate": 0.000499946612002253,
+      "loss": 0.3177,
+      "step": 190
+    },
+    {
+      "epoch": 0.014512734924896596,
+      "grad_norm": 2.6875,
+      "learning_rate": 0.0004999340894419668,
+      "loss": 0.2043,
+      "step": 200
+    },
+    {
+      "epoch": 0.015238371671141426,
+      "grad_norm": 0.5,
+      "learning_rate": 0.0004999202489607236,
+      "loss": 0.2865,
+      "step": 210
+    },
+    {
+      "epoch": 0.015964008417386256,
+      "grad_norm": 3.015625,
+      "learning_rate": 0.0004999050906315055,
+      "loss": 0.2039,
+      "step": 220
+    },
+    {
+      "epoch": 0.016689645163631087,
+      "grad_norm": 0.99609375,
+      "learning_rate": 0.0004998886145342434,
+      "loss": 0.3509,
+      "step": 230
+    },
+    {
+      "epoch": 0.017415281909875915,
+      "grad_norm": 1.34375,
+      "learning_rate": 0.0004998708207558168,
+      "loss": 0.3208,
+      "step": 240
+    },
+    {
+      "epoch": 0.018140918656120747,
+      "grad_norm": 2.015625,
+      "learning_rate": 0.0004998517093900539,
+      "loss": 0.2307,
+      "step": 250
+    },
+    {
+      "epoch": 0.018866555402365575,
+      "grad_norm": 1.0703125,
+      "learning_rate": 0.0004998312805377302,
+      "loss": 0.2232,
+      "step": 260
+    },
+    {
+      "epoch": 0.019592192148610407,
+      "grad_norm": 1.7265625,
+      "learning_rate": 0.0004998095343065685,
+      "loss": 0.2587,
+      "step": 270
+    },
+    {
+      "epoch": 0.020317828894855235,
+      "grad_norm": 1.21875,
+      "learning_rate": 0.0004997864708112384,
+      "loss": 0.2175,
+      "step": 280
+    },
+    {
+      "epoch": 0.021043465641100066,
+      "grad_norm": 2.234375,
+      "learning_rate": 0.0004997620901733554,
+      "loss": 0.2185,
+      "step": 290
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 13781,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 10,
+  "total_flos": 0.0,
+  "train_batch_size": 5,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-290/training_args.bin b/checkpoint-290/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..7d5e801f990cb70d84bd631ef6b3738864bdfd3d
--- /dev/null
+++ b/checkpoint-290/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3cf317b5a0322fdf5b8c610bfc4afb6ece34736fcb1bea30f92c1212b9b53d69
+size 4984
diff --git a/checkpoint-30/README.md b/checkpoint-30/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..64d55750c9c68c8ef0ccff2f65b1fd32a16578a9
--- /dev/null
+++ b/checkpoint-30/README.md
@@ -0,0 +1,202 @@
+---
+library_name: peft
+base_model: ../pretrained/MiniCPM-2B-dpo-bf16/
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.10.0
\ No newline at end of file
diff --git a/checkpoint-30/adapter_config.json b/checkpoint-30/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..d0eb9db7c6b0c827aba0e86e5057f651b0f868c9
--- /dev/null
+++ b/checkpoint-30/adapter_config.json
@@ -0,0 +1,29 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "../pretrained/MiniCPM-2B-dpo-bf16/",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": "gaussian",
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-30/adapter_model.safetensors b/checkpoint-30/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..7d1c2653826885fea31dcab7f1f232fbab352be0
--- /dev/null
+++ b/checkpoint-30/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:505af2f4402f468c656710b0f6589bca09f7beb3cef656989301d3b8847a5c7c
+size 5919456
diff --git a/checkpoint-30/optimizer.pt b/checkpoint-30/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1d766963dffc4a7d032842d6b278cc79db5d37b9
--- /dev/null
+++ b/checkpoint-30/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e82366c2b0150cbefe38a9726fccf78b09f7e8441c6d1c529a905627416b1a73
+size 11930938
diff --git a/checkpoint-30/rng_state_0.pth b/checkpoint-30/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..cbb00ec3c77a5ef34c808105a14561e4b3fb4574
--- /dev/null
+++ b/checkpoint-30/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:80c2f27d97a10c31a8638a58d5088a128efbcbf227dc925bea21ae2142096923
+size 15024
diff --git a/checkpoint-30/rng_state_1.pth b/checkpoint-30/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..1a367dd971913b0c76a7b19d3948876472b05ca3
--- /dev/null
+++ b/checkpoint-30/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e608d3782d947f3f883a4b59c9c0fd4233eb58985d44365aa874198c44057567
+size 15024
diff --git a/checkpoint-30/rng_state_2.pth b/checkpoint-30/rng_state_2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..4a6fa526b9a44edd2692eda125ed70f3eceb200a
--- /dev/null
+++ b/checkpoint-30/rng_state_2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:18fcde1cc7e91d886643e2dc65d40208b9dd4186781b64e6fb5cd3cc4748be1c
+size 15024
diff --git a/checkpoint-30/rng_state_3.pth b/checkpoint-30/rng_state_3.pth
new file mode 100644
index 0000000000000000000000000000000000000000..e875f1ba6b679566fe9122b9653f396cf58dba01
--- /dev/null
+++ b/checkpoint-30/rng_state_3.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1a65a67b1e90f3a9e2bf7adf3069c8e8844c3681189f60cdf31ba20176ffe8ab
+size 15024
diff --git a/checkpoint-30/scheduler.pt b/checkpoint-30/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f00273a5062f9f5d429b156bf9ab8151e784f84e
--- /dev/null
+++ b/checkpoint-30/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6032f2dbcaea8c6cf17193dd7b6064bcbb5402d7520baefc91622f49ce3944f2
+size 1064
diff --git a/checkpoint-30/trainer_state.json b/checkpoint-30/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..d2c8805507c60bd35753e601c416034c91c73b4f
--- /dev/null
+++ b/checkpoint-30/trainer_state.json
@@ -0,0 +1,42 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.0021769102387344894,
+  "eval_steps": 500,
+  "global_step": 30,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0007256367462448298,
+      "grad_norm": 12.0,
+      "learning_rate": 5e-05,
+      "loss": 3.0993,
+      "step": 10
+    },
+    {
+      "epoch": 0.0014512734924896596,
+      "grad_norm": 10.4375,
+      "learning_rate": 0.0001,
+      "loss": 2.208,
+      "step": 20
+    },
+    {
+      "epoch": 0.0021769102387344894,
+      "grad_norm": 6.5625,
+      "learning_rate": 0.00015,
+      "loss": 1.3285,
+      "step": 30
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 13781,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 10,
+  "total_flos": 0.0,
+  "train_batch_size": 5,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-30/training_args.bin b/checkpoint-30/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..7d5e801f990cb70d84bd631ef6b3738864bdfd3d
--- /dev/null
+++ b/checkpoint-30/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3cf317b5a0322fdf5b8c610bfc4afb6ece34736fcb1bea30f92c1212b9b53d69
+size 4984
diff --git a/checkpoint-300/README.md b/checkpoint-300/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..64d55750c9c68c8ef0ccff2f65b1fd32a16578a9
--- /dev/null
+++ b/checkpoint-300/README.md
@@ -0,0 +1,202 @@
+---
+library_name: peft
+base_model: ../pretrained/MiniCPM-2B-dpo-bf16/
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.10.0
\ No newline at end of file
diff --git a/checkpoint-300/adapter_config.json b/checkpoint-300/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..d0eb9db7c6b0c827aba0e86e5057f651b0f868c9
--- /dev/null
+++ b/checkpoint-300/adapter_config.json
@@ -0,0 +1,29 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "../pretrained/MiniCPM-2B-dpo-bf16/",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": "gaussian",
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-300/adapter_model.safetensors b/checkpoint-300/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..9d3bd9db6c7137e561b5d64f52cd6e46fdd7878a
--- /dev/null
+++ b/checkpoint-300/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d8f69b0f11e4eb9a931387b81688e965d3023ac1386bea140c82c3d857bff0dd
+size 5919456
diff --git a/checkpoint-300/optimizer.pt b/checkpoint-300/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7a2f533a5b5b6760db802f03633b45773fc52912
--- /dev/null
+++ b/checkpoint-300/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ad03f19a4002cf23e906c3aff2a16f7532d68290b7d24bbfb65a4acdb5d59c33
+size 11930938
diff --git a/checkpoint-300/rng_state_0.pth b/checkpoint-300/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..83dd428249d32d9afc5469ec82c2173ef48cdd36
--- /dev/null
+++ b/checkpoint-300/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3ffddd2869d266e566aedf316e46ee62978bba1fb4be250207ab9de7330f5477
+size 15024
diff --git a/checkpoint-300/rng_state_1.pth b/checkpoint-300/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..4001b60d8cac6c2628f9204935b6e232aef09474
--- /dev/null
+++ b/checkpoint-300/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f9e1971b3b34ac302cb20e4a3bde6f2cef6b53c750487560ab2e417d30d34742
+size 15024
diff --git a/checkpoint-300/rng_state_2.pth b/checkpoint-300/rng_state_2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..59b9c92f8e994188209d7409c22bfa335e3d1655
--- /dev/null
+++ b/checkpoint-300/rng_state_2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e886fc43e3ae110a3b6320792c406afdc247f52e086c52eb1a507bcce48c90e1
+size 15024
diff --git a/checkpoint-300/rng_state_3.pth b/checkpoint-300/rng_state_3.pth
new file mode 100644
index 0000000000000000000000000000000000000000..f45acd502d6345e61bfae08006c2812d8c79c716
--- /dev/null
+++ b/checkpoint-300/rng_state_3.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6d01f302fec204113c70aea51b32f139f52071a27440a434f15b42069147de2f
+size 15024
diff --git a/checkpoint-300/scheduler.pt b/checkpoint-300/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b911dca6a574cabe53321582b7f4d789ba852912
--- /dev/null
+++ b/checkpoint-300/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:370323249b944cd4671b72e1979c5e1141c89d64d32181f0d61e2ef4dea02e7d
+size 1064
diff --git a/checkpoint-300/trainer_state.json b/checkpoint-300/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..1c6ff9b7d3aaccf49672c626cfbdd28d8136153d
--- /dev/null
+++ b/checkpoint-300/trainer_state.json
@@ -0,0 +1,231 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.021769102387344894,
+  "eval_steps": 500,
+  "global_step": 300,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0007256367462448298,
+      "grad_norm": 12.0,
+      "learning_rate": 5e-05,
+      "loss": 3.0993,
+      "step": 10
+    },
+    {
+      "epoch": 0.0014512734924896596,
+      "grad_norm": 10.4375,
+      "learning_rate": 0.0001,
+      "loss": 2.208,
+      "step": 20
+    },
+    {
+      "epoch": 0.0021769102387344894,
+      "grad_norm": 6.5625,
+      "learning_rate": 0.00015,
+      "loss": 1.3285,
+      "step": 30
+    },
+    {
+      "epoch": 0.0029025469849793192,
+      "grad_norm": 6.21875,
+      "learning_rate": 0.0002,
+      "loss": 0.6895,
+      "step": 40
+    },
+    {
+      "epoch": 0.003628183731224149,
+      "grad_norm": 4.15625,
+      "learning_rate": 0.00025,
+      "loss": 0.714,
+      "step": 50
+    },
+    {
+      "epoch": 0.004353820477468979,
+      "grad_norm": 4.71875,
+      "learning_rate": 0.0003,
+      "loss": 0.4849,
+      "step": 60
+    },
+    {
+      "epoch": 0.005079457223713809,
+      "grad_norm": 3.828125,
+      "learning_rate": 0.00035,
+      "loss": 0.3671,
+      "step": 70
+    },
+    {
+      "epoch": 0.0058050939699586385,
+      "grad_norm": 4.03125,
+      "learning_rate": 0.0004,
+      "loss": 0.5693,
+      "step": 80
+    },
+    {
+      "epoch": 0.006530730716203468,
+      "grad_norm": 3.296875,
+      "learning_rate": 0.00045000000000000004,
+      "loss": 0.4133,
+      "step": 90
+    },
+    {
+      "epoch": 0.007256367462448298,
+      "grad_norm": 2.640625,
+      "learning_rate": 0.0005,
+      "loss": 0.2668,
+      "step": 100
+    },
+    {
+      "epoch": 0.007982004208693128,
+      "grad_norm": 1.5859375,
+      "learning_rate": 0.000499999340865746,
+      "loss": 0.2922,
+      "step": 110
+    },
+    {
+      "epoch": 0.008707640954937958,
+      "grad_norm": 3.8125,
+      "learning_rate": 0.0004999973634664594,
+      "loss": 0.3996,
+      "step": 120
+    },
+    {
+      "epoch": 0.009433277701182788,
+      "grad_norm": 2.0625,
+      "learning_rate": 0.0004999940678125673,
+      "loss": 0.2841,
+      "step": 130
+    },
+    {
+      "epoch": 0.010158914447427617,
+      "grad_norm": 3.390625,
+      "learning_rate": 0.000499989453921448,
+      "loss": 0.3003,
+      "step": 140
+    },
+    {
+      "epoch": 0.010884551193672447,
+      "grad_norm": 1.65625,
+      "learning_rate": 0.0004999835218174307,
+      "loss": 0.2747,
+      "step": 150
+    },
+    {
+      "epoch": 0.011610187939917277,
+      "grad_norm": 1.671875,
+      "learning_rate": 0.000499976271531796,
+      "loss": 0.37,
+      "step": 160
+    },
+    {
+      "epoch": 0.012335824686162107,
+      "grad_norm": 1.0390625,
+      "learning_rate": 0.000499967703102775,
+      "loss": 0.2163,
+      "step": 170
+    },
+    {
+      "epoch": 0.013061461432406937,
+      "grad_norm": 2.546875,
+      "learning_rate": 0.00049995781657555,
+      "loss": 0.3652,
+      "step": 180
+    },
+    {
+      "epoch": 0.013787098178651766,
+      "grad_norm": 3.71875,
+      "learning_rate": 0.000499946612002253,
+      "loss": 0.3177,
+      "step": 190
+    },
+    {
+      "epoch": 0.014512734924896596,
+      "grad_norm": 2.6875,
+      "learning_rate": 0.0004999340894419668,
+      "loss": 0.2043,
+      "step": 200
+    },
+    {
+      "epoch": 0.015238371671141426,
+      "grad_norm": 0.5,
+      "learning_rate": 0.0004999202489607236,
+      "loss": 0.2865,
+      "step": 210
+    },
+    {
+      "epoch": 0.015964008417386256,
+      "grad_norm": 3.015625,
+      "learning_rate": 0.0004999050906315055,
+      "loss": 0.2039,
+      "step": 220
+    },
+    {
+      "epoch": 0.016689645163631087,
+      "grad_norm": 0.99609375,
+      "learning_rate": 0.0004998886145342434,
+      "loss": 0.3509,
+      "step": 230
+    },
+    {
+      "epoch": 0.017415281909875915,
+      "grad_norm": 1.34375,
+      "learning_rate": 0.0004998708207558168,
+      "loss": 0.3208,
+      "step": 240
+    },
+    {
+      "epoch": 0.018140918656120747,
+      "grad_norm": 2.015625,
+      "learning_rate": 0.0004998517093900539,
+      "loss": 0.2307,
+      "step": 250
+    },
+    {
+      "epoch": 0.018866555402365575,
+      "grad_norm": 1.0703125,
+      "learning_rate": 0.0004998312805377302,
+      "loss": 0.2232,
+      "step": 260
+    },
+    {
+      "epoch": 0.019592192148610407,
+      "grad_norm": 1.7265625,
+      "learning_rate": 0.0004998095343065685,
+      "loss": 0.2587,
+      "step": 270
+    },
+    {
+      "epoch": 0.020317828894855235,
+      "grad_norm": 1.21875,
+      "learning_rate": 0.0004997864708112384,
+      "loss": 0.2175,
+      "step": 280
+    },
+    {
+      "epoch": 0.021043465641100066,
+      "grad_norm": 2.234375,
+      "learning_rate": 0.0004997620901733554,
+      "loss": 0.2185,
+      "step": 290
+    },
+    {
+      "epoch": 0.021769102387344894,
+      "grad_norm": 1.2734375,
+      "learning_rate": 0.0004997363925214804,
+      "loss": 0.2409,
+      "step": 300
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 13781,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 10,
+  "total_flos": 0.0,
+  "train_batch_size": 5,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-300/training_args.bin b/checkpoint-300/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..7d5e801f990cb70d84bd631ef6b3738864bdfd3d
--- /dev/null
+++ b/checkpoint-300/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3cf317b5a0322fdf5b8c610bfc4afb6ece34736fcb1bea30f92c1212b9b53d69
+size 4984
diff --git a/checkpoint-310/README.md b/checkpoint-310/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..64d55750c9c68c8ef0ccff2f65b1fd32a16578a9
--- /dev/null
+++ b/checkpoint-310/README.md
@@ -0,0 +1,202 @@
+---
+library_name: peft
+base_model: ../pretrained/MiniCPM-2B-dpo-bf16/
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section covers use of the model without fine-tuning or plugging it into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section covers use of the model when it is fine-tuned for a task or plugged into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases, and limitations of the model. More information is needed before further recommendations can be made.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and BibTeX information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.10.0
\ No newline at end of file
diff --git a/checkpoint-310/adapter_config.json b/checkpoint-310/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..d0eb9db7c6b0c827aba0e86e5057f651b0f868c9
--- /dev/null
+++ b/checkpoint-310/adapter_config.json
@@ -0,0 +1,29 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "../pretrained/MiniCPM-2B-dpo-bf16/",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": "gaussian",
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-310/adapter_model.safetensors b/checkpoint-310/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..71bd0d383823a8fd657dde6244238cc1f2e02cce
--- /dev/null
+++ b/checkpoint-310/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8498a3b72fa6b73e531a18e19bd219e6f8590f5be238a061a4c6488cb2d2ce41
+size 5919456
diff --git a/checkpoint-310/optimizer.pt b/checkpoint-310/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..bd7ae0e38f1b009fcdf4ff09471614c74612a620
--- /dev/null
+++ b/checkpoint-310/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:436d40866ba7852060f73946dc8c0a7ae73452de51d8bed881d54b9ca8901e63
+size 11930938
diff --git a/checkpoint-310/rng_state_0.pth b/checkpoint-310/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..88a1a410a9839c59a81b645d8d0e0acd1e61b640
--- /dev/null
+++ b/checkpoint-310/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b0f8d5aaec2381dce03acaa023b7714b9af8d74990fdeb63144149174f500347
+size 15024
diff --git a/checkpoint-310/rng_state_1.pth b/checkpoint-310/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..07c9736cb7d1d7e98a583df40c495d90fe7b986c
--- /dev/null
+++ b/checkpoint-310/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1cf59769c800800834cadfdea32ecd38fd26157f2dc68fd12df2d99fab744ad3
+size 15024
diff --git a/checkpoint-310/rng_state_2.pth b/checkpoint-310/rng_state_2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..4553fe9252e0e1d8c88256e83f57ac07b232b6e4
--- /dev/null
+++ b/checkpoint-310/rng_state_2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:60baa7f85097b313ee9a276c0945ceffced6909abc5d6dd56dcfd8155c025f33
+size 15024
diff --git a/checkpoint-310/rng_state_3.pth b/checkpoint-310/rng_state_3.pth
new file mode 100644
index 0000000000000000000000000000000000000000..9f2e95abd7c126fc81cd55c6bbc98b7e11ac7c6f
--- /dev/null
+++ b/checkpoint-310/rng_state_3.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bce7a9dfe18b09bd1249dd5b9d6baea618d6f6ca19b767ce0763cdd363d19121
+size 15024
diff --git a/checkpoint-310/scheduler.pt b/checkpoint-310/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..eca4b24e7b333a779a6a96b8e6752ac038ede3b9
--- /dev/null
+++ b/checkpoint-310/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3874aad844c70b83056f1c7b087723fae5383168ae7a31b13685259248916f9f
+size 1064
diff --git a/checkpoint-310/trainer_state.json b/checkpoint-310/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..3ca710ad065cb1e49129c4978bd1b182331bcda4
--- /dev/null
+++ b/checkpoint-310/trainer_state.json
@@ -0,0 +1,238 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.022494739133589726,
+  "eval_steps": 500,
+  "global_step": 310,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0007256367462448298,
+      "grad_norm": 12.0,
+      "learning_rate": 5e-05,
+      "loss": 3.0993,
+      "step": 10
+    },
+    {
+      "epoch": 0.0014512734924896596,
+      "grad_norm": 10.4375,
+      "learning_rate": 0.0001,
+      "loss": 2.208,
+      "step": 20
+    },
+    {
+      "epoch": 0.0021769102387344894,
+      "grad_norm": 6.5625,
+      "learning_rate": 0.00015,
+      "loss": 1.3285,
+      "step": 30
+    },
+    {
+      "epoch": 0.0029025469849793192,
+      "grad_norm": 6.21875,
+      "learning_rate": 0.0002,
+      "loss": 0.6895,
+      "step": 40
+    },
+    {
+      "epoch": 0.003628183731224149,
+      "grad_norm": 4.15625,
+      "learning_rate": 0.00025,
+      "loss": 0.714,
+      "step": 50
+    },
+    {
+      "epoch": 0.004353820477468979,
+      "grad_norm": 4.71875,
+      "learning_rate": 0.0003,
+      "loss": 0.4849,
+      "step": 60
+    },
+    {
+      "epoch": 0.005079457223713809,
+      "grad_norm": 3.828125,
+      "learning_rate": 0.00035,
+      "loss": 0.3671,
+      "step": 70
+    },
+    {
+      "epoch": 0.0058050939699586385,
+      "grad_norm": 4.03125,
+      "learning_rate": 0.0004,
+      "loss": 0.5693,
+      "step": 80
+    },
+    {
+      "epoch": 0.006530730716203468,
+      "grad_norm": 3.296875,
+      "learning_rate": 0.00045000000000000004,
+      "loss": 0.4133,
+      "step": 90
+    },
+    {
+      "epoch": 0.007256367462448298,
+      "grad_norm": 2.640625,
+      "learning_rate": 0.0005,
+      "loss": 0.2668,
+      "step": 100
+    },
+    {
+      "epoch": 0.007982004208693128,
+      "grad_norm": 1.5859375,
+      "learning_rate": 0.000499999340865746,
+      "loss": 0.2922,
+      "step": 110
+    },
+    {
+      "epoch": 0.008707640954937958,
+      "grad_norm": 3.8125,
+      "learning_rate": 0.0004999973634664594,
+      "loss": 0.3996,
+      "step": 120
+    },
+    {
+      "epoch": 0.009433277701182788,
+      "grad_norm": 2.0625,
+      "learning_rate": 0.0004999940678125673,
+      "loss": 0.2841,
+      "step": 130
+    },
+    {
+      "epoch": 0.010158914447427617,
+      "grad_norm": 3.390625,
+      "learning_rate": 0.000499989453921448,
+      "loss": 0.3003,
+      "step": 140
+    },
+    {
+      "epoch": 0.010884551193672447,
+      "grad_norm": 1.65625,
+      "learning_rate": 0.0004999835218174307,
+      "loss": 0.2747,
+      "step": 150
+    },
+    {
+      "epoch": 0.011610187939917277,
+      "grad_norm": 1.671875,
+      "learning_rate": 0.000499976271531796,
+      "loss": 0.37,
+      "step": 160
+    },
+    {
+      "epoch": 0.012335824686162107,
+      "grad_norm": 1.0390625,
+      "learning_rate": 0.000499967703102775,
+      "loss": 0.2163,
+      "step": 170
+    },
+    {
+      "epoch": 0.013061461432406937,
+      "grad_norm": 2.546875,
+      "learning_rate": 0.00049995781657555,
+      "loss": 0.3652,
+      "step": 180
+    },
+    {
+      "epoch": 0.013787098178651766,
+      "grad_norm": 3.71875,
+      "learning_rate": 0.000499946612002253,
+      "loss": 0.3177,
+      "step": 190
+    },
+    {
+      "epoch": 0.014512734924896596,
+      "grad_norm": 2.6875,
+      "learning_rate": 0.0004999340894419668,
+      "loss": 0.2043,
+      "step": 200
+    },
+    {
+      "epoch": 0.015238371671141426,
+      "grad_norm": 0.5,
+      "learning_rate": 0.0004999202489607236,
+      "loss": 0.2865,
+      "step": 210
+    },
+    {
+      "epoch": 0.015964008417386256,
+      "grad_norm": 3.015625,
+      "learning_rate": 0.0004999050906315055,
+      "loss": 0.2039,
+      "step": 220
+    },
+    {
+      "epoch": 0.016689645163631087,
+      "grad_norm": 0.99609375,
+      "learning_rate": 0.0004998886145342434,
+      "loss": 0.3509,
+      "step": 230
+    },
+    {
+      "epoch": 0.017415281909875915,
+      "grad_norm": 1.34375,
+      "learning_rate": 0.0004998708207558168,
+      "loss": 0.3208,
+      "step": 240
+    },
+    {
+      "epoch": 0.018140918656120747,
+      "grad_norm": 2.015625,
+      "learning_rate": 0.0004998517093900539,
+      "loss": 0.2307,
+      "step": 250
+    },
+    {
+      "epoch": 0.018866555402365575,
+      "grad_norm": 1.0703125,
+      "learning_rate": 0.0004998312805377302,
+      "loss": 0.2232,
+      "step": 260
+    },
+    {
+      "epoch": 0.019592192148610407,
+      "grad_norm": 1.7265625,
+      "learning_rate": 0.0004998095343065685,
+      "loss": 0.2587,
+      "step": 270
+    },
+    {
+      "epoch": 0.020317828894855235,
+      "grad_norm": 1.21875,
+      "learning_rate": 0.0004997864708112384,
+      "loss": 0.2175,
+      "step": 280
+    },
+    {
+      "epoch": 0.021043465641100066,
+      "grad_norm": 2.234375,
+      "learning_rate": 0.0004997620901733554,
+      "loss": 0.2185,
+      "step": 290
+    },
+    {
+      "epoch": 0.021769102387344894,
+      "grad_norm": 1.2734375,
+      "learning_rate": 0.0004997363925214804,
+      "loss": 0.2409,
+      "step": 300
+    },
+    {
+      "epoch": 0.022494739133589726,
+      "grad_norm": 1.7109375,
+      "learning_rate": 0.000499709377991119,
+      "loss": 0.1976,
+      "step": 310
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 13781,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 10,
+  "total_flos": 0.0,
+  "train_batch_size": 5,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-310/training_args.bin b/checkpoint-310/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..7d5e801f990cb70d84bd631ef6b3738864bdfd3d
--- /dev/null
+++ b/checkpoint-310/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3cf317b5a0322fdf5b8c610bfc4afb6ece34736fcb1bea30f92c1212b9b53d69
+size 4984
diff --git a/checkpoint-320/README.md b/checkpoint-320/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..64d55750c9c68c8ef0ccff2f65b1fd32a16578a9
--- /dev/null
+++ b/checkpoint-320/README.md
@@ -0,0 +1,202 @@
+---
+library_name: peft
+base_model: ../pretrained/MiniCPM-2B-dpo-bf16/
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section covers use of the model without fine-tuning or plugging it into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section covers use of the model when it is fine-tuned for a task or plugged into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases, and limitations of the model. More information is needed before further recommendations can be made.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and BibTeX information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.10.0
\ No newline at end of file
diff --git a/checkpoint-320/adapter_config.json b/checkpoint-320/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..d0eb9db7c6b0c827aba0e86e5057f651b0f868c9
--- /dev/null
+++ b/checkpoint-320/adapter_config.json
@@ -0,0 +1,29 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "../pretrained/MiniCPM-2B-dpo-bf16/",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": "gaussian",
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-320/adapter_model.safetensors b/checkpoint-320/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..237ec38bd3ab2314b993ddfe08da3b05da063fc2
--- /dev/null
+++ b/checkpoint-320/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7c57ad0c44f848f6e5e8b7a1a1aad0bcc162948f28b90732bb8a06493db071af
+size 5919456
diff --git a/checkpoint-320/optimizer.pt b/checkpoint-320/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..af6f3177e5cf829a0154559e70e9e665139e7d33
--- /dev/null
+++ b/checkpoint-320/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d6f7873d731d0b621761b932b3ead09e436e55b2ee9f78910ca6bad4dd1b58b7
+size 11930938
diff --git a/checkpoint-320/rng_state_0.pth b/checkpoint-320/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..2ffa711cdbf46a981ef6ea27285388e2fe15db49
--- /dev/null
+++ b/checkpoint-320/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e6b3a9c1f3554ec29be8c263a22a1d44c245d9b40f20469de6dc0c695c6649a2
+size 15024
diff --git a/checkpoint-320/rng_state_1.pth b/checkpoint-320/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..ecaf70827b33cabcc9c59f1fb242c81694ebb5a5
--- /dev/null
+++ b/checkpoint-320/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:61e4a523b11efe5f35e45cde3d5d8dcb6dfbfaaf813cd1384fb442a47c85a381
+size 15024
diff --git a/checkpoint-320/rng_state_2.pth b/checkpoint-320/rng_state_2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..d5ffb3346d9302117c04f247348b5f9c6a7e19a9
--- /dev/null
+++ b/checkpoint-320/rng_state_2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:614ebb7f79d70899798c0b6a06deadbcdd2910c4ee4fc68b8389130ffac98ae6
+size 15024
diff --git a/checkpoint-320/rng_state_3.pth b/checkpoint-320/rng_state_3.pth
new file mode 100644
index 0000000000000000000000000000000000000000..9e22ed527fccaef238bb929b2165d11181bb5f6f
--- /dev/null
+++ b/checkpoint-320/rng_state_3.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7b5f52caa8ac046bae234daa865a00e10f03e7d7beda6278af9db5a92b47db8d
+size 15024
diff --git a/checkpoint-320/scheduler.pt b/checkpoint-320/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d9598e4cadde8a10cf9806943a15789c933a9fa4
--- /dev/null
+++ b/checkpoint-320/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f75cd94dee5033e9a13557b30ff14de8d06db186d29178863ddc026d2586e487
+size 1064
diff --git a/checkpoint-320/trainer_state.json b/checkpoint-320/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..a28747814580610e37a81ac15dccd14ed35df6e8
--- /dev/null
+++ b/checkpoint-320/trainer_state.json
@@ -0,0 +1,245 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.023220375879834554,
+  "eval_steps": 500,
+  "global_step": 320,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0007256367462448298,
+      "grad_norm": 12.0,
+      "learning_rate": 5e-05,
+      "loss": 3.0993,
+      "step": 10
+    },
+    {
+      "epoch": 0.0014512734924896596,
+      "grad_norm": 10.4375,
+      "learning_rate": 0.0001,
+      "loss": 2.208,
+      "step": 20
+    },
+    {
+      "epoch": 0.0021769102387344894,
+      "grad_norm": 6.5625,
+      "learning_rate": 0.00015,
+      "loss": 1.3285,
+      "step": 30
+    },
+    {
+      "epoch": 0.0029025469849793192,
+      "grad_norm": 6.21875,
+      "learning_rate": 0.0002,
+      "loss": 0.6895,
+      "step": 40
+    },
+    {
+      "epoch": 0.003628183731224149,
+      "grad_norm": 4.15625,
+      "learning_rate": 0.00025,
+      "loss": 0.714,
+      "step": 50
+    },
+    {
+      "epoch": 0.004353820477468979,
+      "grad_norm": 4.71875,
+      "learning_rate": 0.0003,
+      "loss": 0.4849,
+      "step": 60
+    },
+    {
+      "epoch": 0.005079457223713809,
+      "grad_norm": 3.828125,
+      "learning_rate": 0.00035,
+      "loss": 0.3671,
+      "step": 70
+    },
+    {
+      "epoch": 0.0058050939699586385,
+      "grad_norm": 4.03125,
+      "learning_rate": 0.0004,
+      "loss": 0.5693,
+      "step": 80
+    },
+    {
+      "epoch": 0.006530730716203468,
+      "grad_norm": 3.296875,
+      "learning_rate": 0.00045000000000000004,
+      "loss": 0.4133,
+      "step": 90
+    },
+    {
+      "epoch": 0.007256367462448298,
+      "grad_norm": 2.640625,
+      "learning_rate": 0.0005,
+      "loss": 0.2668,
+      "step": 100
+    },
+    {
+      "epoch": 0.007982004208693128,
+      "grad_norm": 1.5859375,
+      "learning_rate": 0.000499999340865746,
+      "loss": 0.2922,
+      "step": 110
+    },
+    {
+      "epoch": 0.008707640954937958,
+      "grad_norm": 3.8125,
+      "learning_rate": 0.0004999973634664594,
+      "loss": 0.3996,
+      "step": 120
+    },
+    {
+      "epoch": 0.009433277701182788,
+      "grad_norm": 2.0625,
+      "learning_rate": 0.0004999940678125673,
+      "loss": 0.2841,
+      "step": 130
+    },
+    {
+      "epoch": 0.010158914447427617,
+      "grad_norm": 3.390625,
+      "learning_rate": 0.000499989453921448,
+      "loss": 0.3003,
+      "step": 140
+    },
+    {
+      "epoch": 0.010884551193672447,
+      "grad_norm": 1.65625,
+      "learning_rate": 0.0004999835218174307,
+      "loss": 0.2747,
+      "step": 150
+    },
+    {
+      "epoch": 0.011610187939917277,
+      "grad_norm": 1.671875,
+      "learning_rate": 0.000499976271531796,
+      "loss": 0.37,
+      "step": 160
+    },
+    {
+      "epoch": 0.012335824686162107,
+      "grad_norm": 1.0390625,
+      "learning_rate": 0.000499967703102775,
+      "loss": 0.2163,
+      "step": 170
+    },
+    {
+      "epoch": 0.013061461432406937,
+      "grad_norm": 2.546875,
+      "learning_rate": 0.00049995781657555,
+      "loss": 0.3652,
+      "step": 180
+    },
+    {
+      "epoch": 0.013787098178651766,
+      "grad_norm": 3.71875,
+      "learning_rate": 0.000499946612002253,
+      "loss": 0.3177,
+      "step": 190
+    },
+    {
+      "epoch": 0.014512734924896596,
+      "grad_norm": 2.6875,
+      "learning_rate": 0.0004999340894419668,
+      "loss": 0.2043,
+      "step": 200
+    },
+    {
+      "epoch": 0.015238371671141426,
+      "grad_norm": 0.5,
+      "learning_rate": 0.0004999202489607236,
+      "loss": 0.2865,
+      "step": 210
+    },
+    {
+      "epoch": 0.015964008417386256,
+      "grad_norm": 3.015625,
+      "learning_rate": 0.0004999050906315055,
+      "loss": 0.2039,
+      "step": 220
+    },
+    {
+      "epoch": 0.016689645163631087,
+      "grad_norm": 0.99609375,
+      "learning_rate": 0.0004998886145342434,
+      "loss": 0.3509,
+      "step": 230
+    },
+    {
+      "epoch": 0.017415281909875915,
+      "grad_norm": 1.34375,
+      "learning_rate": 0.0004998708207558168,
+      "loss": 0.3208,
+      "step": 240
+    },
+    {
+      "epoch": 0.018140918656120747,
+      "grad_norm": 2.015625,
+      "learning_rate": 0.0004998517093900539,
+      "loss": 0.2307,
+      "step": 250
+    },
+    {
+      "epoch": 0.018866555402365575,
+      "grad_norm": 1.0703125,
+      "learning_rate": 0.0004998312805377302,
+      "loss": 0.2232,
+      "step": 260
+    },
+    {
+      "epoch": 0.019592192148610407,
+      "grad_norm": 1.7265625,
+      "learning_rate": 0.0004998095343065685,
+      "loss": 0.2587,
+      "step": 270
+    },
+    {
+      "epoch": 0.020317828894855235,
+      "grad_norm": 1.21875,
+      "learning_rate": 0.0004997864708112384,
+      "loss": 0.2175,
+      "step": 280
+    },
+    {
+      "epoch": 0.021043465641100066,
+      "grad_norm": 2.234375,
+      "learning_rate": 0.0004997620901733554,
+      "loss": 0.2185,
+      "step": 290
+    },
+    {
+      "epoch": 0.021769102387344894,
+      "grad_norm": 1.2734375,
+      "learning_rate": 0.0004997363925214804,
+      "loss": 0.2409,
+      "step": 300
+    },
+    {
+      "epoch": 0.022494739133589726,
+      "grad_norm": 1.7109375,
+      "learning_rate": 0.000499709377991119,
+      "loss": 0.1976,
+      "step": 310
+    },
+    {
+      "epoch": 0.023220375879834554,
+      "grad_norm": 1.7890625,
+      "learning_rate": 0.0004996810467247207,
+      "loss": 0.2899,
+      "step": 320
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 13781,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 10,
+  "total_flos": 0.0,
+  "train_batch_size": 5,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-320/training_args.bin b/checkpoint-320/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..7d5e801f990cb70d84bd631ef6b3738864bdfd3d
--- /dev/null
+++ b/checkpoint-320/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3cf317b5a0322fdf5b8c610bfc4afb6ece34736fcb1bea30f92c1212b9b53d69
+size 4984
diff --git a/checkpoint-330/README.md b/checkpoint-330/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..64d55750c9c68c8ef0ccff2f65b1fd32a16578a9
--- /dev/null
+++ b/checkpoint-330/README.md
@@ -0,0 +1,202 @@
+---
+library_name: peft
+base_model: ../pretrained/MiniCPM-2B-dpo-bf16/
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.10.0
\ No newline at end of file
diff --git a/checkpoint-330/adapter_config.json b/checkpoint-330/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..d0eb9db7c6b0c827aba0e86e5057f651b0f868c9
--- /dev/null
+++ b/checkpoint-330/adapter_config.json
@@ -0,0 +1,29 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "../pretrained/MiniCPM-2B-dpo-bf16/",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": "gaussian",
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-330/adapter_model.safetensors b/checkpoint-330/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..cd09df4e305491646ac5dcdee7e4be96f8d90984
--- /dev/null
+++ b/checkpoint-330/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:88759d9e60aba17cd028a6a119793319aed0cfaac44fccf144fa706948aabf8b
+size 5919456
diff --git a/checkpoint-330/optimizer.pt b/checkpoint-330/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..de5808120b148d0a82bbe3ae88eee55fdfc48da2
--- /dev/null
+++ b/checkpoint-330/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7f365f9b71ef157714061de8d98e78663b0f9bc40bd1082db0190c08fcab7cd6
+size 11930938
diff --git a/checkpoint-330/rng_state_0.pth b/checkpoint-330/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..1a4494144df2c2b033835bfdae880b9bd5fbe543
--- /dev/null
+++ b/checkpoint-330/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1fe1229961077eeb711058a791ec8b58ee9fec26f7d7c11fdbbcc280ec7c903e
+size 15024
diff --git a/checkpoint-330/rng_state_1.pth b/checkpoint-330/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..df60d6b20d25ac640d38b4eb061e24f626baf578
--- /dev/null
+++ b/checkpoint-330/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d4667fbba6fad5df1de226ac6cf5d3cfc1816528968c1ff259ddaa5db6ab723d
+size 15024
diff --git a/checkpoint-330/rng_state_2.pth b/checkpoint-330/rng_state_2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..422da36741950ba5633688a85dbba07997ffa96d
--- /dev/null
+++ b/checkpoint-330/rng_state_2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:645b737c6ccdce91522bc55c30a2afbad0340faa70101f8ff8b95ffac340f2d3
+size 15024
diff --git a/checkpoint-330/rng_state_3.pth b/checkpoint-330/rng_state_3.pth
new file mode 100644
index 0000000000000000000000000000000000000000..33bee041b83116b398d03986f8068b1f5aca145e
--- /dev/null
+++ b/checkpoint-330/rng_state_3.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c835b6854556a7935eaeab73c709db1e4176fe90c9cfd79a6e187768b1973f7e
+size 15024
diff --git a/checkpoint-330/scheduler.pt b/checkpoint-330/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..405257a87749aa42dceaa44aa2faf9d5513451ff
--- /dev/null
+++ b/checkpoint-330/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:eed4882107b0a54152a7fae0d940d4892ac4f7f1d37e4307835968f4f856f156
+size 1064
diff --git a/checkpoint-330/trainer_state.json b/checkpoint-330/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..7045f4f021646c80befeb20b3d2e4465d2545f50
--- /dev/null
+++ b/checkpoint-330/trainer_state.json
@@ -0,0 +1,252 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.023946012626079385,
+  "eval_steps": 500,
+  "global_step": 330,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0007256367462448298,
+      "grad_norm": 12.0,
+      "learning_rate": 5e-05,
+      "loss": 3.0993,
+      "step": 10
+    },
+    {
+      "epoch": 0.0014512734924896596,
+      "grad_norm": 10.4375,
+      "learning_rate": 0.0001,
+      "loss": 2.208,
+      "step": 20
+    },
+    {
+      "epoch": 0.0021769102387344894,
+      "grad_norm": 6.5625,
+      "learning_rate": 0.00015,
+      "loss": 1.3285,
+      "step": 30
+    },
+    {
+      "epoch": 0.0029025469849793192,
+      "grad_norm": 6.21875,
+      "learning_rate": 0.0002,
+      "loss": 0.6895,
+      "step": 40
+    },
+    {
+      "epoch": 0.003628183731224149,
+      "grad_norm": 4.15625,
+      "learning_rate": 0.00025,
+      "loss": 0.714,
+      "step": 50
+    },
+    {
+      "epoch": 0.004353820477468979,
+      "grad_norm": 4.71875,
+      "learning_rate": 0.0003,
+      "loss": 0.4849,
+      "step": 60
+    },
+    {
+      "epoch": 0.005079457223713809,
+      "grad_norm": 3.828125,
+      "learning_rate": 0.00035,
+      "loss": 0.3671,
+      "step": 70
+    },
+    {
+      "epoch": 0.0058050939699586385,
+      "grad_norm": 4.03125,
+      "learning_rate": 0.0004,
+      "loss": 0.5693,
+      "step": 80
+    },
+    {
+      "epoch": 0.006530730716203468,
+      "grad_norm": 3.296875,
+      "learning_rate": 0.00045000000000000004,
+      "loss": 0.4133,
+      "step": 90
+    },
+    {
+      "epoch": 0.007256367462448298,
+      "grad_norm": 2.640625,
+      "learning_rate": 0.0005,
+      "loss": 0.2668,
+      "step": 100
+    },
+    {
+      "epoch": 0.007982004208693128,
+      "grad_norm": 1.5859375,
+      "learning_rate": 0.000499999340865746,
+      "loss": 0.2922,
+      "step": 110
+    },
+    {
+      "epoch": 0.008707640954937958,
+      "grad_norm": 3.8125,
+      "learning_rate": 0.0004999973634664594,
+      "loss": 0.3996,
+      "step": 120
+    },
+    {
+      "epoch": 0.009433277701182788,
+      "grad_norm": 2.0625,
+      "learning_rate": 0.0004999940678125673,
+      "loss": 0.2841,
+      "step": 130
+    },
+    {
+      "epoch": 0.010158914447427617,
+      "grad_norm": 3.390625,
+      "learning_rate": 0.000499989453921448,
+      "loss": 0.3003,
+      "step": 140
+    },
+    {
+      "epoch": 0.010884551193672447,
+      "grad_norm": 1.65625,
+      "learning_rate": 0.0004999835218174307,
+      "loss": 0.2747,
+      "step": 150
+    },
+    {
+      "epoch": 0.011610187939917277,
+      "grad_norm": 1.671875,
+      "learning_rate": 0.000499976271531796,
+      "loss": 0.37,
+      "step": 160
+    },
+    {
+      "epoch": 0.012335824686162107,
+      "grad_norm": 1.0390625,
+      "learning_rate": 0.000499967703102775,
+      "loss": 0.2163,
+      "step": 170
+    },
+    {
+      "epoch": 0.013061461432406937,
+      "grad_norm": 2.546875,
+      "learning_rate": 0.00049995781657555,
+      "loss": 0.3652,
+      "step": 180
+    },
+    {
+      "epoch": 0.013787098178651766,
+      "grad_norm": 3.71875,
+      "learning_rate": 0.000499946612002253,
+      "loss": 0.3177,
+      "step": 190
+    },
+    {
+      "epoch": 0.014512734924896596,
+      "grad_norm": 2.6875,
+      "learning_rate": 0.0004999340894419668,
+      "loss": 0.2043,
+      "step": 200
+    },
+    {
+      "epoch": 0.015238371671141426,
+      "grad_norm": 0.5,
+      "learning_rate": 0.0004999202489607236,
+      "loss": 0.2865,
+      "step": 210
+    },
+    {
+      "epoch": 0.015964008417386256,
+      "grad_norm": 3.015625,
+      "learning_rate": 0.0004999050906315055,
+      "loss": 0.2039,
+      "step": 220
+    },
+    {
+      "epoch": 0.016689645163631087,
+      "grad_norm": 0.99609375,
+      "learning_rate": 0.0004998886145342434,
+      "loss": 0.3509,
+      "step": 230
+    },
+    {
+      "epoch": 0.017415281909875915,
+      "grad_norm": 1.34375,
+      "learning_rate": 0.0004998708207558168,
+      "loss": 0.3208,
+      "step": 240
+    },
+    {
+      "epoch": 0.018140918656120747,
+      "grad_norm": 2.015625,
+      "learning_rate": 0.0004998517093900539,
+      "loss": 0.2307,
+      "step": 250
+    },
+    {
+      "epoch": 0.018866555402365575,
+      "grad_norm": 1.0703125,
+      "learning_rate": 0.0004998312805377302,
+      "loss": 0.2232,
+      "step": 260
+    },
+    {
+      "epoch": 0.019592192148610407,
+      "grad_norm": 1.7265625,
+      "learning_rate": 0.0004998095343065685,
+      "loss": 0.2587,
+      "step": 270
+    },
+    {
+      "epoch": 0.020317828894855235,
+      "grad_norm": 1.21875,
+      "learning_rate": 0.0004997864708112384,
+      "loss": 0.2175,
+      "step": 280
+    },
+    {
+      "epoch": 0.021043465641100066,
+      "grad_norm": 2.234375,
+      "learning_rate": 0.0004997620901733554,
+      "loss": 0.2185,
+      "step": 290
+    },
+    {
+      "epoch": 0.021769102387344894,
+      "grad_norm": 1.2734375,
+      "learning_rate": 0.0004997363925214804,
+      "loss": 0.2409,
+      "step": 300
+    },
+    {
+      "epoch": 0.022494739133589726,
+      "grad_norm": 1.7109375,
+      "learning_rate": 0.000499709377991119,
+      "loss": 0.1976,
+      "step": 310
+    },
+    {
+      "epoch": 0.023220375879834554,
+      "grad_norm": 1.7890625,
+      "learning_rate": 0.0004996810467247207,
+      "loss": 0.2899,
+      "step": 320
+    },
+    {
+      "epoch": 0.023946012626079385,
+      "grad_norm": 1.9296875,
+      "learning_rate": 0.0004996513988716786,
+      "loss": 0.2324,
+      "step": 330
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 13781,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 10,
+  "total_flos": 0.0,
+  "train_batch_size": 5,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-330/training_args.bin b/checkpoint-330/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..7d5e801f990cb70d84bd631ef6b3738864bdfd3d
--- /dev/null
+++ b/checkpoint-330/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3cf317b5a0322fdf5b8c610bfc4afb6ece34736fcb1bea30f92c1212b9b53d69
+size 4984
diff --git a/checkpoint-340/README.md b/checkpoint-340/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..64d55750c9c68c8ef0ccff2f65b1fd32a16578a9
--- /dev/null
+++ b/checkpoint-340/README.md
@@ -0,0 +1,202 @@
+---
+library_name: peft
+base_model: ../pretrained/MiniCPM-2B-dpo-bf16/
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.10.0
\ No newline at end of file
diff --git a/checkpoint-340/adapter_config.json b/checkpoint-340/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..d0eb9db7c6b0c827aba0e86e5057f651b0f868c9
--- /dev/null
+++ b/checkpoint-340/adapter_config.json
@@ -0,0 +1,29 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "../pretrained/MiniCPM-2B-dpo-bf16/",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": "gaussian",
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-340/adapter_model.safetensors b/checkpoint-340/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..259fe36ac8d1a1f3a05374295ad719f87f4a4cca
--- /dev/null
+++ b/checkpoint-340/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:359fdf1a2c2eb259cec59b720b497efd56d8afa9b33cb1e1129cd96b0e5705a9
+size 5919456
diff --git a/checkpoint-340/optimizer.pt b/checkpoint-340/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a96bbb775ed881ea2f4d62efafb333af0430b8f9
--- /dev/null
+++ b/checkpoint-340/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6c80c359241a7449ed74c39e00e69931f8bb170b0ec0a849dbc8612dfcd89cb5
+size 11930938
diff --git a/checkpoint-340/rng_state_0.pth b/checkpoint-340/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..852562f5d839efc973e59f76ff158bef4f8e4cd0
--- /dev/null
+++ b/checkpoint-340/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8d1b37c0c39f42b0e731b0ce8381fd1d6e61522adb9c7a52085f3bbaaa137c81
+size 15024
diff --git a/checkpoint-340/rng_state_1.pth b/checkpoint-340/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..e1a1c465a89dc7dd1707da72a449dfca08e4a622
--- /dev/null
+++ b/checkpoint-340/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d442c8c51b6e14b41f3c95d25b451a65ffae4f5ce30d1f84755ff3615eff9d13
+size 15024
diff --git a/checkpoint-340/rng_state_2.pth b/checkpoint-340/rng_state_2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..5336fc7d06fe88de7eb2ea2e60267f838924b3ff
--- /dev/null
+++ b/checkpoint-340/rng_state_2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c48003a40f042cbb527f53d852225776c8a6177157abca81cdaae7b56d7ad370
+size 15024
diff --git a/checkpoint-340/rng_state_3.pth b/checkpoint-340/rng_state_3.pth
new file mode 100644
index 0000000000000000000000000000000000000000..1b7fb2554acc0513bd93021720919a5820929522
--- /dev/null
+++ b/checkpoint-340/rng_state_3.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f7c6384de2f8695f35e1d9074eabb09aa3122a56501ee571f67417e923b30de9
+size 15024
diff --git a/checkpoint-340/scheduler.pt b/checkpoint-340/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3fc01f2c67b70cc072881af70934ecec8a8c472c
--- /dev/null
+++ b/checkpoint-340/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e12a7a9caf47ef832dc2224c4a43417b34dbcb83bc8e400cb60f17808dbac55f
+size 1064
diff --git a/checkpoint-340/trainer_state.json b/checkpoint-340/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..fc09534dd08da8f02fa8a8902448e489f537fcb5
--- /dev/null
+++ b/checkpoint-340/trainer_state.json
@@ -0,0 +1,259 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.024671649372324213,
+  "eval_steps": 500,
+  "global_step": 340,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0007256367462448298,
+      "grad_norm": 12.0,
+      "learning_rate": 5e-05,
+      "loss": 3.0993,
+      "step": 10
+    },
+    {
+      "epoch": 0.0014512734924896596,
+      "grad_norm": 10.4375,
+      "learning_rate": 0.0001,
+      "loss": 2.208,
+      "step": 20
+    },
+    {
+      "epoch": 0.0021769102387344894,
+      "grad_norm": 6.5625,
+      "learning_rate": 0.00015,
+      "loss": 1.3285,
+      "step": 30
+    },
+    {
+      "epoch": 0.0029025469849793192,
+      "grad_norm": 6.21875,
+      "learning_rate": 0.0002,
+      "loss": 0.6895,
+      "step": 40
+    },
+    {
+      "epoch": 0.003628183731224149,
+      "grad_norm": 4.15625,
+      "learning_rate": 0.00025,
+      "loss": 0.714,
+      "step": 50
+    },
+    {
+      "epoch": 0.004353820477468979,
+      "grad_norm": 4.71875,
+      "learning_rate": 0.0003,
+      "loss": 0.4849,
+      "step": 60
+    },
+    {
+      "epoch": 0.005079457223713809,
+      "grad_norm": 3.828125,
+      "learning_rate": 0.00035,
+      "loss": 0.3671,
+      "step": 70
+    },
+    {
+      "epoch": 0.0058050939699586385,
+      "grad_norm": 4.03125,
+      "learning_rate": 0.0004,
+      "loss": 0.5693,
+      "step": 80
+    },
+    {
+      "epoch": 0.006530730716203468,
+      "grad_norm": 3.296875,
+      "learning_rate": 0.00045000000000000004,
+      "loss": 0.4133,
+      "step": 90
+    },
+    {
+      "epoch": 0.007256367462448298,
+      "grad_norm": 2.640625,
+      "learning_rate": 0.0005,
+      "loss": 0.2668,
+      "step": 100
+    },
+    {
+      "epoch": 0.007982004208693128,
+      "grad_norm": 1.5859375,
+      "learning_rate": 0.000499999340865746,
+      "loss": 0.2922,
+      "step": 110
+    },
+    {
+      "epoch": 0.008707640954937958,
+      "grad_norm": 3.8125,
+      "learning_rate": 0.0004999973634664594,
+      "loss": 0.3996,
+      "step": 120
+    },
+    {
+      "epoch": 0.009433277701182788,
+      "grad_norm": 2.0625,
+      "learning_rate": 0.0004999940678125673,
+      "loss": 0.2841,
+      "step": 130
+    },
+    {
+      "epoch": 0.010158914447427617,
+      "grad_norm": 3.390625,
+      "learning_rate": 0.000499989453921448,
+      "loss": 0.3003,
+      "step": 140
+    },
+    {
+      "epoch": 0.010884551193672447,
+      "grad_norm": 1.65625,
+      "learning_rate": 0.0004999835218174307,
+      "loss": 0.2747,
+      "step": 150
+    },
+    {
+      "epoch": 0.011610187939917277,
+      "grad_norm": 1.671875,
+      "learning_rate": 0.000499976271531796,
+      "loss": 0.37,
+      "step": 160
+    },
+    {
+      "epoch": 0.012335824686162107,
+      "grad_norm": 1.0390625,
+      "learning_rate": 0.000499967703102775,
+      "loss": 0.2163,
+      "step": 170
+    },
+    {
+      "epoch": 0.013061461432406937,
+      "grad_norm": 2.546875,
+      "learning_rate": 0.00049995781657555,
+      "loss": 0.3652,
+      "step": 180
+    },
+    {
+      "epoch": 0.013787098178651766,
+      "grad_norm": 3.71875,
+      "learning_rate": 0.000499946612002253,
+      "loss": 0.3177,
+      "step": 190
+    },
+    {
+      "epoch": 0.014512734924896596,
+      "grad_norm": 2.6875,
+      "learning_rate": 0.0004999340894419668,
+      "loss": 0.2043,
+      "step": 200
+    },
+    {
+      "epoch": 0.015238371671141426,
+      "grad_norm": 0.5,
+      "learning_rate": 0.0004999202489607236,
+      "loss": 0.2865,
+      "step": 210
+    },
+    {
+      "epoch": 0.015964008417386256,
+      "grad_norm": 3.015625,
+      "learning_rate": 0.0004999050906315055,
+      "loss": 0.2039,
+      "step": 220
+    },
+    {
+      "epoch": 0.016689645163631087,
+      "grad_norm": 0.99609375,
+      "learning_rate": 0.0004998886145342434,
+      "loss": 0.3509,
+      "step": 230
+    },
+    {
+      "epoch": 0.017415281909875915,
+      "grad_norm": 1.34375,
+      "learning_rate": 0.0004998708207558168,
+      "loss": 0.3208,
+      "step": 240
+    },
+    {
+      "epoch": 0.018140918656120747,
+      "grad_norm": 2.015625,
+      "learning_rate": 0.0004998517093900539,
+      "loss": 0.2307,
+      "step": 250
+    },
+    {
+      "epoch": 0.018866555402365575,
+      "grad_norm": 1.0703125,
+      "learning_rate": 0.0004998312805377302,
+      "loss": 0.2232,
+      "step": 260
+    },
+    {
+      "epoch": 0.019592192148610407,
+      "grad_norm": 1.7265625,
+      "learning_rate": 0.0004998095343065685,
+      "loss": 0.2587,
+      "step": 270
+    },
+    {
+      "epoch": 0.020317828894855235,
+      "grad_norm": 1.21875,
+      "learning_rate": 0.0004997864708112384,
+      "loss": 0.2175,
+      "step": 280
+    },
+    {
+      "epoch": 0.021043465641100066,
+      "grad_norm": 2.234375,
+      "learning_rate": 0.0004997620901733554,
+      "loss": 0.2185,
+      "step": 290
+    },
+    {
+      "epoch": 0.021769102387344894,
+      "grad_norm": 1.2734375,
+      "learning_rate": 0.0004997363925214804,
+      "loss": 0.2409,
+      "step": 300
+    },
+    {
+      "epoch": 0.022494739133589726,
+      "grad_norm": 1.7109375,
+      "learning_rate": 0.000499709377991119,
+      "loss": 0.1976,
+      "step": 310
+    },
+    {
+      "epoch": 0.023220375879834554,
+      "grad_norm": 1.7890625,
+      "learning_rate": 0.0004996810467247207,
+      "loss": 0.2899,
+      "step": 320
+    },
+    {
+      "epoch": 0.023946012626079385,
+      "grad_norm": 1.9296875,
+      "learning_rate": 0.0004996513988716786,
+      "loss": 0.2324,
+      "step": 330
+    },
+    {
+      "epoch": 0.024671649372324213,
+      "grad_norm": 1.59375,
+      "learning_rate": 0.0004996204345883278,
+      "loss": 0.2376,
+      "step": 340
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 13781,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 10,
+  "total_flos": 0.0,
+  "train_batch_size": 5,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-340/training_args.bin b/checkpoint-340/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..7d5e801f990cb70d84bd631ef6b3738864bdfd3d
--- /dev/null
+++ b/checkpoint-340/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3cf317b5a0322fdf5b8c610bfc4afb6ece34736fcb1bea30f92c1212b9b53d69
+size 4984
diff --git a/checkpoint-350/README.md b/checkpoint-350/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..64d55750c9c68c8ef0ccff2f65b1fd32a16578a9
--- /dev/null
+++ b/checkpoint-350/README.md
@@ -0,0 +1,202 @@
+---
+library_name: peft
+base_model: ../pretrained/MiniCPM-2B-dpo-bf16/
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.10.0
\ No newline at end of file
diff --git a/checkpoint-350/adapter_config.json b/checkpoint-350/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..d0eb9db7c6b0c827aba0e86e5057f651b0f868c9
--- /dev/null
+++ b/checkpoint-350/adapter_config.json
@@ -0,0 +1,29 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "../pretrained/MiniCPM-2B-dpo-bf16/",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": "gaussian",
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-350/adapter_model.safetensors b/checkpoint-350/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..82bbaaa37b15bc8e1ab4dc9e4d536991d536ec55
--- /dev/null
+++ b/checkpoint-350/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:107a3516989b39e642aa9abc0e4be97865674e72ca371187ce9c4bd265d848a9
+size 5919456
diff --git a/checkpoint-350/optimizer.pt b/checkpoint-350/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c32bbe4fd9c6fc502341f7a249e8c45874ee9275
--- /dev/null
+++ b/checkpoint-350/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:466b7dff1ef42262ee62d5b224b1a3312be3b0e3477d3ede8c731ba2035af938
+size 11930938
diff --git a/checkpoint-350/rng_state_0.pth b/checkpoint-350/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..72b6b5ab294d04c9a2a8f70ff379be5b259f3f03
--- /dev/null
+++ b/checkpoint-350/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b1e18052127e2de91acf10f8b47f9cff3595a79025227c9c631027861fcbbb99
+size 15024
diff --git a/checkpoint-350/rng_state_1.pth b/checkpoint-350/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..dee41cae1bc010c0ec23dbd4302402489b178fe5
--- /dev/null
+++ b/checkpoint-350/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6e3614fc6f0007750b432ee70bf5896a5fddfee40f3a86ad592a98f8a3a81125
+size 15024
diff --git a/checkpoint-350/rng_state_2.pth b/checkpoint-350/rng_state_2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..9423e42ffc31c78a8ba0fa93c3d2e1a421197c6b
--- /dev/null
+++ b/checkpoint-350/rng_state_2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5a65dc95baa1e904fdac16e865cb160a5aa7af67a382c615639a573fa8689b78
+size 15024
diff --git a/checkpoint-350/rng_state_3.pth b/checkpoint-350/rng_state_3.pth
new file mode 100644
index 0000000000000000000000000000000000000000..e90ce1d10c4f3201e342fb8d18fbfd37475d6975
--- /dev/null
+++ b/checkpoint-350/rng_state_3.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2aa8ee9a4a09f7b99ca2be28334313f1a38cf621e55ec5862d12e8ea4b952fa7
+size 15024
diff --git a/checkpoint-350/scheduler.pt b/checkpoint-350/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8edb63d6fc01b065b6c9c528954d6dbdb371e925
--- /dev/null
+++ b/checkpoint-350/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1a935b12ff254b5dc38f7c86cc2d4aa4a25509ddd267fcc8b5f885b99c410696
+size 1064
diff --git a/checkpoint-350/trainer_state.json b/checkpoint-350/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..5bcf0f77c6147c470137512d56174f803d18a4a8
--- /dev/null
+++ b/checkpoint-350/trainer_state.json
@@ -0,0 +1,266 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.025397286118569045,
+  "eval_steps": 500,
+  "global_step": 350,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0007256367462448298,
+      "grad_norm": 12.0,
+      "learning_rate": 5e-05,
+      "loss": 3.0993,
+      "step": 10
+    },
+    {
+      "epoch": 0.0014512734924896596,
+      "grad_norm": 10.4375,
+      "learning_rate": 0.0001,
+      "loss": 2.208,
+      "step": 20
+    },
+    {
+      "epoch": 0.0021769102387344894,
+      "grad_norm": 6.5625,
+      "learning_rate": 0.00015,
+      "loss": 1.3285,
+      "step": 30
+    },
+    {
+      "epoch": 0.0029025469849793192,
+      "grad_norm": 6.21875,
+      "learning_rate": 0.0002,
+      "loss": 0.6895,
+      "step": 40
+    },
+    {
+      "epoch": 0.003628183731224149,
+      "grad_norm": 4.15625,
+      "learning_rate": 0.00025,
+      "loss": 0.714,
+      "step": 50
+    },
+    {
+      "epoch": 0.004353820477468979,
+      "grad_norm": 4.71875,
+      "learning_rate": 0.0003,
+      "loss": 0.4849,
+      "step": 60
+    },
+    {
+      "epoch": 0.005079457223713809,
+      "grad_norm": 3.828125,
+      "learning_rate": 0.00035,
+      "loss": 0.3671,
+      "step": 70
+    },
+    {
+      "epoch": 0.0058050939699586385,
+      "grad_norm": 4.03125,
+      "learning_rate": 0.0004,
+      "loss": 0.5693,
+      "step": 80
+    },
+    {
+      "epoch": 0.006530730716203468,
+      "grad_norm": 3.296875,
+      "learning_rate": 0.00045000000000000004,
+      "loss": 0.4133,
+      "step": 90
+    },
+    {
+      "epoch": 0.007256367462448298,
+      "grad_norm": 2.640625,
+      "learning_rate": 0.0005,
+      "loss": 0.2668,
+      "step": 100
+    },
+    {
+      "epoch": 0.007982004208693128,
+      "grad_norm": 1.5859375,
+      "learning_rate": 0.000499999340865746,
+      "loss": 0.2922,
+      "step": 110
+    },
+    {
+      "epoch": 0.008707640954937958,
+      "grad_norm": 3.8125,
+      "learning_rate": 0.0004999973634664594,
+      "loss": 0.3996,
+      "step": 120
+    },
+    {
+      "epoch": 0.009433277701182788,
+      "grad_norm": 2.0625,
+      "learning_rate": 0.0004999940678125673,
+      "loss": 0.2841,
+      "step": 130
+    },
+    {
+      "epoch": 0.010158914447427617,
+      "grad_norm": 3.390625,
+      "learning_rate": 0.000499989453921448,
+      "loss": 0.3003,
+      "step": 140
+    },
+    {
+      "epoch": 0.010884551193672447,
+      "grad_norm": 1.65625,
+      "learning_rate": 0.0004999835218174307,
+      "loss": 0.2747,
+      "step": 150
+    },
+    {
+      "epoch": 0.011610187939917277,
+      "grad_norm": 1.671875,
+      "learning_rate": 0.000499976271531796,
+      "loss": 0.37,
+      "step": 160
+    },
+    {
+      "epoch": 0.012335824686162107,
+      "grad_norm": 1.0390625,
+      "learning_rate": 0.000499967703102775,
+      "loss": 0.2163,
+      "step": 170
+    },
+    {
+      "epoch": 0.013061461432406937,
+      "grad_norm": 2.546875,
+      "learning_rate": 0.00049995781657555,
+      "loss": 0.3652,
+      "step": 180
+    },
+    {
+      "epoch": 0.013787098178651766,
+      "grad_norm": 3.71875,
+      "learning_rate": 0.000499946612002253,
+      "loss": 0.3177,
+      "step": 190
+    },
+    {
+      "epoch": 0.014512734924896596,
+      "grad_norm": 2.6875,
+      "learning_rate": 0.0004999340894419668,
+      "loss": 0.2043,
+      "step": 200
+    },
+    {
+      "epoch": 0.015238371671141426,
+      "grad_norm": 0.5,
+      "learning_rate": 0.0004999202489607236,
+      "loss": 0.2865,
+      "step": 210
+    },
+    {
+      "epoch": 0.015964008417386256,
+      "grad_norm": 3.015625,
+      "learning_rate": 0.0004999050906315055,
+      "loss": 0.2039,
+      "step": 220
+    },
+    {
+      "epoch": 0.016689645163631087,
+      "grad_norm": 0.99609375,
+      "learning_rate": 0.0004998886145342434,
+      "loss": 0.3509,
+      "step": 230
+    },
+    {
+      "epoch": 0.017415281909875915,
+      "grad_norm": 1.34375,
+      "learning_rate": 0.0004998708207558168,
+      "loss": 0.3208,
+      "step": 240
+    },
+    {
+      "epoch": 0.018140918656120747,
+      "grad_norm": 2.015625,
+      "learning_rate": 0.0004998517093900539,
+      "loss": 0.2307,
+      "step": 250
+    },
+    {
+      "epoch": 0.018866555402365575,
+      "grad_norm": 1.0703125,
+      "learning_rate": 0.0004998312805377302,
+      "loss": 0.2232,
+      "step": 260
+    },
+    {
+      "epoch": 0.019592192148610407,
+      "grad_norm": 1.7265625,
+      "learning_rate": 0.0004998095343065685,
+      "loss": 0.2587,
+      "step": 270
+    },
+    {
+      "epoch": 0.020317828894855235,
+      "grad_norm": 1.21875,
+      "learning_rate": 0.0004997864708112384,
+      "loss": 0.2175,
+      "step": 280
+    },
+    {
+      "epoch": 0.021043465641100066,
+      "grad_norm": 2.234375,
+      "learning_rate": 0.0004997620901733554,
+      "loss": 0.2185,
+      "step": 290
+    },
+    {
+      "epoch": 0.021769102387344894,
+      "grad_norm": 1.2734375,
+      "learning_rate": 0.0004997363925214804,
+      "loss": 0.2409,
+      "step": 300
+    },
+    {
+      "epoch": 0.022494739133589726,
+      "grad_norm": 1.7109375,
+      "learning_rate": 0.000499709377991119,
+      "loss": 0.1976,
+      "step": 310
+    },
+    {
+      "epoch": 0.023220375879834554,
+      "grad_norm": 1.7890625,
+      "learning_rate": 0.0004996810467247207,
+      "loss": 0.2899,
+      "step": 320
+    },
+    {
+      "epoch": 0.023946012626079385,
+      "grad_norm": 1.9296875,
+      "learning_rate": 0.0004996513988716786,
+      "loss": 0.2324,
+      "step": 330
+    },
+    {
+      "epoch": 0.024671649372324213,
+      "grad_norm": 1.59375,
+      "learning_rate": 0.0004996204345883278,
+      "loss": 0.2376,
+      "step": 340
+    },
+    {
+      "epoch": 0.025397286118569045,
+      "grad_norm": 1.734375,
+      "learning_rate": 0.0004995881540379454,
+      "loss": 0.2927,
+      "step": 350
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 13781,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 10,
+  "total_flos": 0.0,
+  "train_batch_size": 5,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-350/training_args.bin b/checkpoint-350/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..7d5e801f990cb70d84bd631ef6b3738864bdfd3d
--- /dev/null
+++ b/checkpoint-350/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3cf317b5a0322fdf5b8c610bfc4afb6ece34736fcb1bea30f92c1212b9b53d69
+size 4984
diff --git a/checkpoint-360/README.md b/checkpoint-360/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..64d55750c9c68c8ef0ccff2f65b1fd32a16578a9
--- /dev/null
+++ b/checkpoint-360/README.md
@@ -0,0 +1,202 @@
+---
+library_name: peft
+base_model: ../pretrained/MiniCPM-2B-dpo-bf16/
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.10.0
\ No newline at end of file
diff --git a/checkpoint-360/adapter_config.json b/checkpoint-360/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..d0eb9db7c6b0c827aba0e86e5057f651b0f868c9
--- /dev/null
+++ b/checkpoint-360/adapter_config.json
@@ -0,0 +1,29 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "../pretrained/MiniCPM-2B-dpo-bf16/",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": "gaussian",
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-360/adapter_model.safetensors b/checkpoint-360/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..598d999d23b1c334a85982ade69e4357bd1590fc
--- /dev/null
+++ b/checkpoint-360/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1be4820bcbf871dad6bd046ba3558daa31bd454b23ebdf6cc4e0b1970cda6a67
+size 5919456
diff --git a/checkpoint-360/optimizer.pt b/checkpoint-360/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..37a4946100f2dfcb93ece4ddb74deccac3cdb88d
--- /dev/null
+++ b/checkpoint-360/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a1df7c71b7607d3928456aec0dd4d7f3811703ee6fd10f915442a81fd1487121
+size 11930938
diff --git a/checkpoint-360/rng_state_0.pth b/checkpoint-360/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..22ef40ec54530f943fe84df8a4eda896e507416b
--- /dev/null
+++ b/checkpoint-360/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6055b6a6600edf972449926a6d9e1f1a247175c461f5f21321d26e27fea2be59
+size 15024
diff --git a/checkpoint-360/rng_state_1.pth b/checkpoint-360/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..3e7fec541c040cfdc3f98022f97c2a818688f6b1
--- /dev/null
+++ b/checkpoint-360/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1692dc7dd5df436d5ce9ab34d0ffb67ccf1ac9f6ad34e4c78261a9f441a6db1b
+size 15024
diff --git a/checkpoint-360/rng_state_2.pth b/checkpoint-360/rng_state_2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..22323d232189aca1f95a821b9cc3d44ed48f473b
--- /dev/null
+++ b/checkpoint-360/rng_state_2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8f2a7b39392c289fa6613bad5680bc4f74a71bbd981f0a7e60593044eecf174e
+size 15024
diff --git a/checkpoint-360/rng_state_3.pth b/checkpoint-360/rng_state_3.pth
new file mode 100644
index 0000000000000000000000000000000000000000..a41b44d06701de70547b4cce762c5049bf8d84c3
--- /dev/null
+++ b/checkpoint-360/rng_state_3.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e4fcf7ef19ff07bd8503e736640eae507d4f24dbe6df23e2390549ba8d610ef4
+size 15024
diff --git a/checkpoint-360/scheduler.pt b/checkpoint-360/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ff2f7c17d593296aa78d47284744696583cb6872
--- /dev/null
+++ b/checkpoint-360/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b860c70deb1c46c42a28eda405621ad35cd321d87a9a89ec140e1c035e1419d2
+size 1064
diff --git a/checkpoint-360/trainer_state.json b/checkpoint-360/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..be8cac1510c0a095d2679f2caf52c04b6d859681
--- /dev/null
+++ b/checkpoint-360/trainer_state.json
@@ -0,0 +1,273 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.026122922864813873,
+  "eval_steps": 500,
+  "global_step": 360,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0007256367462448298,
+      "grad_norm": 12.0,
+      "learning_rate": 5e-05,
+      "loss": 3.0993,
+      "step": 10
+    },
+    {
+      "epoch": 0.0014512734924896596,
+      "grad_norm": 10.4375,
+      "learning_rate": 0.0001,
+      "loss": 2.208,
+      "step": 20
+    },
+    {
+      "epoch": 0.0021769102387344894,
+      "grad_norm": 6.5625,
+      "learning_rate": 0.00015,
+      "loss": 1.3285,
+      "step": 30
+    },
+    {
+      "epoch": 0.0029025469849793192,
+      "grad_norm": 6.21875,
+      "learning_rate": 0.0002,
+      "loss": 0.6895,
+      "step": 40
+    },
+    {
+      "epoch": 0.003628183731224149,
+      "grad_norm": 4.15625,
+      "learning_rate": 0.00025,
+      "loss": 0.714,
+      "step": 50
+    },
+    {
+      "epoch": 0.004353820477468979,
+      "grad_norm": 4.71875,
+      "learning_rate": 0.0003,
+      "loss": 0.4849,
+      "step": 60
+    },
+    {
+      "epoch": 0.005079457223713809,
+      "grad_norm": 3.828125,
+      "learning_rate": 0.00035,
+      "loss": 0.3671,
+      "step": 70
+    },
+    {
+      "epoch": 0.0058050939699586385,
+      "grad_norm": 4.03125,
+      "learning_rate": 0.0004,
+      "loss": 0.5693,
+      "step": 80
+    },
+    {
+      "epoch": 0.006530730716203468,
+      "grad_norm": 3.296875,
+      "learning_rate": 0.00045000000000000004,
+      "loss": 0.4133,
+      "step": 90
+    },
+    {
+      "epoch": 0.007256367462448298,
+      "grad_norm": 2.640625,
+      "learning_rate": 0.0005,
+      "loss": 0.2668,
+      "step": 100
+    },
+    {
+      "epoch": 0.007982004208693128,
+      "grad_norm": 1.5859375,
+      "learning_rate": 0.000499999340865746,
+      "loss": 0.2922,
+      "step": 110
+    },
+    {
+      "epoch": 0.008707640954937958,
+      "grad_norm": 3.8125,
+      "learning_rate": 0.0004999973634664594,
+      "loss": 0.3996,
+      "step": 120
+    },
+    {
+      "epoch": 0.009433277701182788,
+      "grad_norm": 2.0625,
+      "learning_rate": 0.0004999940678125673,
+      "loss": 0.2841,
+      "step": 130
+    },
+    {
+      "epoch": 0.010158914447427617,
+      "grad_norm": 3.390625,
+      "learning_rate": 0.000499989453921448,
+      "loss": 0.3003,
+      "step": 140
+    },
+    {
+      "epoch": 0.010884551193672447,
+      "grad_norm": 1.65625,
+      "learning_rate": 0.0004999835218174307,
+      "loss": 0.2747,
+      "step": 150
+    },
+    {
+      "epoch": 0.011610187939917277,
+      "grad_norm": 1.671875,
+      "learning_rate": 0.000499976271531796,
+      "loss": 0.37,
+      "step": 160
+    },
+    {
+      "epoch": 0.012335824686162107,
+      "grad_norm": 1.0390625,
+      "learning_rate": 0.000499967703102775,
+      "loss": 0.2163,
+      "step": 170
+    },
+    {
+      "epoch": 0.013061461432406937,
+      "grad_norm": 2.546875,
+      "learning_rate": 0.00049995781657555,
+      "loss": 0.3652,
+      "step": 180
+    },
+    {
+      "epoch": 0.013787098178651766,
+      "grad_norm": 3.71875,
+      "learning_rate": 0.000499946612002253,
+      "loss": 0.3177,
+      "step": 190
+    },
+    {
+      "epoch": 0.014512734924896596,
+      "grad_norm": 2.6875,
+      "learning_rate": 0.0004999340894419668,
+      "loss": 0.2043,
+      "step": 200
+    },
+    {
+      "epoch": 0.015238371671141426,
+      "grad_norm": 0.5,
+      "learning_rate": 0.0004999202489607236,
+      "loss": 0.2865,
+      "step": 210
+    },
+    {
+      "epoch": 0.015964008417386256,
+      "grad_norm": 3.015625,
+      "learning_rate": 0.0004999050906315055,
+      "loss": 0.2039,
+      "step": 220
+    },
+    {
+      "epoch": 0.016689645163631087,
+      "grad_norm": 0.99609375,
+      "learning_rate": 0.0004998886145342434,
+      "loss": 0.3509,
+      "step": 230
+    },
+    {
+      "epoch": 0.017415281909875915,
+      "grad_norm": 1.34375,
+      "learning_rate": 0.0004998708207558168,
+      "loss": 0.3208,
+      "step": 240
+    },
+    {
+      "epoch": 0.018140918656120747,
+      "grad_norm": 2.015625,
+      "learning_rate": 0.0004998517093900539,
+      "loss": 0.2307,
+      "step": 250
+    },
+    {
+      "epoch": 0.018866555402365575,
+      "grad_norm": 1.0703125,
+      "learning_rate": 0.0004998312805377302,
+      "loss": 0.2232,
+      "step": 260
+    },
+    {
+      "epoch": 0.019592192148610407,
+      "grad_norm": 1.7265625,
+      "learning_rate": 0.0004998095343065685,
+      "loss": 0.2587,
+      "step": 270
+    },
+    {
+      "epoch": 0.020317828894855235,
+      "grad_norm": 1.21875,
+      "learning_rate": 0.0004997864708112384,
+      "loss": 0.2175,
+      "step": 280
+    },
+    {
+      "epoch": 0.021043465641100066,
+      "grad_norm": 2.234375,
+      "learning_rate": 0.0004997620901733554,
+      "loss": 0.2185,
+      "step": 290
+    },
+    {
+      "epoch": 0.021769102387344894,
+      "grad_norm": 1.2734375,
+      "learning_rate": 0.0004997363925214804,
+      "loss": 0.2409,
+      "step": 300
+    },
+    {
+      "epoch": 0.022494739133589726,
+      "grad_norm": 1.7109375,
+      "learning_rate": 0.000499709377991119,
+      "loss": 0.1976,
+      "step": 310
+    },
+    {
+      "epoch": 0.023220375879834554,
+      "grad_norm": 1.7890625,
+      "learning_rate": 0.0004996810467247207,
+      "loss": 0.2899,
+      "step": 320
+    },
+    {
+      "epoch": 0.023946012626079385,
+      "grad_norm": 1.9296875,
+      "learning_rate": 0.0004996513988716786,
+      "loss": 0.2324,
+      "step": 330
+    },
+    {
+      "epoch": 0.024671649372324213,
+      "grad_norm": 1.59375,
+      "learning_rate": 0.0004996204345883278,
+      "loss": 0.2376,
+      "step": 340
+    },
+    {
+      "epoch": 0.025397286118569045,
+      "grad_norm": 1.734375,
+      "learning_rate": 0.0004995881540379454,
+      "loss": 0.2927,
+      "step": 350
+    },
+    {
+      "epoch": 0.026122922864813873,
+      "grad_norm": 1.9375,
+      "learning_rate": 0.0004995545573907492,
+      "loss": 0.2738,
+      "step": 360
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 13781,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 10,
+  "total_flos": 0.0,
+  "train_batch_size": 5,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-360/training_args.bin b/checkpoint-360/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..7d5e801f990cb70d84bd631ef6b3738864bdfd3d
--- /dev/null
+++ b/checkpoint-360/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3cf317b5a0322fdf5b8c610bfc4afb6ece34736fcb1bea30f92c1212b9b53d69
+size 4984
diff --git a/checkpoint-370/README.md b/checkpoint-370/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..64d55750c9c68c8ef0ccff2f65b1fd32a16578a9
--- /dev/null
+++ b/checkpoint-370/README.md
@@ -0,0 +1,202 @@
+---
+library_name: peft
+base_model: ../pretrained/MiniCPM-2B-dpo-bf16/
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.10.0
\ No newline at end of file
diff --git a/checkpoint-370/adapter_config.json b/checkpoint-370/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..d0eb9db7c6b0c827aba0e86e5057f651b0f868c9
--- /dev/null
+++ b/checkpoint-370/adapter_config.json
@@ -0,0 +1,29 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "../pretrained/MiniCPM-2B-dpo-bf16/",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": "gaussian",
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-370/adapter_model.safetensors b/checkpoint-370/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..a7aaa2a488a93f581fd7c14b03c67b13f0f568af
--- /dev/null
+++ b/checkpoint-370/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f5f737c266c13e4e8e94b8656ded1b4b5d210200d6c4d1e28e720c638d78fedc
+size 5919456
diff --git a/checkpoint-370/optimizer.pt b/checkpoint-370/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c37468f016a2afa93653c99ace8cbdf7efb941c4
--- /dev/null
+++ b/checkpoint-370/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cb87799157ea84b1824d4701042c4bb3a49f44a273630d7cccbdf39219742a42
+size 11930938
diff --git a/checkpoint-370/rng_state_0.pth b/checkpoint-370/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..abef8aa2e3e993088f6540b7adf4b005c8b3d4b8
--- /dev/null
+++ b/checkpoint-370/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3146f95f0c1744da54cfef23baa84eb478da1ede72afc7bb1a4d9c758b864513
+size 15024
diff --git a/checkpoint-370/rng_state_1.pth b/checkpoint-370/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..a7bda2a97f439dc50d8c4c386b75aa8f0dbfb683
--- /dev/null
+++ b/checkpoint-370/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:29f638987e1dd0d295ccf6faca3d895a81bf9d37a611d8c054768f38f31ef086
+size 15024
diff --git a/checkpoint-370/rng_state_2.pth b/checkpoint-370/rng_state_2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..199d723b6bbc73b8b9999b13830085f0f41a264e
--- /dev/null
+++ b/checkpoint-370/rng_state_2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:11173c1a154308e33fbdcfdddfce8e815d6f8ca6d15a19ed8eb70b9ebcfb6462
+size 15024
diff --git a/checkpoint-370/rng_state_3.pth b/checkpoint-370/rng_state_3.pth
new file mode 100644
index 0000000000000000000000000000000000000000..bc87349bd8fad2e53569b988f385cc2293209aae
--- /dev/null
+++ b/checkpoint-370/rng_state_3.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f6ecb8620405fb1a0e7b3f96c27d98a80ac6913d9646fe27c04de0af84c8d828
+size 15024
diff --git a/checkpoint-370/scheduler.pt b/checkpoint-370/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..0a54f910b2bb1d7e88b874877dec4c2dc0e7ff68
--- /dev/null
+++ b/checkpoint-370/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ef19c461e86a9f40ea42137736fecca23a99737dcd862105a3c04fe4c15cbf8a
+size 1064
diff --git a/checkpoint-370/trainer_state.json b/checkpoint-370/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..549d3d111b0f04554ffe5ed2d06977baf7e8477d
--- /dev/null
+++ b/checkpoint-370/trainer_state.json
@@ -0,0 +1,280 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.026848559611058705,
+  "eval_steps": 500,
+  "global_step": 370,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0007256367462448298,
+      "grad_norm": 12.0,
+      "learning_rate": 5e-05,
+      "loss": 3.0993,
+      "step": 10
+    },
+    {
+      "epoch": 0.0014512734924896596,
+      "grad_norm": 10.4375,
+      "learning_rate": 0.0001,
+      "loss": 2.208,
+      "step": 20
+    },
+    {
+      "epoch": 0.0021769102387344894,
+      "grad_norm": 6.5625,
+      "learning_rate": 0.00015,
+      "loss": 1.3285,
+      "step": 30
+    },
+    {
+      "epoch": 0.0029025469849793192,
+      "grad_norm": 6.21875,
+      "learning_rate": 0.0002,
+      "loss": 0.6895,
+      "step": 40
+    },
+    {
+      "epoch": 0.003628183731224149,
+      "grad_norm": 4.15625,
+      "learning_rate": 0.00025,
+      "loss": 0.714,
+      "step": 50
+    },
+    {
+      "epoch": 0.004353820477468979,
+      "grad_norm": 4.71875,
+      "learning_rate": 0.0003,
+      "loss": 0.4849,
+      "step": 60
+    },
+    {
+      "epoch": 0.005079457223713809,
+      "grad_norm": 3.828125,
+      "learning_rate": 0.00035,
+      "loss": 0.3671,
+      "step": 70
+    },
+    {
+      "epoch": 0.0058050939699586385,
+      "grad_norm": 4.03125,
+      "learning_rate": 0.0004,
+      "loss": 0.5693,
+      "step": 80
+    },
+    {
+      "epoch": 0.006530730716203468,
+      "grad_norm": 3.296875,
+      "learning_rate": 0.00045000000000000004,
+      "loss": 0.4133,
+      "step": 90
+    },
+    {
+      "epoch": 0.007256367462448298,
+      "grad_norm": 2.640625,
+      "learning_rate": 0.0005,
+      "loss": 0.2668,
+      "step": 100
+    },
+    {
+      "epoch": 0.007982004208693128,
+      "grad_norm": 1.5859375,
+      "learning_rate": 0.000499999340865746,
+      "loss": 0.2922,
+      "step": 110
+    },
+    {
+      "epoch": 0.008707640954937958,
+      "grad_norm": 3.8125,
+      "learning_rate": 0.0004999973634664594,
+      "loss": 0.3996,
+      "step": 120
+    },
+    {
+      "epoch": 0.009433277701182788,
+      "grad_norm": 2.0625,
+      "learning_rate": 0.0004999940678125673,
+      "loss": 0.2841,
+      "step": 130
+    },
+    {
+      "epoch": 0.010158914447427617,
+      "grad_norm": 3.390625,
+      "learning_rate": 0.000499989453921448,
+      "loss": 0.3003,
+      "step": 140
+    },
+    {
+      "epoch": 0.010884551193672447,
+      "grad_norm": 1.65625,
+      "learning_rate": 0.0004999835218174307,
+      "loss": 0.2747,
+      "step": 150
+    },
+    {
+      "epoch": 0.011610187939917277,
+      "grad_norm": 1.671875,
+      "learning_rate": 0.000499976271531796,
+      "loss": 0.37,
+      "step": 160
+    },
+    {
+      "epoch": 0.012335824686162107,
+      "grad_norm": 1.0390625,
+      "learning_rate": 0.000499967703102775,
+      "loss": 0.2163,
+      "step": 170
+    },
+    {
+      "epoch": 0.013061461432406937,
+      "grad_norm": 2.546875,
+      "learning_rate": 0.00049995781657555,
+      "loss": 0.3652,
+      "step": 180
+    },
+    {
+      "epoch": 0.013787098178651766,
+      "grad_norm": 3.71875,
+      "learning_rate": 0.000499946612002253,
+      "loss": 0.3177,
+      "step": 190
+    },
+    {
+      "epoch": 0.014512734924896596,
+      "grad_norm": 2.6875,
+      "learning_rate": 0.0004999340894419668,
+      "loss": 0.2043,
+      "step": 200
+    },
+    {
+      "epoch": 0.015238371671141426,
+      "grad_norm": 0.5,
+      "learning_rate": 0.0004999202489607236,
+      "loss": 0.2865,
+      "step": 210
+    },
+    {
+      "epoch": 0.015964008417386256,
+      "grad_norm": 3.015625,
+      "learning_rate": 0.0004999050906315055,
+      "loss": 0.2039,
+      "step": 220
+    },
+    {
+      "epoch": 0.016689645163631087,
+      "grad_norm": 0.99609375,
+      "learning_rate": 0.0004998886145342434,
+      "loss": 0.3509,
+      "step": 230
+    },
+    {
+      "epoch": 0.017415281909875915,
+      "grad_norm": 1.34375,
+      "learning_rate": 0.0004998708207558168,
+      "loss": 0.3208,
+      "step": 240
+    },
+    {
+      "epoch": 0.018140918656120747,
+      "grad_norm": 2.015625,
+      "learning_rate": 0.0004998517093900539,
+      "loss": 0.2307,
+      "step": 250
+    },
+    {
+      "epoch": 0.018866555402365575,
+      "grad_norm": 1.0703125,
+      "learning_rate": 0.0004998312805377302,
+      "loss": 0.2232,
+      "step": 260
+    },
+    {
+      "epoch": 0.019592192148610407,
+      "grad_norm": 1.7265625,
+      "learning_rate": 0.0004998095343065685,
+      "loss": 0.2587,
+      "step": 270
+    },
+    {
+      "epoch": 0.020317828894855235,
+      "grad_norm": 1.21875,
+      "learning_rate": 0.0004997864708112384,
+      "loss": 0.2175,
+      "step": 280
+    },
+    {
+      "epoch": 0.021043465641100066,
+      "grad_norm": 2.234375,
+      "learning_rate": 0.0004997620901733554,
+      "loss": 0.2185,
+      "step": 290
+    },
+    {
+      "epoch": 0.021769102387344894,
+      "grad_norm": 1.2734375,
+      "learning_rate": 0.0004997363925214804,
+      "loss": 0.2409,
+      "step": 300
+    },
+    {
+      "epoch": 0.022494739133589726,
+      "grad_norm": 1.7109375,
+      "learning_rate": 0.000499709377991119,
+      "loss": 0.1976,
+      "step": 310
+    },
+    {
+      "epoch": 0.023220375879834554,
+      "grad_norm": 1.7890625,
+      "learning_rate": 0.0004996810467247207,
+      "loss": 0.2899,
+      "step": 320
+    },
+    {
+      "epoch": 0.023946012626079385,
+      "grad_norm": 1.9296875,
+      "learning_rate": 0.0004996513988716786,
+      "loss": 0.2324,
+      "step": 330
+    },
+    {
+      "epoch": 0.024671649372324213,
+      "grad_norm": 1.59375,
+      "learning_rate": 0.0004996204345883278,
+      "loss": 0.2376,
+      "step": 340
+    },
+    {
+      "epoch": 0.025397286118569045,
+      "grad_norm": 1.734375,
+      "learning_rate": 0.0004995881540379454,
+      "loss": 0.2927,
+      "step": 350
+    },
+    {
+      "epoch": 0.026122922864813873,
+      "grad_norm": 1.9375,
+      "learning_rate": 0.0004995545573907492,
+      "loss": 0.2738,
+      "step": 360
+    },
+    {
+      "epoch": 0.026848559611058705,
+      "grad_norm": 2.421875,
+      "learning_rate": 0.0004995196448238966,
+      "loss": 0.2427,
+      "step": 370
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 13781,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 10,
+  "total_flos": 0.0,
+  "train_batch_size": 5,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-370/training_args.bin b/checkpoint-370/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..7d5e801f990cb70d84bd631ef6b3738864bdfd3d
--- /dev/null
+++ b/checkpoint-370/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3cf317b5a0322fdf5b8c610bfc4afb6ece34736fcb1bea30f92c1212b9b53d69
+size 4984
diff --git a/checkpoint-380/README.md b/checkpoint-380/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..64d55750c9c68c8ef0ccff2f65b1fd32a16578a9
--- /dev/null
+++ b/checkpoint-380/README.md
@@ -0,0 +1,202 @@
+---
+library_name: peft
+base_model: ../pretrained/MiniCPM-2B-dpo-bf16/
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.10.0
\ No newline at end of file
diff --git a/checkpoint-380/adapter_config.json b/checkpoint-380/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..d0eb9db7c6b0c827aba0e86e5057f651b0f868c9
--- /dev/null
+++ b/checkpoint-380/adapter_config.json
@@ -0,0 +1,29 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "../pretrained/MiniCPM-2B-dpo-bf16/",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": "gaussian",
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-380/adapter_model.safetensors b/checkpoint-380/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..5a13c591113b8a816e50d25a3d388ca581a96ddb
--- /dev/null
+++ b/checkpoint-380/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e1056ac9c00986f8ce1833e91687d0d5fe1678a6d91e432187394f9e7cba1454
+size 5919456
diff --git a/checkpoint-380/optimizer.pt b/checkpoint-380/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a9db1e624e6c7d50de2a4ed33a3b5bdfe08cd8bf
--- /dev/null
+++ b/checkpoint-380/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9a0af51b3a0a4e516e8d60d990e8269e2ae7434db36966bad43eae61e07be050
+size 11930938
diff --git a/checkpoint-380/rng_state_0.pth b/checkpoint-380/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..56391f4ae7e42ee41ef43a331c68c6d12f723b79
--- /dev/null
+++ b/checkpoint-380/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:869f7f45cbd2839fb784ff244fdb5e948a544e107fdee68028fa6014c4a3e871
+size 15024
diff --git a/checkpoint-380/rng_state_1.pth b/checkpoint-380/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..774a7a29447decc4934acc03c3a446a84bd7fe21
--- /dev/null
+++ b/checkpoint-380/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:045f127d34df78c369ad340be7cfade66e2235f599400ab8377e1e0292680dcc
+size 15024
diff --git a/checkpoint-380/rng_state_2.pth b/checkpoint-380/rng_state_2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..f18432f91b1aa118f1cad725f96f947db03fffa7
--- /dev/null
+++ b/checkpoint-380/rng_state_2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a0bc04bd5a571fccfb0d78451e477662a81282126fc11f1140f74fe348b5e75e
+size 15024
diff --git a/checkpoint-380/rng_state_3.pth b/checkpoint-380/rng_state_3.pth
new file mode 100644
index 0000000000000000000000000000000000000000..97ec1ba887c9aa5c1e67121da363327277afc228
--- /dev/null
+++ b/checkpoint-380/rng_state_3.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e877c1e028fb86d09f7a062dad4347790ec12c4e0fccb67351c942ed83d06460
+size 15024
diff --git a/checkpoint-380/scheduler.pt b/checkpoint-380/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..38423c68aecba6014a9c30c5c4ba4b5e2c7127ae
--- /dev/null
+++ b/checkpoint-380/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d48d816b261941cc22a9d458c0af5d41192dc236a409dd4ccf4618d43e27b50b
+size 1064
diff --git a/checkpoint-380/trainer_state.json b/checkpoint-380/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..a876658d1268196c1765ee075b98025864067513
--- /dev/null
+++ b/checkpoint-380/trainer_state.json
@@ -0,0 +1,287 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.027574196357303533,
+  "eval_steps": 500,
+  "global_step": 380,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0007256367462448298,
+      "grad_norm": 12.0,
+      "learning_rate": 5e-05,
+      "loss": 3.0993,
+      "step": 10
+    },
+    {
+      "epoch": 0.0014512734924896596,
+      "grad_norm": 10.4375,
+      "learning_rate": 0.0001,
+      "loss": 2.208,
+      "step": 20
+    },
+    {
+      "epoch": 0.0021769102387344894,
+      "grad_norm": 6.5625,
+      "learning_rate": 0.00015,
+      "loss": 1.3285,
+      "step": 30
+    },
+    {
+      "epoch": 0.0029025469849793192,
+      "grad_norm": 6.21875,
+      "learning_rate": 0.0002,
+      "loss": 0.6895,
+      "step": 40
+    },
+    {
+      "epoch": 0.003628183731224149,
+      "grad_norm": 4.15625,
+      "learning_rate": 0.00025,
+      "loss": 0.714,
+      "step": 50
+    },
+    {
+      "epoch": 0.004353820477468979,
+      "grad_norm": 4.71875,
+      "learning_rate": 0.0003,
+      "loss": 0.4849,
+      "step": 60
+    },
+    {
+      "epoch": 0.005079457223713809,
+      "grad_norm": 3.828125,
+      "learning_rate": 0.00035,
+      "loss": 0.3671,
+      "step": 70
+    },
+    {
+      "epoch": 0.0058050939699586385,
+      "grad_norm": 4.03125,
+      "learning_rate": 0.0004,
+      "loss": 0.5693,
+      "step": 80
+    },
+    {
+      "epoch": 0.006530730716203468,
+      "grad_norm": 3.296875,
+      "learning_rate": 0.00045000000000000004,
+      "loss": 0.4133,
+      "step": 90
+    },
+    {
+      "epoch": 0.007256367462448298,
+      "grad_norm": 2.640625,
+      "learning_rate": 0.0005,
+      "loss": 0.2668,
+      "step": 100
+    },
+    {
+      "epoch": 0.007982004208693128,
+      "grad_norm": 1.5859375,
+      "learning_rate": 0.000499999340865746,
+      "loss": 0.2922,
+      "step": 110
+    },
+    {
+      "epoch": 0.008707640954937958,
+      "grad_norm": 3.8125,
+      "learning_rate": 0.0004999973634664594,
+      "loss": 0.3996,
+      "step": 120
+    },
+    {
+      "epoch": 0.009433277701182788,
+      "grad_norm": 2.0625,
+      "learning_rate": 0.0004999940678125673,
+      "loss": 0.2841,
+      "step": 130
+    },
+    {
+      "epoch": 0.010158914447427617,
+      "grad_norm": 3.390625,
+      "learning_rate": 0.000499989453921448,
+      "loss": 0.3003,
+      "step": 140
+    },
+    {
+      "epoch": 0.010884551193672447,
+      "grad_norm": 1.65625,
+      "learning_rate": 0.0004999835218174307,
+      "loss": 0.2747,
+      "step": 150
+    },
+    {
+      "epoch": 0.011610187939917277,
+      "grad_norm": 1.671875,
+      "learning_rate": 0.000499976271531796,
+      "loss": 0.37,
+      "step": 160
+    },
+    {
+      "epoch": 0.012335824686162107,
+      "grad_norm": 1.0390625,
+      "learning_rate": 0.000499967703102775,
+      "loss": 0.2163,
+      "step": 170
+    },
+    {
+      "epoch": 0.013061461432406937,
+      "grad_norm": 2.546875,
+      "learning_rate": 0.00049995781657555,
+      "loss": 0.3652,
+      "step": 180
+    },
+    {
+      "epoch": 0.013787098178651766,
+      "grad_norm": 3.71875,
+      "learning_rate": 0.000499946612002253,
+      "loss": 0.3177,
+      "step": 190
+    },
+    {
+      "epoch": 0.014512734924896596,
+      "grad_norm": 2.6875,
+      "learning_rate": 0.0004999340894419668,
+      "loss": 0.2043,
+      "step": 200
+    },
+    {
+      "epoch": 0.015238371671141426,
+      "grad_norm": 0.5,
+      "learning_rate": 0.0004999202489607236,
+      "loss": 0.2865,
+      "step": 210
+    },
+    {
+      "epoch": 0.015964008417386256,
+      "grad_norm": 3.015625,
+      "learning_rate": 0.0004999050906315055,
+      "loss": 0.2039,
+      "step": 220
+    },
+    {
+      "epoch": 0.016689645163631087,
+      "grad_norm": 0.99609375,
+      "learning_rate": 0.0004998886145342434,
+      "loss": 0.3509,
+      "step": 230
+    },
+    {
+      "epoch": 0.017415281909875915,
+      "grad_norm": 1.34375,
+      "learning_rate": 0.0004998708207558168,
+      "loss": 0.3208,
+      "step": 240
+    },
+    {
+      "epoch": 0.018140918656120747,
+      "grad_norm": 2.015625,
+      "learning_rate": 0.0004998517093900539,
+      "loss": 0.2307,
+      "step": 250
+    },
+    {
+      "epoch": 0.018866555402365575,
+      "grad_norm": 1.0703125,
+      "learning_rate": 0.0004998312805377302,
+      "loss": 0.2232,
+      "step": 260
+    },
+    {
+      "epoch": 0.019592192148610407,
+      "grad_norm": 1.7265625,
+      "learning_rate": 0.0004998095343065685,
+      "loss": 0.2587,
+      "step": 270
+    },
+    {
+      "epoch": 0.020317828894855235,
+      "grad_norm": 1.21875,
+      "learning_rate": 0.0004997864708112384,
+      "loss": 0.2175,
+      "step": 280
+    },
+    {
+      "epoch": 0.021043465641100066,
+      "grad_norm": 2.234375,
+      "learning_rate": 0.0004997620901733554,
+      "loss": 0.2185,
+      "step": 290
+    },
+    {
+      "epoch": 0.021769102387344894,
+      "grad_norm": 1.2734375,
+      "learning_rate": 0.0004997363925214804,
+      "loss": 0.2409,
+      "step": 300
+    },
+    {
+      "epoch": 0.022494739133589726,
+      "grad_norm": 1.7109375,
+      "learning_rate": 0.000499709377991119,
+      "loss": 0.1976,
+      "step": 310
+    },
+    {
+      "epoch": 0.023220375879834554,
+      "grad_norm": 1.7890625,
+      "learning_rate": 0.0004996810467247207,
+      "loss": 0.2899,
+      "step": 320
+    },
+    {
+      "epoch": 0.023946012626079385,
+      "grad_norm": 1.9296875,
+      "learning_rate": 0.0004996513988716786,
+      "loss": 0.2324,
+      "step": 330
+    },
+    {
+      "epoch": 0.024671649372324213,
+      "grad_norm": 1.59375,
+      "learning_rate": 0.0004996204345883278,
+      "loss": 0.2376,
+      "step": 340
+    },
+    {
+      "epoch": 0.025397286118569045,
+      "grad_norm": 1.734375,
+      "learning_rate": 0.0004995881540379454,
+      "loss": 0.2927,
+      "step": 350
+    },
+    {
+      "epoch": 0.026122922864813873,
+      "grad_norm": 1.9375,
+      "learning_rate": 0.0004995545573907492,
+      "loss": 0.2738,
+      "step": 360
+    },
+    {
+      "epoch": 0.026848559611058705,
+      "grad_norm": 2.421875,
+      "learning_rate": 0.0004995196448238966,
+      "loss": 0.2427,
+      "step": 370
+    },
+    {
+      "epoch": 0.027574196357303533,
+      "grad_norm": 2.671875,
+      "learning_rate": 0.0004994834165214843,
+      "loss": 0.2032,
+      "step": 380
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 13781,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 10,
+  "total_flos": 0.0,
+  "train_batch_size": 5,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-380/training_args.bin b/checkpoint-380/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..7d5e801f990cb70d84bd631ef6b3738864bdfd3d
--- /dev/null
+++ b/checkpoint-380/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3cf317b5a0322fdf5b8c610bfc4afb6ece34736fcb1bea30f92c1212b9b53d69
+size 4984
diff --git a/checkpoint-390/README.md b/checkpoint-390/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..64d55750c9c68c8ef0ccff2f65b1fd32a16578a9
--- /dev/null
+++ b/checkpoint-390/README.md
@@ -0,0 +1,202 @@
+---
+library_name: peft
+base_model: ../pretrained/MiniCPM-2B-dpo-bf16/
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.10.0
\ No newline at end of file
diff --git a/checkpoint-390/adapter_config.json b/checkpoint-390/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..d0eb9db7c6b0c827aba0e86e5057f651b0f868c9
--- /dev/null
+++ b/checkpoint-390/adapter_config.json
@@ -0,0 +1,29 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "../pretrained/MiniCPM-2B-dpo-bf16/",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": "gaussian",
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-390/adapter_model.safetensors b/checkpoint-390/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..8dc8268483d6fc7c91d903af866267289bf4649f
--- /dev/null
+++ b/checkpoint-390/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6a68d0ae3345080ef8e12af09f4503bc878e5195f969803bd1c4378b7cf34c04
+size 5919456
diff --git a/checkpoint-390/optimizer.pt b/checkpoint-390/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..eefdabe3d654f38743ecd10f5809a92fb1618aab
--- /dev/null
+++ b/checkpoint-390/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7b7e5b152d1957ac3238ed651a58fbbcbf23198d5ac6ced81565ad77801775f4
+size 11930938
diff --git a/checkpoint-390/rng_state_0.pth b/checkpoint-390/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..bcb1b3ee5fbbbad35c41795578a7881127a9c5f8
--- /dev/null
+++ b/checkpoint-390/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e746fd42b939bd101d682a4bccf8e64a77e73b3c9f7ab0111d0c59a298820909
+size 15024
diff --git a/checkpoint-390/rng_state_1.pth b/checkpoint-390/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..e9121b38dc03afffd7733531c9f8f90f1070129d
--- /dev/null
+++ b/checkpoint-390/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6ed64606035f6a7ff110850dddfdcde7e8fa6439784c71880243d45b5713d79a
+size 15024
diff --git a/checkpoint-390/rng_state_2.pth b/checkpoint-390/rng_state_2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..1f04bd2afd9a3bb13834aed056e2206c9e53b624
--- /dev/null
+++ b/checkpoint-390/rng_state_2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:70878cf40acd532d3d9ddedbbf5dd8d639f13977f1257f64c5ea95dd4d94d264
+size 15024
diff --git a/checkpoint-390/rng_state_3.pth b/checkpoint-390/rng_state_3.pth
new file mode 100644
index 0000000000000000000000000000000000000000..6a0b6e17f7b5baa4fa57f93b3c1cdeb7ea601988
--- /dev/null
+++ b/checkpoint-390/rng_state_3.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4061a0958b75c03e67bef7eecf7933b3e14b5bb1a44480db37b14b3a00a29f0d
+size 15024
diff --git a/checkpoint-390/scheduler.pt b/checkpoint-390/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6c385c2463a29ebe1adcf63c3122b259dd868940
--- /dev/null
+++ b/checkpoint-390/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5602bc3f16efd69fd57e14b0c626efc9fc1faa0a73eb7da7d1c9e6f17e7a8bf5
+size 1064
diff --git a/checkpoint-390/trainer_state.json b/checkpoint-390/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..af115e63f12a598e7a39c23416d4d2c90fec148f
--- /dev/null
+++ b/checkpoint-390/trainer_state.json
@@ -0,0 +1,294 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.028299833103548364,
+  "eval_steps": 500,
+  "global_step": 390,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0007256367462448298,
+      "grad_norm": 12.0,
+      "learning_rate": 5e-05,
+      "loss": 3.0993,
+      "step": 10
+    },
+    {
+      "epoch": 0.0014512734924896596,
+      "grad_norm": 10.4375,
+      "learning_rate": 0.0001,
+      "loss": 2.208,
+      "step": 20
+    },
+    {
+      "epoch": 0.0021769102387344894,
+      "grad_norm": 6.5625,
+      "learning_rate": 0.00015,
+      "loss": 1.3285,
+      "step": 30
+    },
+    {
+      "epoch": 0.0029025469849793192,
+      "grad_norm": 6.21875,
+      "learning_rate": 0.0002,
+      "loss": 0.6895,
+      "step": 40
+    },
+    {
+      "epoch": 0.003628183731224149,
+      "grad_norm": 4.15625,
+      "learning_rate": 0.00025,
+      "loss": 0.714,
+      "step": 50
+    },
+    {
+      "epoch": 0.004353820477468979,
+      "grad_norm": 4.71875,
+      "learning_rate": 0.0003,
+      "loss": 0.4849,
+      "step": 60
+    },
+    {
+      "epoch": 0.005079457223713809,
+      "grad_norm": 3.828125,
+      "learning_rate": 0.00035,
+      "loss": 0.3671,
+      "step": 70
+    },
+    {
+      "epoch": 0.0058050939699586385,
+      "grad_norm": 4.03125,
+      "learning_rate": 0.0004,
+      "loss": 0.5693,
+      "step": 80
+    },
+    {
+      "epoch": 0.006530730716203468,
+      "grad_norm": 3.296875,
+      "learning_rate": 0.00045000000000000004,
+      "loss": 0.4133,
+      "step": 90
+    },
+    {
+      "epoch": 0.007256367462448298,
+      "grad_norm": 2.640625,
+      "learning_rate": 0.0005,
+      "loss": 0.2668,
+      "step": 100
+    },
+    {
+      "epoch": 0.007982004208693128,
+      "grad_norm": 1.5859375,
+      "learning_rate": 0.000499999340865746,
+      "loss": 0.2922,
+      "step": 110
+    },
+    {
+      "epoch": 0.008707640954937958,
+      "grad_norm": 3.8125,
+      "learning_rate": 0.0004999973634664594,
+      "loss": 0.3996,
+      "step": 120
+    },
+    {
+      "epoch": 0.009433277701182788,
+      "grad_norm": 2.0625,
+      "learning_rate": 0.0004999940678125673,
+      "loss": 0.2841,
+      "step": 130
+    },
+    {
+      "epoch": 0.010158914447427617,
+      "grad_norm": 3.390625,
+      "learning_rate": 0.000499989453921448,
+      "loss": 0.3003,
+      "step": 140
+    },
+    {
+      "epoch": 0.010884551193672447,
+      "grad_norm": 1.65625,
+      "learning_rate": 0.0004999835218174307,
+      "loss": 0.2747,
+      "step": 150
+    },
+    {
+      "epoch": 0.011610187939917277,
+      "grad_norm": 1.671875,
+      "learning_rate": 0.000499976271531796,
+      "loss": 0.37,
+      "step": 160
+    },
+    {
+      "epoch": 0.012335824686162107,
+      "grad_norm": 1.0390625,
+      "learning_rate": 0.000499967703102775,
+      "loss": 0.2163,
+      "step": 170
+    },
+    {
+      "epoch": 0.013061461432406937,
+      "grad_norm": 2.546875,
+      "learning_rate": 0.00049995781657555,
+      "loss": 0.3652,
+      "step": 180
+    },
+    {
+      "epoch": 0.013787098178651766,
+      "grad_norm": 3.71875,
+      "learning_rate": 0.000499946612002253,
+      "loss": 0.3177,
+      "step": 190
+    },
+    {
+      "epoch": 0.014512734924896596,
+      "grad_norm": 2.6875,
+      "learning_rate": 0.0004999340894419668,
+      "loss": 0.2043,
+      "step": 200
+    },
+    {
+      "epoch": 0.015238371671141426,
+      "grad_norm": 0.5,
+      "learning_rate": 0.0004999202489607236,
+      "loss": 0.2865,
+      "step": 210
+    },
+    {
+      "epoch": 0.015964008417386256,
+      "grad_norm": 3.015625,
+      "learning_rate": 0.0004999050906315055,
+      "loss": 0.2039,
+      "step": 220
+    },
+    {
+      "epoch": 0.016689645163631087,
+      "grad_norm": 0.99609375,
+      "learning_rate": 0.0004998886145342434,
+      "loss": 0.3509,
+      "step": 230
+    },
+    {
+      "epoch": 0.017415281909875915,
+      "grad_norm": 1.34375,
+      "learning_rate": 0.0004998708207558168,
+      "loss": 0.3208,
+      "step": 240
+    },
+    {
+      "epoch": 0.018140918656120747,
+      "grad_norm": 2.015625,
+      "learning_rate": 0.0004998517093900539,
+      "loss": 0.2307,
+      "step": 250
+    },
+    {
+      "epoch": 0.018866555402365575,
+      "grad_norm": 1.0703125,
+      "learning_rate": 0.0004998312805377302,
+      "loss": 0.2232,
+      "step": 260
+    },
+    {
+      "epoch": 0.019592192148610407,
+      "grad_norm": 1.7265625,
+      "learning_rate": 0.0004998095343065685,
+      "loss": 0.2587,
+      "step": 270
+    },
+    {
+      "epoch": 0.020317828894855235,
+      "grad_norm": 1.21875,
+      "learning_rate": 0.0004997864708112384,
+      "loss": 0.2175,
+      "step": 280
+    },
+    {
+      "epoch": 0.021043465641100066,
+      "grad_norm": 2.234375,
+      "learning_rate": 0.0004997620901733554,
+      "loss": 0.2185,
+      "step": 290
+    },
+    {
+      "epoch": 0.021769102387344894,
+      "grad_norm": 1.2734375,
+      "learning_rate": 0.0004997363925214804,
+      "loss": 0.2409,
+      "step": 300
+    },
+    {
+      "epoch": 0.022494739133589726,
+      "grad_norm": 1.7109375,
+      "learning_rate": 0.000499709377991119,
+      "loss": 0.1976,
+      "step": 310
+    },
+    {
+      "epoch": 0.023220375879834554,
+      "grad_norm": 1.7890625,
+      "learning_rate": 0.0004996810467247207,
+      "loss": 0.2899,
+      "step": 320
+    },
+    {
+      "epoch": 0.023946012626079385,
+      "grad_norm": 1.9296875,
+      "learning_rate": 0.0004996513988716786,
+      "loss": 0.2324,
+      "step": 330
+    },
+    {
+      "epoch": 0.024671649372324213,
+      "grad_norm": 1.59375,
+      "learning_rate": 0.0004996204345883278,
+      "loss": 0.2376,
+      "step": 340
+    },
+    {
+      "epoch": 0.025397286118569045,
+      "grad_norm": 1.734375,
+      "learning_rate": 0.0004995881540379454,
+      "loss": 0.2927,
+      "step": 350
+    },
+    {
+      "epoch": 0.026122922864813873,
+      "grad_norm": 1.9375,
+      "learning_rate": 0.0004995545573907492,
+      "loss": 0.2738,
+      "step": 360
+    },
+    {
+      "epoch": 0.026848559611058705,
+      "grad_norm": 2.421875,
+      "learning_rate": 0.0004995196448238966,
+      "loss": 0.2427,
+      "step": 370
+    },
+    {
+      "epoch": 0.027574196357303533,
+      "grad_norm": 2.671875,
+      "learning_rate": 0.0004994834165214843,
+      "loss": 0.2032,
+      "step": 380
+    },
+    {
+      "epoch": 0.028299833103548364,
+      "grad_norm": 3.328125,
+      "learning_rate": 0.0004994458726745468,
+      "loss": 0.2748,
+      "step": 390
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 13781,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 10,
+  "total_flos": 0.0,
+  "train_batch_size": 5,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-390/training_args.bin b/checkpoint-390/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..7d5e801f990cb70d84bd631ef6b3738864bdfd3d
--- /dev/null
+++ b/checkpoint-390/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3cf317b5a0322fdf5b8c610bfc4afb6ece34736fcb1bea30f92c1212b9b53d69
+size 4984
diff --git a/checkpoint-40/README.md b/checkpoint-40/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..64d55750c9c68c8ef0ccff2f65b1fd32a16578a9
--- /dev/null
+++ b/checkpoint-40/README.md
@@ -0,0 +1,202 @@
+---
+library_name: peft
+base_model: ../pretrained/MiniCPM-2B-dpo-bf16/
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.10.0
\ No newline at end of file
diff --git a/checkpoint-40/adapter_config.json b/checkpoint-40/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..d0eb9db7c6b0c827aba0e86e5057f651b0f868c9
--- /dev/null
+++ b/checkpoint-40/adapter_config.json
@@ -0,0 +1,29 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "../pretrained/MiniCPM-2B-dpo-bf16/",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": "gaussian",
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-40/adapter_model.safetensors b/checkpoint-40/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..71e2c9ae80c5913ae379efa9e5e03b0363d804b6
--- /dev/null
+++ b/checkpoint-40/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7aadec3783c3e8b283fbeb1f67c8d3ff8c34daddda552f78efbac474dc2273d4
+size 5919456
diff --git a/checkpoint-40/optimizer.pt b/checkpoint-40/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d2179170b2f6d37e89c7788ead7f5385aeae5a3e
--- /dev/null
+++ b/checkpoint-40/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a5aede053fb6325f560e0effa4d3bd47ee05769c5945227c43a6fc08fdd824ef
+size 11930938
diff --git a/checkpoint-40/rng_state_0.pth b/checkpoint-40/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..9e8f16ac08aebd809ed776ef1ac75d3ee0e96ff9
--- /dev/null
+++ b/checkpoint-40/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e1f8643655e1b0b84191728db04abf511651177d3f1347864bfc77509451a6e8
+size 15024
diff --git a/checkpoint-40/rng_state_1.pth b/checkpoint-40/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..782296e4dbb91ea8def64d3b86fb5eaf05b4ef66
--- /dev/null
+++ b/checkpoint-40/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:65e85d9abc56f5dfd5d665c16de6a0a6bf782dca748ce6a340b75e82d74aeb39
+size 15024
diff --git a/checkpoint-40/rng_state_2.pth b/checkpoint-40/rng_state_2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..627ef6562eeb84b6bf2e753a35f57293e11f918c
--- /dev/null
+++ b/checkpoint-40/rng_state_2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5f2d9d0c83de2024e3aa7cb67c9ccf00f23a1d9dcc3af75beabee1dddc69d0a9
+size 15024
diff --git a/checkpoint-40/rng_state_3.pth b/checkpoint-40/rng_state_3.pth
new file mode 100644
index 0000000000000000000000000000000000000000..0a349098c52cc9cdb6fff24f55ebc5df9954ea42
--- /dev/null
+++ b/checkpoint-40/rng_state_3.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:41ffc7ad388f1b28562b762777d46e208986f47ae3693de6880ff38dfa102632
+size 15024
diff --git a/checkpoint-40/scheduler.pt b/checkpoint-40/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..54d9b6ddbb9515a53847e31f110f5383ea98e7b7
--- /dev/null
+++ b/checkpoint-40/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1eee9125a37b3376a8080678cbfcc42684092a2d25b3c7f8f18c06521cf4b957
+size 1064
diff --git a/checkpoint-40/trainer_state.json b/checkpoint-40/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..c5b2dcf182ed389a0ff60819c8b1382d07a471ab
--- /dev/null
+++ b/checkpoint-40/trainer_state.json
@@ -0,0 +1,49 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.0029025469849793192,
+  "eval_steps": 500,
+  "global_step": 40,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0007256367462448298,
+      "grad_norm": 12.0,
+      "learning_rate": 5e-05,
+      "loss": 3.0993,
+      "step": 10
+    },
+    {
+      "epoch": 0.0014512734924896596,
+      "grad_norm": 10.4375,
+      "learning_rate": 0.0001,
+      "loss": 2.208,
+      "step": 20
+    },
+    {
+      "epoch": 0.0021769102387344894,
+      "grad_norm": 6.5625,
+      "learning_rate": 0.00015,
+      "loss": 1.3285,
+      "step": 30
+    },
+    {
+      "epoch": 0.0029025469849793192,
+      "grad_norm": 6.21875,
+      "learning_rate": 0.0002,
+      "loss": 0.6895,
+      "step": 40
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 13781,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 10,
+  "total_flos": 0.0,
+  "train_batch_size": 5,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-40/training_args.bin b/checkpoint-40/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..7d5e801f990cb70d84bd631ef6b3738864bdfd3d
--- /dev/null
+++ b/checkpoint-40/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3cf317b5a0322fdf5b8c610bfc4afb6ece34736fcb1bea30f92c1212b9b53d69
+size 4984
diff --git a/checkpoint-400/README.md b/checkpoint-400/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..64d55750c9c68c8ef0ccff2f65b1fd32a16578a9
--- /dev/null
+++ b/checkpoint-400/README.md
@@ -0,0 +1,202 @@
+---
+library_name: peft
+base_model: ../pretrained/MiniCPM-2B-dpo-bf16/
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.10.0
\ No newline at end of file
diff --git a/checkpoint-400/adapter_config.json b/checkpoint-400/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..d0eb9db7c6b0c827aba0e86e5057f651b0f868c9
--- /dev/null
+++ b/checkpoint-400/adapter_config.json
@@ -0,0 +1,29 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "../pretrained/MiniCPM-2B-dpo-bf16/",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": "gaussian",
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-400/adapter_model.safetensors b/checkpoint-400/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..7611e3acf2d10e5e9ec975ec9deec87b08b6bdff
--- /dev/null
+++ b/checkpoint-400/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7d57d73741974492a8403ab8f0052c42a46f6345e5da95b0efc804f7b1da5319
+size 5919456
diff --git a/checkpoint-400/optimizer.pt b/checkpoint-400/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..60a2811161727bc157d6b8fd2a34cb1936f91d51
--- /dev/null
+++ b/checkpoint-400/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f36c3bac062e1f80aca614615360aed597dd598eebc45a343100ce4c50e91481
+size 11930938
diff --git a/checkpoint-400/rng_state_0.pth b/checkpoint-400/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..c2ea04feadfa53923087ac350490049ecc04ad0e
--- /dev/null
+++ b/checkpoint-400/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:54dc0e933d802a54778f931ba67b11a16b46e9d6a694245077dc890d7f369d06
+size 15024
diff --git a/checkpoint-400/rng_state_1.pth b/checkpoint-400/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..7ee05facc9af30e8a6fc98198d9fc6392d85da19
--- /dev/null
+++ b/checkpoint-400/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:07686b712dd1aa2a2280fa61110438ceda047eb1071bdd181487081671ab0a93
+size 15024
diff --git a/checkpoint-400/rng_state_2.pth b/checkpoint-400/rng_state_2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..b33dd65ae6427ff942048d715ebbb5ab3a2300f6
--- /dev/null
+++ b/checkpoint-400/rng_state_2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f33ba30afda151e1a06581ceff90e24fa69a2593e15f352510b6cdbbeebe383c
+size 15024
diff --git a/checkpoint-400/rng_state_3.pth b/checkpoint-400/rng_state_3.pth
new file mode 100644
index 0000000000000000000000000000000000000000..5cdbc3b6f8f0af49d64b6172e1da0c7e1999ce4e
--- /dev/null
+++ b/checkpoint-400/rng_state_3.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:627c132479f39e9e71ecaf7302104bd89d7fbc86afe155dbb421a1553b73e3fc
+size 15024
diff --git a/checkpoint-400/scheduler.pt b/checkpoint-400/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..bc6cc8d6fa3f57c5877f6c582ce3c0cf70fca5a3
--- /dev/null
+++ b/checkpoint-400/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:72cf64a2002b0c7221de67ea018e9345a449eaa5da945e800c52dcdf8b9b6a8e
+size 1064
diff --git a/checkpoint-400/trainer_state.json b/checkpoint-400/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..d36e31e91522484d19e9d842719e49c52ecc378c
--- /dev/null
+++ b/checkpoint-400/trainer_state.json
@@ -0,0 +1,301 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.029025469849793192,
+  "eval_steps": 500,
+  "global_step": 400,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0007256367462448298,
+      "grad_norm": 12.0,
+      "learning_rate": 5e-05,
+      "loss": 3.0993,
+      "step": 10
+    },
+    {
+      "epoch": 0.0014512734924896596,
+      "grad_norm": 10.4375,
+      "learning_rate": 0.0001,
+      "loss": 2.208,
+      "step": 20
+    },
+    {
+      "epoch": 0.0021769102387344894,
+      "grad_norm": 6.5625,
+      "learning_rate": 0.00015,
+      "loss": 1.3285,
+      "step": 30
+    },
+    {
+      "epoch": 0.0029025469849793192,
+      "grad_norm": 6.21875,
+      "learning_rate": 0.0002,
+      "loss": 0.6895,
+      "step": 40
+    },
+    {
+      "epoch": 0.003628183731224149,
+      "grad_norm": 4.15625,
+      "learning_rate": 0.00025,
+      "loss": 0.714,
+      "step": 50
+    },
+    {
+      "epoch": 0.004353820477468979,
+      "grad_norm": 4.71875,
+      "learning_rate": 0.0003,
+      "loss": 0.4849,
+      "step": 60
+    },
+    {
+      "epoch": 0.005079457223713809,
+      "grad_norm": 3.828125,
+      "learning_rate": 0.00035,
+      "loss": 0.3671,
+      "step": 70
+    },
+    {
+      "epoch": 0.0058050939699586385,
+      "grad_norm": 4.03125,
+      "learning_rate": 0.0004,
+      "loss": 0.5693,
+      "step": 80
+    },
+    {
+      "epoch": 0.006530730716203468,
+      "grad_norm": 3.296875,
+      "learning_rate": 0.00045000000000000004,
+      "loss": 0.4133,
+      "step": 90
+    },
+    {
+      "epoch": 0.007256367462448298,
+      "grad_norm": 2.640625,
+      "learning_rate": 0.0005,
+      "loss": 0.2668,
+      "step": 100
+    },
+    {
+      "epoch": 0.007982004208693128,
+      "grad_norm": 1.5859375,
+      "learning_rate": 0.000499999340865746,
+      "loss": 0.2922,
+      "step": 110
+    },
+    {
+      "epoch": 0.008707640954937958,
+      "grad_norm": 3.8125,
+      "learning_rate": 0.0004999973634664594,
+      "loss": 0.3996,
+      "step": 120
+    },
+    {
+      "epoch": 0.009433277701182788,
+      "grad_norm": 2.0625,
+      "learning_rate": 0.0004999940678125673,
+      "loss": 0.2841,
+      "step": 130
+    },
+    {
+      "epoch": 0.010158914447427617,
+      "grad_norm": 3.390625,
+      "learning_rate": 0.000499989453921448,
+      "loss": 0.3003,
+      "step": 140
+    },
+    {
+      "epoch": 0.010884551193672447,
+      "grad_norm": 1.65625,
+      "learning_rate": 0.0004999835218174307,
+      "loss": 0.2747,
+      "step": 150
+    },
+    {
+      "epoch": 0.011610187939917277,
+      "grad_norm": 1.671875,
+      "learning_rate": 0.000499976271531796,
+      "loss": 0.37,
+      "step": 160
+    },
+    {
+      "epoch": 0.012335824686162107,
+      "grad_norm": 1.0390625,
+      "learning_rate": 0.000499967703102775,
+      "loss": 0.2163,
+      "step": 170
+    },
+    {
+      "epoch": 0.013061461432406937,
+      "grad_norm": 2.546875,
+      "learning_rate": 0.00049995781657555,
+      "loss": 0.3652,
+      "step": 180
+    },
+    {
+      "epoch": 0.013787098178651766,
+      "grad_norm": 3.71875,
+      "learning_rate": 0.000499946612002253,
+      "loss": 0.3177,
+      "step": 190
+    },
+    {
+      "epoch": 0.014512734924896596,
+      "grad_norm": 2.6875,
+      "learning_rate": 0.0004999340894419668,
+      "loss": 0.2043,
+      "step": 200
+    },
+    {
+      "epoch": 0.015238371671141426,
+      "grad_norm": 0.5,
+      "learning_rate": 0.0004999202489607236,
+      "loss": 0.2865,
+      "step": 210
+    },
+    {
+      "epoch": 0.015964008417386256,
+      "grad_norm": 3.015625,
+      "learning_rate": 0.0004999050906315055,
+      "loss": 0.2039,
+      "step": 220
+    },
+    {
+      "epoch": 0.016689645163631087,
+      "grad_norm": 0.99609375,
+      "learning_rate": 0.0004998886145342434,
+      "loss": 0.3509,
+      "step": 230
+    },
+    {
+      "epoch": 0.017415281909875915,
+      "grad_norm": 1.34375,
+      "learning_rate": 0.0004998708207558168,
+      "loss": 0.3208,
+      "step": 240
+    },
+    {
+      "epoch": 0.018140918656120747,
+      "grad_norm": 2.015625,
+      "learning_rate": 0.0004998517093900539,
+      "loss": 0.2307,
+      "step": 250
+    },
+    {
+      "epoch": 0.018866555402365575,
+      "grad_norm": 1.0703125,
+      "learning_rate": 0.0004998312805377302,
+      "loss": 0.2232,
+      "step": 260
+    },
+    {
+      "epoch": 0.019592192148610407,
+      "grad_norm": 1.7265625,
+      "learning_rate": 0.0004998095343065685,
+      "loss": 0.2587,
+      "step": 270
+    },
+    {
+      "epoch": 0.020317828894855235,
+      "grad_norm": 1.21875,
+      "learning_rate": 0.0004997864708112384,
+      "loss": 0.2175,
+      "step": 280
+    },
+    {
+      "epoch": 0.021043465641100066,
+      "grad_norm": 2.234375,
+      "learning_rate": 0.0004997620901733554,
+      "loss": 0.2185,
+      "step": 290
+    },
+    {
+      "epoch": 0.021769102387344894,
+      "grad_norm": 1.2734375,
+      "learning_rate": 0.0004997363925214804,
+      "loss": 0.2409,
+      "step": 300
+    },
+    {
+      "epoch": 0.022494739133589726,
+      "grad_norm": 1.7109375,
+      "learning_rate": 0.000499709377991119,
+      "loss": 0.1976,
+      "step": 310
+    },
+    {
+      "epoch": 0.023220375879834554,
+      "grad_norm": 1.7890625,
+      "learning_rate": 0.0004996810467247207,
+      "loss": 0.2899,
+      "step": 320
+    },
+    {
+      "epoch": 0.023946012626079385,
+      "grad_norm": 1.9296875,
+      "learning_rate": 0.0004996513988716786,
+      "loss": 0.2324,
+      "step": 330
+    },
+    {
+      "epoch": 0.024671649372324213,
+      "grad_norm": 1.59375,
+      "learning_rate": 0.0004996204345883278,
+      "loss": 0.2376,
+      "step": 340
+    },
+    {
+      "epoch": 0.025397286118569045,
+      "grad_norm": 1.734375,
+      "learning_rate": 0.0004995881540379454,
+      "loss": 0.2927,
+      "step": 350
+    },
+    {
+      "epoch": 0.026122922864813873,
+      "grad_norm": 1.9375,
+      "learning_rate": 0.0004995545573907492,
+      "loss": 0.2738,
+      "step": 360
+    },
+    {
+      "epoch": 0.026848559611058705,
+      "grad_norm": 2.421875,
+      "learning_rate": 0.0004995196448238966,
+      "loss": 0.2427,
+      "step": 370
+    },
+    {
+      "epoch": 0.027574196357303533,
+      "grad_norm": 2.671875,
+      "learning_rate": 0.0004994834165214843,
+      "loss": 0.2032,
+      "step": 380
+    },
+    {
+      "epoch": 0.028299833103548364,
+      "grad_norm": 3.328125,
+      "learning_rate": 0.0004994458726745468,
+      "loss": 0.2748,
+      "step": 390
+    },
+    {
+      "epoch": 0.029025469849793192,
+      "grad_norm": 0.498046875,
+      "learning_rate": 0.0004994070134810556,
+      "loss": 0.2275,
+      "step": 400
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 13781,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 10,
+  "total_flos": 0.0,
+  "train_batch_size": 5,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-400/training_args.bin b/checkpoint-400/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..7d5e801f990cb70d84bd631ef6b3738864bdfd3d
--- /dev/null
+++ b/checkpoint-400/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3cf317b5a0322fdf5b8c610bfc4afb6ece34736fcb1bea30f92c1212b9b53d69
+size 4984
diff --git a/checkpoint-410/README.md b/checkpoint-410/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..64d55750c9c68c8ef0ccff2f65b1fd32a16578a9
--- /dev/null
+++ b/checkpoint-410/README.md
@@ -0,0 +1,202 @@
+---
+library_name: peft
+base_model: ../pretrained/MiniCPM-2B-dpo-bf16/
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.10.0
\ No newline at end of file
diff --git a/checkpoint-410/adapter_config.json b/checkpoint-410/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..d0eb9db7c6b0c827aba0e86e5057f651b0f868c9
--- /dev/null
+++ b/checkpoint-410/adapter_config.json
@@ -0,0 +1,29 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "../pretrained/MiniCPM-2B-dpo-bf16/",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": "gaussian",
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-410/adapter_model.safetensors b/checkpoint-410/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..e70f06d434c9f0d7add9a314acfd73d57b51d33f
--- /dev/null
+++ b/checkpoint-410/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:176b984c37c7f2d8234074d3f2358d2b33442765190e2c46f1c4ea110a59c318
+size 5919456
diff --git a/checkpoint-410/optimizer.pt b/checkpoint-410/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5aacbd773d1ed66628dffba681720fe9ee4ebd2c
--- /dev/null
+++ b/checkpoint-410/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:174a183bb65a26c00223be0e774de12c199b266ddda34e58dba74895f5350acb
+size 11930938
diff --git a/checkpoint-410/rng_state_0.pth b/checkpoint-410/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..5a10219fd6796b7171cfb8fd74630f35e9cb2f19
--- /dev/null
+++ b/checkpoint-410/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:24525db6b2052ede211e33d8f6067c274a52bb12ad7de95fc2dca6926c1b23aa
+size 15024
diff --git a/checkpoint-410/rng_state_1.pth b/checkpoint-410/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..42076162158ac6fe0cc3e3845b4b37224cc4e8ef
--- /dev/null
+++ b/checkpoint-410/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f9d89f1d5d29a830a31f2af11a3b4fdda286d4a34fb2bd7d3abb9e539275dfda
+size 15024
diff --git a/checkpoint-410/rng_state_2.pth b/checkpoint-410/rng_state_2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..d27cc88e72e9e7ef8c3b87830b4724cdf1735033
--- /dev/null
+++ b/checkpoint-410/rng_state_2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ce65d5bea5a8aaa2cca6c5d90b2ad798ac914b1a58181697c804c64922d0bf1a
+size 15024
diff --git a/checkpoint-410/rng_state_3.pth b/checkpoint-410/rng_state_3.pth
new file mode 100644
index 0000000000000000000000000000000000000000..1b4d8b97503f376bae6c386a0987b3d5a9afabdd
--- /dev/null
+++ b/checkpoint-410/rng_state_3.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2a4e8dd5fc8ed03ac9fbbc4d44ca3acc4f96ff97d29cd216f6fbe46c504e77b3
+size 15024
diff --git a/checkpoint-410/scheduler.pt b/checkpoint-410/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d7e77a46166750bb7b92184196c31fbdbcb4e319
--- /dev/null
+++ b/checkpoint-410/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f34328ff03a5e3f7b0f751c8679aa89b38193199cc9df2155f5d8e87b12a5a5a
+size 1064
diff --git a/checkpoint-410/trainer_state.json b/checkpoint-410/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..f8680ab9ba44e78ff6ccee4cf86bc101fe1c986f
--- /dev/null
+++ b/checkpoint-410/trainer_state.json
@@ -0,0 +1,308 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.029751106596038024,
+  "eval_steps": 500,
+  "global_step": 410,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0007256367462448298,
+      "grad_norm": 12.0,
+      "learning_rate": 5e-05,
+      "loss": 3.0993,
+      "step": 10
+    },
+    {
+      "epoch": 0.0014512734924896596,
+      "grad_norm": 10.4375,
+      "learning_rate": 0.0001,
+      "loss": 2.208,
+      "step": 20
+    },
+    {
+      "epoch": 0.0021769102387344894,
+      "grad_norm": 6.5625,
+      "learning_rate": 0.00015,
+      "loss": 1.3285,
+      "step": 30
+    },
+    {
+      "epoch": 0.0029025469849793192,
+      "grad_norm": 6.21875,
+      "learning_rate": 0.0002,
+      "loss": 0.6895,
+      "step": 40
+    },
+    {
+      "epoch": 0.003628183731224149,
+      "grad_norm": 4.15625,
+      "learning_rate": 0.00025,
+      "loss": 0.714,
+      "step": 50
+    },
+    {
+      "epoch": 0.004353820477468979,
+      "grad_norm": 4.71875,
+      "learning_rate": 0.0003,
+      "loss": 0.4849,
+      "step": 60
+    },
+    {
+      "epoch": 0.005079457223713809,
+      "grad_norm": 3.828125,
+      "learning_rate": 0.00035,
+      "loss": 0.3671,
+      "step": 70
+    },
+    {
+      "epoch": 0.0058050939699586385,
+      "grad_norm": 4.03125,
+      "learning_rate": 0.0004,
+      "loss": 0.5693,
+      "step": 80
+    },
+    {
+      "epoch": 0.006530730716203468,
+      "grad_norm": 3.296875,
+      "learning_rate": 0.00045000000000000004,
+      "loss": 0.4133,
+      "step": 90
+    },
+    {
+      "epoch": 0.007256367462448298,
+      "grad_norm": 2.640625,
+      "learning_rate": 0.0005,
+      "loss": 0.2668,
+      "step": 100
+    },
+    {
+      "epoch": 0.007982004208693128,
+      "grad_norm": 1.5859375,
+      "learning_rate": 0.000499999340865746,
+      "loss": 0.2922,
+      "step": 110
+    },
+    {
+      "epoch": 0.008707640954937958,
+      "grad_norm": 3.8125,
+      "learning_rate": 0.0004999973634664594,
+      "loss": 0.3996,
+      "step": 120
+    },
+    {
+      "epoch": 0.009433277701182788,
+      "grad_norm": 2.0625,
+      "learning_rate": 0.0004999940678125673,
+      "loss": 0.2841,
+      "step": 130
+    },
+    {
+      "epoch": 0.010158914447427617,
+      "grad_norm": 3.390625,
+      "learning_rate": 0.000499989453921448,
+      "loss": 0.3003,
+      "step": 140
+    },
+    {
+      "epoch": 0.010884551193672447,
+      "grad_norm": 1.65625,
+      "learning_rate": 0.0004999835218174307,
+      "loss": 0.2747,
+      "step": 150
+    },
+    {
+      "epoch": 0.011610187939917277,
+      "grad_norm": 1.671875,
+      "learning_rate": 0.000499976271531796,
+      "loss": 0.37,
+      "step": 160
+    },
+    {
+      "epoch": 0.012335824686162107,
+      "grad_norm": 1.0390625,
+      "learning_rate": 0.000499967703102775,
+      "loss": 0.2163,
+      "step": 170
+    },
+    {
+      "epoch": 0.013061461432406937,
+      "grad_norm": 2.546875,
+      "learning_rate": 0.00049995781657555,
+      "loss": 0.3652,
+      "step": 180
+    },
+    {
+      "epoch": 0.013787098178651766,
+      "grad_norm": 3.71875,
+      "learning_rate": 0.000499946612002253,
+      "loss": 0.3177,
+      "step": 190
+    },
+    {
+      "epoch": 0.014512734924896596,
+      "grad_norm": 2.6875,
+      "learning_rate": 0.0004999340894419668,
+      "loss": 0.2043,
+      "step": 200
+    },
+    {
+      "epoch": 0.015238371671141426,
+      "grad_norm": 0.5,
+      "learning_rate": 0.0004999202489607236,
+      "loss": 0.2865,
+      "step": 210
+    },
+    {
+      "epoch": 0.015964008417386256,
+      "grad_norm": 3.015625,
+      "learning_rate": 0.0004999050906315055,
+      "loss": 0.2039,
+      "step": 220
+    },
+    {
+      "epoch": 0.016689645163631087,
+      "grad_norm": 0.99609375,
+      "learning_rate": 0.0004998886145342434,
+      "loss": 0.3509,
+      "step": 230
+    },
+    {
+      "epoch": 0.017415281909875915,
+      "grad_norm": 1.34375,
+      "learning_rate": 0.0004998708207558168,
+      "loss": 0.3208,
+      "step": 240
+    },
+    {
+      "epoch": 0.018140918656120747,
+      "grad_norm": 2.015625,
+      "learning_rate": 0.0004998517093900539,
+      "loss": 0.2307,
+      "step": 250
+    },
+    {
+      "epoch": 0.018866555402365575,
+      "grad_norm": 1.0703125,
+      "learning_rate": 0.0004998312805377302,
+      "loss": 0.2232,
+      "step": 260
+    },
+    {
+      "epoch": 0.019592192148610407,
+      "grad_norm": 1.7265625,
+      "learning_rate": 0.0004998095343065685,
+      "loss": 0.2587,
+      "step": 270
+    },
+    {
+      "epoch": 0.020317828894855235,
+      "grad_norm": 1.21875,
+      "learning_rate": 0.0004997864708112384,
+      "loss": 0.2175,
+      "step": 280
+    },
+    {
+      "epoch": 0.021043465641100066,
+      "grad_norm": 2.234375,
+      "learning_rate": 0.0004997620901733554,
+      "loss": 0.2185,
+      "step": 290
+    },
+    {
+      "epoch": 0.021769102387344894,
+      "grad_norm": 1.2734375,
+      "learning_rate": 0.0004997363925214804,
+      "loss": 0.2409,
+      "step": 300
+    },
+    {
+      "epoch": 0.022494739133589726,
+      "grad_norm": 1.7109375,
+      "learning_rate": 0.000499709377991119,
+      "loss": 0.1976,
+      "step": 310
+    },
+    {
+      "epoch": 0.023220375879834554,
+      "grad_norm": 1.7890625,
+      "learning_rate": 0.0004996810467247207,
+      "loss": 0.2899,
+      "step": 320
+    },
+    {
+      "epoch": 0.023946012626079385,
+      "grad_norm": 1.9296875,
+      "learning_rate": 0.0004996513988716786,
+      "loss": 0.2324,
+      "step": 330
+    },
+    {
+      "epoch": 0.024671649372324213,
+      "grad_norm": 1.59375,
+      "learning_rate": 0.0004996204345883278,
+      "loss": 0.2376,
+      "step": 340
+    },
+    {
+      "epoch": 0.025397286118569045,
+      "grad_norm": 1.734375,
+      "learning_rate": 0.0004995881540379454,
+      "loss": 0.2927,
+      "step": 350
+    },
+    {
+      "epoch": 0.026122922864813873,
+      "grad_norm": 1.9375,
+      "learning_rate": 0.0004995545573907492,
+      "loss": 0.2738,
+      "step": 360
+    },
+    {
+      "epoch": 0.026848559611058705,
+      "grad_norm": 2.421875,
+      "learning_rate": 0.0004995196448238966,
+      "loss": 0.2427,
+      "step": 370
+    },
+    {
+      "epoch": 0.027574196357303533,
+      "grad_norm": 2.671875,
+      "learning_rate": 0.0004994834165214843,
+      "loss": 0.2032,
+      "step": 380
+    },
+    {
+      "epoch": 0.028299833103548364,
+      "grad_norm": 3.328125,
+      "learning_rate": 0.0004994458726745468,
+      "loss": 0.2748,
+      "step": 390
+    },
+    {
+      "epoch": 0.029025469849793192,
+      "grad_norm": 0.498046875,
+      "learning_rate": 0.0004994070134810556,
+      "loss": 0.2275,
+      "step": 400
+    },
+    {
+      "epoch": 0.029751106596038024,
+      "grad_norm": 1.0703125,
+      "learning_rate": 0.000499366839145918,
+      "loss": 0.1725,
+      "step": 410
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 13781,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 10,
+  "total_flos": 0.0,
+  "train_batch_size": 5,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-410/training_args.bin b/checkpoint-410/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..7d5e801f990cb70d84bd631ef6b3738864bdfd3d
--- /dev/null
+++ b/checkpoint-410/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3cf317b5a0322fdf5b8c610bfc4afb6ece34736fcb1bea30f92c1212b9b53d69
+size 4984
diff --git a/checkpoint-420/README.md b/checkpoint-420/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..64d55750c9c68c8ef0ccff2f65b1fd32a16578a9
--- /dev/null
+++ b/checkpoint-420/README.md
@@ -0,0 +1,202 @@
+---
+library_name: peft
+base_model: ../pretrained/MiniCPM-2B-dpo-bf16/
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.10.0
\ No newline at end of file
diff --git a/checkpoint-420/adapter_config.json b/checkpoint-420/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..d0eb9db7c6b0c827aba0e86e5057f651b0f868c9
--- /dev/null
+++ b/checkpoint-420/adapter_config.json
@@ -0,0 +1,29 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "../pretrained/MiniCPM-2B-dpo-bf16/",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": "gaussian",
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-420/adapter_model.safetensors b/checkpoint-420/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..5249d7efa9e10584efb0bc2ce61e39f84f06d977
--- /dev/null
+++ b/checkpoint-420/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ad77eee216e029f37eae29abfc42f05d3fa392123cffa26886165c4b9c36f7f7
+size 5919456
diff --git a/checkpoint-420/optimizer.pt b/checkpoint-420/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..179bdd24c90a26c63b80e549503373e7d746254b
--- /dev/null
+++ b/checkpoint-420/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c3c92c05d51abb66062bedcb7592ba346aa213f66a497d5e0ce1d7e41c85ca5a
+size 11930938
diff --git a/checkpoint-420/rng_state_0.pth b/checkpoint-420/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..f357702f75f062f4be4aead2f569b9b57bb5bf0d
--- /dev/null
+++ b/checkpoint-420/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:426efd39e791d9aad034be3a3278882178e0bafd0949980040e6e0f4e76007ce
+size 15024
diff --git a/checkpoint-420/rng_state_1.pth b/checkpoint-420/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..a7cf9841f4b8d2be7fa958d72ac2c2ce771cce27
--- /dev/null
+++ b/checkpoint-420/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ff1be62cf8f366636ea8722d0645e8074ae04aaa5d3bbc7f1e02d041b584f247
+size 15024
diff --git a/checkpoint-420/rng_state_2.pth b/checkpoint-420/rng_state_2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..b3474a0f04c7d4978bc633c074bb7130a9c059e4
--- /dev/null
+++ b/checkpoint-420/rng_state_2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0c87960dbea60a0ad11846750febf910a22e6bab4ab82a44ec3a565aff8131f2
+size 15024
diff --git a/checkpoint-420/rng_state_3.pth b/checkpoint-420/rng_state_3.pth
new file mode 100644
index 0000000000000000000000000000000000000000..b1d2adc334eddd3078e2b8e301f47f18f1400c7e
--- /dev/null
+++ b/checkpoint-420/rng_state_3.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:eb74cda1e8bc1c7ac4e43ea326433964d0d12f005655436df314179a50c9b4ce
+size 15024
diff --git a/checkpoint-420/scheduler.pt b/checkpoint-420/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c9508f0b089cb2e6dce23870125a97f7cf7f61c4
--- /dev/null
+++ b/checkpoint-420/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bb87492493b37752fdf7ae8e6e05eedf9c4df28278a7009a0a8920eff2e7411c
+size 1064
diff --git a/checkpoint-420/trainer_state.json b/checkpoint-420/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..57fbe323418b7e02c088707ffbf6219de3469087
--- /dev/null
+++ b/checkpoint-420/trainer_state.json
@@ -0,0 +1,315 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.030476743342282852,
+  "eval_steps": 500,
+  "global_step": 420,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0007256367462448298,
+      "grad_norm": 12.0,
+      "learning_rate": 5e-05,
+      "loss": 3.0993,
+      "step": 10
+    },
+    {
+      "epoch": 0.0014512734924896596,
+      "grad_norm": 10.4375,
+      "learning_rate": 0.0001,
+      "loss": 2.208,
+      "step": 20
+    },
+    {
+      "epoch": 0.0021769102387344894,
+      "grad_norm": 6.5625,
+      "learning_rate": 0.00015,
+      "loss": 1.3285,
+      "step": 30
+    },
+    {
+      "epoch": 0.0029025469849793192,
+      "grad_norm": 6.21875,
+      "learning_rate": 0.0002,
+      "loss": 0.6895,
+      "step": 40
+    },
+    {
+      "epoch": 0.003628183731224149,
+      "grad_norm": 4.15625,
+      "learning_rate": 0.00025,
+      "loss": 0.714,
+      "step": 50
+    },
+    {
+      "epoch": 0.004353820477468979,
+      "grad_norm": 4.71875,
+      "learning_rate": 0.0003,
+      "loss": 0.4849,
+      "step": 60
+    },
+    {
+      "epoch": 0.005079457223713809,
+      "grad_norm": 3.828125,
+      "learning_rate": 0.00035,
+      "loss": 0.3671,
+      "step": 70
+    },
+    {
+      "epoch": 0.0058050939699586385,
+      "grad_norm": 4.03125,
+      "learning_rate": 0.0004,
+      "loss": 0.5693,
+      "step": 80
+    },
+    {
+      "epoch": 0.006530730716203468,
+      "grad_norm": 3.296875,
+      "learning_rate": 0.00045000000000000004,
+      "loss": 0.4133,
+      "step": 90
+    },
+    {
+      "epoch": 0.007256367462448298,
+      "grad_norm": 2.640625,
+      "learning_rate": 0.0005,
+      "loss": 0.2668,
+      "step": 100
+    },
+    {
+      "epoch": 0.007982004208693128,
+      "grad_norm": 1.5859375,
+      "learning_rate": 0.000499999340865746,
+      "loss": 0.2922,
+      "step": 110
+    },
+    {
+      "epoch": 0.008707640954937958,
+      "grad_norm": 3.8125,
+      "learning_rate": 0.0004999973634664594,
+      "loss": 0.3996,
+      "step": 120
+    },
+    {
+      "epoch": 0.009433277701182788,
+      "grad_norm": 2.0625,
+      "learning_rate": 0.0004999940678125673,
+      "loss": 0.2841,
+      "step": 130
+    },
+    {
+      "epoch": 0.010158914447427617,
+      "grad_norm": 3.390625,
+      "learning_rate": 0.000499989453921448,
+      "loss": 0.3003,
+      "step": 140
+    },
+    {
+      "epoch": 0.010884551193672447,
+      "grad_norm": 1.65625,
+      "learning_rate": 0.0004999835218174307,
+      "loss": 0.2747,
+      "step": 150
+    },
+    {
+      "epoch": 0.011610187939917277,
+      "grad_norm": 1.671875,
+      "learning_rate": 0.000499976271531796,
+      "loss": 0.37,
+      "step": 160
+    },
+    {
+      "epoch": 0.012335824686162107,
+      "grad_norm": 1.0390625,
+      "learning_rate": 0.000499967703102775,
+      "loss": 0.2163,
+      "step": 170
+    },
+    {
+      "epoch": 0.013061461432406937,
+      "grad_norm": 2.546875,
+      "learning_rate": 0.00049995781657555,
+      "loss": 0.3652,
+      "step": 180
+    },
+    {
+      "epoch": 0.013787098178651766,
+      "grad_norm": 3.71875,
+      "learning_rate": 0.000499946612002253,
+      "loss": 0.3177,
+      "step": 190
+    },
+    {
+      "epoch": 0.014512734924896596,
+      "grad_norm": 2.6875,
+      "learning_rate": 0.0004999340894419668,
+      "loss": 0.2043,
+      "step": 200
+    },
+    {
+      "epoch": 0.015238371671141426,
+      "grad_norm": 0.5,
+      "learning_rate": 0.0004999202489607236,
+      "loss": 0.2865,
+      "step": 210
+    },
+    {
+      "epoch": 0.015964008417386256,
+      "grad_norm": 3.015625,
+      "learning_rate": 0.0004999050906315055,
+      "loss": 0.2039,
+      "step": 220
+    },
+    {
+      "epoch": 0.016689645163631087,
+      "grad_norm": 0.99609375,
+      "learning_rate": 0.0004998886145342434,
+      "loss": 0.3509,
+      "step": 230
+    },
+    {
+      "epoch": 0.017415281909875915,
+      "grad_norm": 1.34375,
+      "learning_rate": 0.0004998708207558168,
+      "loss": 0.3208,
+      "step": 240
+    },
+    {
+      "epoch": 0.018140918656120747,
+      "grad_norm": 2.015625,
+      "learning_rate": 0.0004998517093900539,
+      "loss": 0.2307,
+      "step": 250
+    },
+    {
+      "epoch": 0.018866555402365575,
+      "grad_norm": 1.0703125,
+      "learning_rate": 0.0004998312805377302,
+      "loss": 0.2232,
+      "step": 260
+    },
+    {
+      "epoch": 0.019592192148610407,
+      "grad_norm": 1.7265625,
+      "learning_rate": 0.0004998095343065685,
+      "loss": 0.2587,
+      "step": 270
+    },
+    {
+      "epoch": 0.020317828894855235,
+      "grad_norm": 1.21875,
+      "learning_rate": 0.0004997864708112384,
+      "loss": 0.2175,
+      "step": 280
+    },
+    {
+      "epoch": 0.021043465641100066,
+      "grad_norm": 2.234375,
+      "learning_rate": 0.0004997620901733554,
+      "loss": 0.2185,
+      "step": 290
+    },
+    {
+      "epoch": 0.021769102387344894,
+      "grad_norm": 1.2734375,
+      "learning_rate": 0.0004997363925214804,
+      "loss": 0.2409,
+      "step": 300
+    },
+    {
+      "epoch": 0.022494739133589726,
+      "grad_norm": 1.7109375,
+      "learning_rate": 0.000499709377991119,
+      "loss": 0.1976,
+      "step": 310
+    },
+    {
+      "epoch": 0.023220375879834554,
+      "grad_norm": 1.7890625,
+      "learning_rate": 0.0004996810467247207,
+      "loss": 0.2899,
+      "step": 320
+    },
+    {
+      "epoch": 0.023946012626079385,
+      "grad_norm": 1.9296875,
+      "learning_rate": 0.0004996513988716786,
+      "loss": 0.2324,
+      "step": 330
+    },
+    {
+      "epoch": 0.024671649372324213,
+      "grad_norm": 1.59375,
+      "learning_rate": 0.0004996204345883278,
+      "loss": 0.2376,
+      "step": 340
+    },
+    {
+      "epoch": 0.025397286118569045,
+      "grad_norm": 1.734375,
+      "learning_rate": 0.0004995881540379454,
+      "loss": 0.2927,
+      "step": 350
+    },
+    {
+      "epoch": 0.026122922864813873,
+      "grad_norm": 1.9375,
+      "learning_rate": 0.0004995545573907492,
+      "loss": 0.2738,
+      "step": 360
+    },
+    {
+      "epoch": 0.026848559611058705,
+      "grad_norm": 2.421875,
+      "learning_rate": 0.0004995196448238966,
+      "loss": 0.2427,
+      "step": 370
+    },
+    {
+      "epoch": 0.027574196357303533,
+      "grad_norm": 2.671875,
+      "learning_rate": 0.0004994834165214843,
+      "loss": 0.2032,
+      "step": 380
+    },
+    {
+      "epoch": 0.028299833103548364,
+      "grad_norm": 3.328125,
+      "learning_rate": 0.0004994458726745468,
+      "loss": 0.2748,
+      "step": 390
+    },
+    {
+      "epoch": 0.029025469849793192,
+      "grad_norm": 0.498046875,
+      "learning_rate": 0.0004994070134810556,
+      "loss": 0.2275,
+      "step": 400
+    },
+    {
+      "epoch": 0.029751106596038024,
+      "grad_norm": 1.0703125,
+      "learning_rate": 0.000499366839145918,
+      "loss": 0.1725,
+      "step": 410
+    },
+    {
+      "epoch": 0.030476743342282852,
+      "grad_norm": 3.46875,
+      "learning_rate": 0.0004993253498809762,
+      "loss": 0.2298,
+      "step": 420
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 13781,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 10,
+  "total_flos": 0.0,
+  "train_batch_size": 5,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-420/training_args.bin b/checkpoint-420/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..7d5e801f990cb70d84bd631ef6b3738864bdfd3d
--- /dev/null
+++ b/checkpoint-420/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3cf317b5a0322fdf5b8c610bfc4afb6ece34736fcb1bea30f92c1212b9b53d69
+size 4984
diff --git a/checkpoint-430/README.md b/checkpoint-430/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..64d55750c9c68c8ef0ccff2f65b1fd32a16578a9
--- /dev/null
+++ b/checkpoint-430/README.md
@@ -0,0 +1,202 @@
+---
+library_name: peft
+base_model: ../pretrained/MiniCPM-2B-dpo-bf16/
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.10.0
\ No newline at end of file
diff --git a/checkpoint-430/adapter_config.json b/checkpoint-430/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..d0eb9db7c6b0c827aba0e86e5057f651b0f868c9
--- /dev/null
+++ b/checkpoint-430/adapter_config.json
@@ -0,0 +1,29 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "../pretrained/MiniCPM-2B-dpo-bf16/",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": "gaussian",
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-430/adapter_model.safetensors b/checkpoint-430/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..3cdd6f30ffc8d5e7cd7bf31071f23ba3b9d61846
--- /dev/null
+++ b/checkpoint-430/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4e1f7b22bd8507dbd5bb814833c950a08dfe6c0865fb807cc0d796d20ade07a5
+size 5919456
diff --git a/checkpoint-430/optimizer.pt b/checkpoint-430/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f2df2b22281a3d7a96269e3ba1d08b2a5aa22cd5
--- /dev/null
+++ b/checkpoint-430/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:338104f4cd5e0db44983fbc5edeaf69c0cd77c4bf47d4f80a12f15c653f4d1a8
+size 11930938
diff --git a/checkpoint-430/rng_state_0.pth b/checkpoint-430/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..8058aa36b0d4b9a67ba1c1bce8485cd3fde721bc
--- /dev/null
+++ b/checkpoint-430/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7450fd18935b074e303410c6ee7b1759df9e00ff6ca83a4aea7b3d18b0ae72f6
+size 15024
diff --git a/checkpoint-430/rng_state_1.pth b/checkpoint-430/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..c0d2b91f99b0aa9e554676f946104f185dbe4b50
--- /dev/null
+++ b/checkpoint-430/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:335f98ff53adc4b4ae643ea0ece2aa713eeb67e998dd892eebe667331ebf7953
+size 15024
diff --git a/checkpoint-430/rng_state_2.pth b/checkpoint-430/rng_state_2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..75f9ff5802d81a692dad10c4d13b994e0da922c9
--- /dev/null
+++ b/checkpoint-430/rng_state_2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1ad4ef9d87614133017d84fa3f684c7a3aebcd4e1295ed37e3dc4d16cf359822
+size 15024
diff --git a/checkpoint-430/rng_state_3.pth b/checkpoint-430/rng_state_3.pth
new file mode 100644
index 0000000000000000000000000000000000000000..9f4bbba56f7df987cfa068bbe8c82860521263df
--- /dev/null
+++ b/checkpoint-430/rng_state_3.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e4e2215190444d86848e0357b60bd13cc0ccacb2c02b81a1b80097a272eed147
+size 15024
diff --git a/checkpoint-430/scheduler.pt b/checkpoint-430/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9cecd70a56f95744601b7f5119422735ed1c5984
--- /dev/null
+++ b/checkpoint-430/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8dd643d3ffa6c9375c04f3cfddf51a9c83d31bb6a922429e2a40b2461785e77a
+size 1064
diff --git a/checkpoint-430/trainer_state.json b/checkpoint-430/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..9b7ffd116e649a77db8574ce684af4c4f8041bec
--- /dev/null
+++ b/checkpoint-430/trainer_state.json
@@ -0,0 +1,322 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.031202380088527683,
+  "eval_steps": 500,
+  "global_step": 430,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0007256367462448298,
+      "grad_norm": 12.0,
+      "learning_rate": 5e-05,
+      "loss": 3.0993,
+      "step": 10
+    },
+    {
+      "epoch": 0.0014512734924896596,
+      "grad_norm": 10.4375,
+      "learning_rate": 0.0001,
+      "loss": 2.208,
+      "step": 20
+    },
+    {
+      "epoch": 0.0021769102387344894,
+      "grad_norm": 6.5625,
+      "learning_rate": 0.00015,
+      "loss": 1.3285,
+      "step": 30
+    },
+    {
+      "epoch": 0.0029025469849793192,
+      "grad_norm": 6.21875,
+      "learning_rate": 0.0002,
+      "loss": 0.6895,
+      "step": 40
+    },
+    {
+      "epoch": 0.003628183731224149,
+      "grad_norm": 4.15625,
+      "learning_rate": 0.00025,
+      "loss": 0.714,
+      "step": 50
+    },
+    {
+      "epoch": 0.004353820477468979,
+      "grad_norm": 4.71875,
+      "learning_rate": 0.0003,
+      "loss": 0.4849,
+      "step": 60
+    },
+    {
+      "epoch": 0.005079457223713809,
+      "grad_norm": 3.828125,
+      "learning_rate": 0.00035,
+      "loss": 0.3671,
+      "step": 70
+    },
+    {
+      "epoch": 0.0058050939699586385,
+      "grad_norm": 4.03125,
+      "learning_rate": 0.0004,
+      "loss": 0.5693,
+      "step": 80
+    },
+    {
+      "epoch": 0.006530730716203468,
+      "grad_norm": 3.296875,
+      "learning_rate": 0.00045000000000000004,
+      "loss": 0.4133,
+      "step": 90
+    },
+    {
+      "epoch": 0.007256367462448298,
+      "grad_norm": 2.640625,
+      "learning_rate": 0.0005,
+      "loss": 0.2668,
+      "step": 100
+    },
+    {
+      "epoch": 0.007982004208693128,
+      "grad_norm": 1.5859375,
+      "learning_rate": 0.000499999340865746,
+      "loss": 0.2922,
+      "step": 110
+    },
+    {
+      "epoch": 0.008707640954937958,
+      "grad_norm": 3.8125,
+      "learning_rate": 0.0004999973634664594,
+      "loss": 0.3996,
+      "step": 120
+    },
+    {
+      "epoch": 0.009433277701182788,
+      "grad_norm": 2.0625,
+      "learning_rate": 0.0004999940678125673,
+      "loss": 0.2841,
+      "step": 130
+    },
+    {
+      "epoch": 0.010158914447427617,
+      "grad_norm": 3.390625,
+      "learning_rate": 0.000499989453921448,
+      "loss": 0.3003,
+      "step": 140
+    },
+    {
+      "epoch": 0.010884551193672447,
+      "grad_norm": 1.65625,
+      "learning_rate": 0.0004999835218174307,
+      "loss": 0.2747,
+      "step": 150
+    },
+    {
+      "epoch": 0.011610187939917277,
+      "grad_norm": 1.671875,
+      "learning_rate": 0.000499976271531796,
+      "loss": 0.37,
+      "step": 160
+    },
+    {
+      "epoch": 0.012335824686162107,
+      "grad_norm": 1.0390625,
+      "learning_rate": 0.000499967703102775,
+      "loss": 0.2163,
+      "step": 170
+    },
+    {
+      "epoch": 0.013061461432406937,
+      "grad_norm": 2.546875,
+      "learning_rate": 0.00049995781657555,
+      "loss": 0.3652,
+      "step": 180
+    },
+    {
+      "epoch": 0.013787098178651766,
+      "grad_norm": 3.71875,
+      "learning_rate": 0.000499946612002253,
+      "loss": 0.3177,
+      "step": 190
+    },
+    {
+      "epoch": 0.014512734924896596,
+      "grad_norm": 2.6875,
+      "learning_rate": 0.0004999340894419668,
+      "loss": 0.2043,
+      "step": 200
+    },
+    {
+      "epoch": 0.015238371671141426,
+      "grad_norm": 0.5,
+      "learning_rate": 0.0004999202489607236,
+      "loss": 0.2865,
+      "step": 210
+    },
+    {
+      "epoch": 0.015964008417386256,
+      "grad_norm": 3.015625,
+      "learning_rate": 0.0004999050906315055,
+      "loss": 0.2039,
+      "step": 220
+    },
+    {
+      "epoch": 0.016689645163631087,
+      "grad_norm": 0.99609375,
+      "learning_rate": 0.0004998886145342434,
+      "loss": 0.3509,
+      "step": 230
+    },
+    {
+      "epoch": 0.017415281909875915,
+      "grad_norm": 1.34375,
+      "learning_rate": 0.0004998708207558168,
+      "loss": 0.3208,
+      "step": 240
+    },
+    {
+      "epoch": 0.018140918656120747,
+      "grad_norm": 2.015625,
+      "learning_rate": 0.0004998517093900539,
+      "loss": 0.2307,
+      "step": 250
+    },
+    {
+      "epoch": 0.018866555402365575,
+      "grad_norm": 1.0703125,
+      "learning_rate": 0.0004998312805377302,
+      "loss": 0.2232,
+      "step": 260
+    },
+    {
+      "epoch": 0.019592192148610407,
+      "grad_norm": 1.7265625,
+      "learning_rate": 0.0004998095343065685,
+      "loss": 0.2587,
+      "step": 270
+    },
+    {
+      "epoch": 0.020317828894855235,
+      "grad_norm": 1.21875,
+      "learning_rate": 0.0004997864708112384,
+      "loss": 0.2175,
+      "step": 280
+    },
+    {
+      "epoch": 0.021043465641100066,
+      "grad_norm": 2.234375,
+      "learning_rate": 0.0004997620901733554,
+      "loss": 0.2185,
+      "step": 290
+    },
+    {
+      "epoch": 0.021769102387344894,
+      "grad_norm": 1.2734375,
+      "learning_rate": 0.0004997363925214804,
+      "loss": 0.2409,
+      "step": 300
+    },
+    {
+      "epoch": 0.022494739133589726,
+      "grad_norm": 1.7109375,
+      "learning_rate": 0.000499709377991119,
+      "loss": 0.1976,
+      "step": 310
+    },
+    {
+      "epoch": 0.023220375879834554,
+      "grad_norm": 1.7890625,
+      "learning_rate": 0.0004996810467247207,
+      "loss": 0.2899,
+      "step": 320
+    },
+    {
+      "epoch": 0.023946012626079385,
+      "grad_norm": 1.9296875,
+      "learning_rate": 0.0004996513988716786,
+      "loss": 0.2324,
+      "step": 330
+    },
+    {
+      "epoch": 0.024671649372324213,
+      "grad_norm": 1.59375,
+      "learning_rate": 0.0004996204345883278,
+      "loss": 0.2376,
+      "step": 340
+    },
+    {
+      "epoch": 0.025397286118569045,
+      "grad_norm": 1.734375,
+      "learning_rate": 0.0004995881540379454,
+      "loss": 0.2927,
+      "step": 350
+    },
+    {
+      "epoch": 0.026122922864813873,
+      "grad_norm": 1.9375,
+      "learning_rate": 0.0004995545573907492,
+      "loss": 0.2738,
+      "step": 360
+    },
+    {
+      "epoch": 0.026848559611058705,
+      "grad_norm": 2.421875,
+      "learning_rate": 0.0004995196448238966,
+      "loss": 0.2427,
+      "step": 370
+    },
+    {
+      "epoch": 0.027574196357303533,
+      "grad_norm": 2.671875,
+      "learning_rate": 0.0004994834165214843,
+      "loss": 0.2032,
+      "step": 380
+    },
+    {
+      "epoch": 0.028299833103548364,
+      "grad_norm": 3.328125,
+      "learning_rate": 0.0004994458726745468,
+      "loss": 0.2748,
+      "step": 390
+    },
+    {
+      "epoch": 0.029025469849793192,
+      "grad_norm": 0.498046875,
+      "learning_rate": 0.0004994070134810556,
+      "loss": 0.2275,
+      "step": 400
+    },
+    {
+      "epoch": 0.029751106596038024,
+      "grad_norm": 1.0703125,
+      "learning_rate": 0.000499366839145918,
+      "loss": 0.1725,
+      "step": 410
+    },
+    {
+      "epoch": 0.030476743342282852,
+      "grad_norm": 3.46875,
+      "learning_rate": 0.0004993253498809762,
+      "loss": 0.2298,
+      "step": 420
+    },
+    {
+      "epoch": 0.031202380088527683,
+      "grad_norm": 2.875,
+      "learning_rate": 0.0004992825459050064,
+      "loss": 0.2721,
+      "step": 430
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 13781,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 10,
+  "total_flos": 0.0,
+  "train_batch_size": 5,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-430/training_args.bin b/checkpoint-430/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..7d5e801f990cb70d84bd631ef6b3738864bdfd3d
--- /dev/null
+++ b/checkpoint-430/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3cf317b5a0322fdf5b8c610bfc4afb6ece34736fcb1bea30f92c1212b9b53d69
+size 4984
diff --git a/checkpoint-440/README.md b/checkpoint-440/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..64d55750c9c68c8ef0ccff2f65b1fd32a16578a9
--- /dev/null
+++ b/checkpoint-440/README.md
@@ -0,0 +1,202 @@
+---
+library_name: peft
+base_model: ../pretrained/MiniCPM-2B-dpo-bf16/
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.10.0
\ No newline at end of file
diff --git a/checkpoint-440/adapter_config.json b/checkpoint-440/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..d0eb9db7c6b0c827aba0e86e5057f651b0f868c9
--- /dev/null
+++ b/checkpoint-440/adapter_config.json
@@ -0,0 +1,29 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "../pretrained/MiniCPM-2B-dpo-bf16/",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": "gaussian",
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-440/adapter_model.safetensors b/checkpoint-440/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..b2f09522e3d9aa5be315333e0da024be036acbbe
--- /dev/null
+++ b/checkpoint-440/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e990203216e015d5e919e2dc343e3caeb210aa59f9d1a731dd551d2fb187dbe6
+size 5919456
diff --git a/checkpoint-440/optimizer.pt b/checkpoint-440/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8d80651128906c0d53fa769fd5a0abc1e318d507
--- /dev/null
+++ b/checkpoint-440/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:591ce317ae528234ed15a97886d823ebd9e0915acdf94cef933885111e8bf070
+size 11930938
diff --git a/checkpoint-440/rng_state_0.pth b/checkpoint-440/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..4d2636ceb8a14218a8e0d1476a39b6e0e7a03cb5
--- /dev/null
+++ b/checkpoint-440/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2e670d828f171705ef1de0e28997d07c7dca7a125ec52bc17025aaf4efe7ea5d
+size 15024
diff --git a/checkpoint-440/rng_state_1.pth b/checkpoint-440/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..ffa40b71fe39bd6dd67895b18dc6a81f0a1334f7
--- /dev/null
+++ b/checkpoint-440/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:23bb6217c29f859f909cb586c1ef966c2c25c18683ffdfdf5947b632a1f81e05
+size 15024
diff --git a/checkpoint-440/rng_state_2.pth b/checkpoint-440/rng_state_2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..65d5ef1e9a8fd69d583a07620b440cebb18536f9
--- /dev/null
+++ b/checkpoint-440/rng_state_2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:85f9333f382174e1e1a5d70186992b682a1cf238376145534607c43e01390625
+size 15024
diff --git a/checkpoint-440/rng_state_3.pth b/checkpoint-440/rng_state_3.pth
new file mode 100644
index 0000000000000000000000000000000000000000..274b738231d0fe4d66d5314b450de19158e77bc0
--- /dev/null
+++ b/checkpoint-440/rng_state_3.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e92d697b8f695ccff1f67998fb1248fb948fc7ab5770f451a3221a884d92ad42
+size 15024
diff --git a/checkpoint-440/scheduler.pt b/checkpoint-440/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..789fe6070131cc5798a5fa856549f59d3c0aefea
--- /dev/null
+++ b/checkpoint-440/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:95c83f64770c76234c0f4946573f20b470ef411143afc35ba335bd6d438e71f1
+size 1064
diff --git a/checkpoint-440/trainer_state.json b/checkpoint-440/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..7fe7e5d72f48f6c3f40b75d2eb25d6cc9d441647
--- /dev/null
+++ b/checkpoint-440/trainer_state.json
@@ -0,0 +1,329 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.03192801683477251,
+  "eval_steps": 500,
+  "global_step": 440,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0007256367462448298,
+      "grad_norm": 12.0,
+      "learning_rate": 5e-05,
+      "loss": 3.0993,
+      "step": 10
+    },
+    {
+      "epoch": 0.0014512734924896596,
+      "grad_norm": 10.4375,
+      "learning_rate": 0.0001,
+      "loss": 2.208,
+      "step": 20
+    },
+    {
+      "epoch": 0.0021769102387344894,
+      "grad_norm": 6.5625,
+      "learning_rate": 0.00015,
+      "loss": 1.3285,
+      "step": 30
+    },
+    {
+      "epoch": 0.0029025469849793192,
+      "grad_norm": 6.21875,
+      "learning_rate": 0.0002,
+      "loss": 0.6895,
+      "step": 40
+    },
+    {
+      "epoch": 0.003628183731224149,
+      "grad_norm": 4.15625,
+      "learning_rate": 0.00025,
+      "loss": 0.714,
+      "step": 50
+    },
+    {
+      "epoch": 0.004353820477468979,
+      "grad_norm": 4.71875,
+      "learning_rate": 0.0003,
+      "loss": 0.4849,
+      "step": 60
+    },
+    {
+      "epoch": 0.005079457223713809,
+      "grad_norm": 3.828125,
+      "learning_rate": 0.00035,
+      "loss": 0.3671,
+      "step": 70
+    },
+    {
+      "epoch": 0.0058050939699586385,
+      "grad_norm": 4.03125,
+      "learning_rate": 0.0004,
+      "loss": 0.5693,
+      "step": 80
+    },
+    {
+      "epoch": 0.006530730716203468,
+      "grad_norm": 3.296875,
+      "learning_rate": 0.00045000000000000004,
+      "loss": 0.4133,
+      "step": 90
+    },
+    {
+      "epoch": 0.007256367462448298,
+      "grad_norm": 2.640625,
+      "learning_rate": 0.0005,
+      "loss": 0.2668,
+      "step": 100
+    },
+    {
+      "epoch": 0.007982004208693128,
+      "grad_norm": 1.5859375,
+      "learning_rate": 0.000499999340865746,
+      "loss": 0.2922,
+      "step": 110
+    },
+    {
+      "epoch": 0.008707640954937958,
+      "grad_norm": 3.8125,
+      "learning_rate": 0.0004999973634664594,
+      "loss": 0.3996,
+      "step": 120
+    },
+    {
+      "epoch": 0.009433277701182788,
+      "grad_norm": 2.0625,
+      "learning_rate": 0.0004999940678125673,
+      "loss": 0.2841,
+      "step": 130
+    },
+    {
+      "epoch": 0.010158914447427617,
+      "grad_norm": 3.390625,
+      "learning_rate": 0.000499989453921448,
+      "loss": 0.3003,
+      "step": 140
+    },
+    {
+      "epoch": 0.010884551193672447,
+      "grad_norm": 1.65625,
+      "learning_rate": 0.0004999835218174307,
+      "loss": 0.2747,
+      "step": 150
+    },
+    {
+      "epoch": 0.011610187939917277,
+      "grad_norm": 1.671875,
+      "learning_rate": 0.000499976271531796,
+      "loss": 0.37,
+      "step": 160
+    },
+    {
+      "epoch": 0.012335824686162107,
+      "grad_norm": 1.0390625,
+      "learning_rate": 0.000499967703102775,
+      "loss": 0.2163,
+      "step": 170
+    },
+    {
+      "epoch": 0.013061461432406937,
+      "grad_norm": 2.546875,
+      "learning_rate": 0.00049995781657555,
+      "loss": 0.3652,
+      "step": 180
+    },
+    {
+      "epoch": 0.013787098178651766,
+      "grad_norm": 3.71875,
+      "learning_rate": 0.000499946612002253,
+      "loss": 0.3177,
+      "step": 190
+    },
+    {
+      "epoch": 0.014512734924896596,
+      "grad_norm": 2.6875,
+      "learning_rate": 0.0004999340894419668,
+      "loss": 0.2043,
+      "step": 200
+    },
+    {
+      "epoch": 0.015238371671141426,
+      "grad_norm": 0.5,
+      "learning_rate": 0.0004999202489607236,
+      "loss": 0.2865,
+      "step": 210
+    },
+    {
+      "epoch": 0.015964008417386256,
+      "grad_norm": 3.015625,
+      "learning_rate": 0.0004999050906315055,
+      "loss": 0.2039,
+      "step": 220
+    },
+    {
+      "epoch": 0.016689645163631087,
+      "grad_norm": 0.99609375,
+      "learning_rate": 0.0004998886145342434,
+      "loss": 0.3509,
+      "step": 230
+    },
+    {
+      "epoch": 0.017415281909875915,
+      "grad_norm": 1.34375,
+      "learning_rate": 0.0004998708207558168,
+      "loss": 0.3208,
+      "step": 240
+    },
+    {
+      "epoch": 0.018140918656120747,
+      "grad_norm": 2.015625,
+      "learning_rate": 0.0004998517093900539,
+      "loss": 0.2307,
+      "step": 250
+    },
+    {
+      "epoch": 0.018866555402365575,
+      "grad_norm": 1.0703125,
+      "learning_rate": 0.0004998312805377302,
+      "loss": 0.2232,
+      "step": 260
+    },
+    {
+      "epoch": 0.019592192148610407,
+      "grad_norm": 1.7265625,
+      "learning_rate": 0.0004998095343065685,
+      "loss": 0.2587,
+      "step": 270
+    },
+    {
+      "epoch": 0.020317828894855235,
+      "grad_norm": 1.21875,
+      "learning_rate": 0.0004997864708112384,
+      "loss": 0.2175,
+      "step": 280
+    },
+    {
+      "epoch": 0.021043465641100066,
+      "grad_norm": 2.234375,
+      "learning_rate": 0.0004997620901733554,
+      "loss": 0.2185,
+      "step": 290
+    },
+    {
+      "epoch": 0.021769102387344894,
+      "grad_norm": 1.2734375,
+      "learning_rate": 0.0004997363925214804,
+      "loss": 0.2409,
+      "step": 300
+    },
+    {
+      "epoch": 0.022494739133589726,
+      "grad_norm": 1.7109375,
+      "learning_rate": 0.000499709377991119,
+      "loss": 0.1976,
+      "step": 310
+    },
+    {
+      "epoch": 0.023220375879834554,
+      "grad_norm": 1.7890625,
+      "learning_rate": 0.0004996810467247207,
+      "loss": 0.2899,
+      "step": 320
+    },
+    {
+      "epoch": 0.023946012626079385,
+      "grad_norm": 1.9296875,
+      "learning_rate": 0.0004996513988716786,
+      "loss": 0.2324,
+      "step": 330
+    },
+    {
+      "epoch": 0.024671649372324213,
+      "grad_norm": 1.59375,
+      "learning_rate": 0.0004996204345883278,
+      "loss": 0.2376,
+      "step": 340
+    },
+    {
+      "epoch": 0.025397286118569045,
+      "grad_norm": 1.734375,
+      "learning_rate": 0.0004995881540379454,
+      "loss": 0.2927,
+      "step": 350
+    },
+    {
+      "epoch": 0.026122922864813873,
+      "grad_norm": 1.9375,
+      "learning_rate": 0.0004995545573907492,
+      "loss": 0.2738,
+      "step": 360
+    },
+    {
+      "epoch": 0.026848559611058705,
+      "grad_norm": 2.421875,
+      "learning_rate": 0.0004995196448238966,
+      "loss": 0.2427,
+      "step": 370
+    },
+    {
+      "epoch": 0.027574196357303533,
+      "grad_norm": 2.671875,
+      "learning_rate": 0.0004994834165214843,
+      "loss": 0.2032,
+      "step": 380
+    },
+    {
+      "epoch": 0.028299833103548364,
+      "grad_norm": 3.328125,
+      "learning_rate": 0.0004994458726745468,
+      "loss": 0.2748,
+      "step": 390
+    },
+    {
+      "epoch": 0.029025469849793192,
+      "grad_norm": 0.498046875,
+      "learning_rate": 0.0004994070134810556,
+      "loss": 0.2275,
+      "step": 400
+    },
+    {
+      "epoch": 0.029751106596038024,
+      "grad_norm": 1.0703125,
+      "learning_rate": 0.000499366839145918,
+      "loss": 0.1725,
+      "step": 410
+    },
+    {
+      "epoch": 0.030476743342282852,
+      "grad_norm": 3.46875,
+      "learning_rate": 0.0004993253498809762,
+      "loss": 0.2298,
+      "step": 420
+    },
+    {
+      "epoch": 0.031202380088527683,
+      "grad_norm": 2.875,
+      "learning_rate": 0.0004992825459050064,
+      "loss": 0.2721,
+      "step": 430
+    },
+    {
+      "epoch": 0.03192801683477251,
+      "grad_norm": 1.6640625,
+      "learning_rate": 0.0004992384274437171,
+      "loss": 0.248,
+      "step": 440
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 13781,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 10,
+  "total_flos": 0.0,
+  "train_batch_size": 5,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-440/training_args.bin b/checkpoint-440/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..7d5e801f990cb70d84bd631ef6b3738864bdfd3d
--- /dev/null
+++ b/checkpoint-440/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3cf317b5a0322fdf5b8c610bfc4afb6ece34736fcb1bea30f92c1212b9b53d69
+size 4984
diff --git a/checkpoint-450/README.md b/checkpoint-450/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..64d55750c9c68c8ef0ccff2f65b1fd32a16578a9
--- /dev/null
+++ b/checkpoint-450/README.md
@@ -0,0 +1,202 @@
+---
+library_name: peft
+base_model: ../pretrained/MiniCPM-2B-dpo-bf16/
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.10.0
\ No newline at end of file
diff --git a/checkpoint-450/adapter_config.json b/checkpoint-450/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..d0eb9db7c6b0c827aba0e86e5057f651b0f868c9
--- /dev/null
+++ b/checkpoint-450/adapter_config.json
@@ -0,0 +1,29 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "../pretrained/MiniCPM-2B-dpo-bf16/",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": "gaussian",
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-450/adapter_model.safetensors b/checkpoint-450/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..fa220bf0fbc2fd32010a6957b57935389d16b2f4
--- /dev/null
+++ b/checkpoint-450/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ec456368a5ec5a95eb4a84f86d4fe1804f007b457f02c7ce913b4f10c4df6ddd
+size 5919456
diff --git a/checkpoint-450/optimizer.pt b/checkpoint-450/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..97c0fd06d361a19a13562b7e15f33dfac957808e
--- /dev/null
+++ b/checkpoint-450/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2fe2f76ead7b737f71463318a99e5fea4db58bd2724f6d4948dabd5110846ff3
+size 11930938
diff --git a/checkpoint-450/rng_state_0.pth b/checkpoint-450/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..0745b698c301be41bd431c96a48dad3a6eb34a3f
--- /dev/null
+++ b/checkpoint-450/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:74c0bbe13ba6abf11c8e74c819dd4812dcf1cd31a15084b5d9a67d92cffcd15a
+size 15024
diff --git a/checkpoint-450/rng_state_1.pth b/checkpoint-450/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..d574d6b69adc36930af48e3de39f40c58b94563e
--- /dev/null
+++ b/checkpoint-450/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4af53bdc297eb334ac93c2f0f17d63f14b6fde6ea4eab4e3168813ead2e95f2a
+size 15024
diff --git a/checkpoint-450/rng_state_2.pth b/checkpoint-450/rng_state_2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..b02735cedf018b9522e0d896eebc25d9fe9ffe6c
--- /dev/null
+++ b/checkpoint-450/rng_state_2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8113f7db0a254d0cdc384448069e86023fafbe13c1d2d6b23e47a02b4ae9dc99
+size 15024
diff --git a/checkpoint-450/rng_state_3.pth b/checkpoint-450/rng_state_3.pth
new file mode 100644
index 0000000000000000000000000000000000000000..e7ac28f9fa89af4f49123ccad74a328c603a376d
--- /dev/null
+++ b/checkpoint-450/rng_state_3.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:369be34e82f746d1ac1423468b34f53a5dfb7cd0bc6c2f70b663f6762401f7c5
+size 15024
diff --git a/checkpoint-450/scheduler.pt b/checkpoint-450/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..04ed3c976c71a6c085c736cf474e9afae921a35b
--- /dev/null
+++ b/checkpoint-450/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ffbd4ab07a686050d9a13d931815df55e8d46fcd882f6b3d693bb5305c838054
+size 1064
diff --git a/checkpoint-450/trainer_state.json b/checkpoint-450/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..a11b0600ed327e73d3a26a86d685e0529fcf5c6b
--- /dev/null
+++ b/checkpoint-450/trainer_state.json
@@ -0,0 +1,336 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.03265365358101734,
+  "eval_steps": 500,
+  "global_step": 450,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0007256367462448298,
+      "grad_norm": 12.0,
+      "learning_rate": 5e-05,
+      "loss": 3.0993,
+      "step": 10
+    },
+    {
+      "epoch": 0.0014512734924896596,
+      "grad_norm": 10.4375,
+      "learning_rate": 0.0001,
+      "loss": 2.208,
+      "step": 20
+    },
+    {
+      "epoch": 0.0021769102387344894,
+      "grad_norm": 6.5625,
+      "learning_rate": 0.00015,
+      "loss": 1.3285,
+      "step": 30
+    },
+    {
+      "epoch": 0.0029025469849793192,
+      "grad_norm": 6.21875,
+      "learning_rate": 0.0002,
+      "loss": 0.6895,
+      "step": 40
+    },
+    {
+      "epoch": 0.003628183731224149,
+      "grad_norm": 4.15625,
+      "learning_rate": 0.00025,
+      "loss": 0.714,
+      "step": 50
+    },
+    {
+      "epoch": 0.004353820477468979,
+      "grad_norm": 4.71875,
+      "learning_rate": 0.0003,
+      "loss": 0.4849,
+      "step": 60
+    },
+    {
+      "epoch": 0.005079457223713809,
+      "grad_norm": 3.828125,
+      "learning_rate": 0.00035,
+      "loss": 0.3671,
+      "step": 70
+    },
+    {
+      "epoch": 0.0058050939699586385,
+      "grad_norm": 4.03125,
+      "learning_rate": 0.0004,
+      "loss": 0.5693,
+      "step": 80
+    },
+    {
+      "epoch": 0.006530730716203468,
+      "grad_norm": 3.296875,
+      "learning_rate": 0.00045000000000000004,
+      "loss": 0.4133,
+      "step": 90
+    },
+    {
+      "epoch": 0.007256367462448298,
+      "grad_norm": 2.640625,
+      "learning_rate": 0.0005,
+      "loss": 0.2668,
+      "step": 100
+    },
+    {
+      "epoch": 0.007982004208693128,
+      "grad_norm": 1.5859375,
+      "learning_rate": 0.000499999340865746,
+      "loss": 0.2922,
+      "step": 110
+    },
+    {
+      "epoch": 0.008707640954937958,
+      "grad_norm": 3.8125,
+      "learning_rate": 0.0004999973634664594,
+      "loss": 0.3996,
+      "step": 120
+    },
+    {
+      "epoch": 0.009433277701182788,
+      "grad_norm": 2.0625,
+      "learning_rate": 0.0004999940678125673,
+      "loss": 0.2841,
+      "step": 130
+    },
+    {
+      "epoch": 0.010158914447427617,
+      "grad_norm": 3.390625,
+      "learning_rate": 0.000499989453921448,
+      "loss": 0.3003,
+      "step": 140
+    },
+    {
+      "epoch": 0.010884551193672447,
+      "grad_norm": 1.65625,
+      "learning_rate": 0.0004999835218174307,
+      "loss": 0.2747,
+      "step": 150
+    },
+    {
+      "epoch": 0.011610187939917277,
+      "grad_norm": 1.671875,
+      "learning_rate": 0.000499976271531796,
+      "loss": 0.37,
+      "step": 160
+    },
+    {
+      "epoch": 0.012335824686162107,
+      "grad_norm": 1.0390625,
+      "learning_rate": 0.000499967703102775,
+      "loss": 0.2163,
+      "step": 170
+    },
+    {
+      "epoch": 0.013061461432406937,
+      "grad_norm": 2.546875,
+      "learning_rate": 0.00049995781657555,
+      "loss": 0.3652,
+      "step": 180
+    },
+    {
+      "epoch": 0.013787098178651766,
+      "grad_norm": 3.71875,
+      "learning_rate": 0.000499946612002253,
+      "loss": 0.3177,
+      "step": 190
+    },
+    {
+      "epoch": 0.014512734924896596,
+      "grad_norm": 2.6875,
+      "learning_rate": 0.0004999340894419668,
+      "loss": 0.2043,
+      "step": 200
+    },
+    {
+      "epoch": 0.015238371671141426,
+      "grad_norm": 0.5,
+      "learning_rate": 0.0004999202489607236,
+      "loss": 0.2865,
+      "step": 210
+    },
+    {
+      "epoch": 0.015964008417386256,
+      "grad_norm": 3.015625,
+      "learning_rate": 0.0004999050906315055,
+      "loss": 0.2039,
+      "step": 220
+    },
+    {
+      "epoch": 0.016689645163631087,
+      "grad_norm": 0.99609375,
+      "learning_rate": 0.0004998886145342434,
+      "loss": 0.3509,
+      "step": 230
+    },
+    {
+      "epoch": 0.017415281909875915,
+      "grad_norm": 1.34375,
+      "learning_rate": 0.0004998708207558168,
+      "loss": 0.3208,
+      "step": 240
+    },
+    {
+      "epoch": 0.018140918656120747,
+      "grad_norm": 2.015625,
+      "learning_rate": 0.0004998517093900539,
+      "loss": 0.2307,
+      "step": 250
+    },
+    {
+      "epoch": 0.018866555402365575,
+      "grad_norm": 1.0703125,
+      "learning_rate": 0.0004998312805377302,
+      "loss": 0.2232,
+      "step": 260
+    },
+    {
+      "epoch": 0.019592192148610407,
+      "grad_norm": 1.7265625,
+      "learning_rate": 0.0004998095343065685,
+      "loss": 0.2587,
+      "step": 270
+    },
+    {
+      "epoch": 0.020317828894855235,
+      "grad_norm": 1.21875,
+      "learning_rate": 0.0004997864708112384,
+      "loss": 0.2175,
+      "step": 280
+    },
+    {
+      "epoch": 0.021043465641100066,
+      "grad_norm": 2.234375,
+      "learning_rate": 0.0004997620901733554,
+      "loss": 0.2185,
+      "step": 290
+    },
+    {
+      "epoch": 0.021769102387344894,
+      "grad_norm": 1.2734375,
+      "learning_rate": 0.0004997363925214804,
+      "loss": 0.2409,
+      "step": 300
+    },
+    {
+      "epoch": 0.022494739133589726,
+      "grad_norm": 1.7109375,
+      "learning_rate": 0.000499709377991119,
+      "loss": 0.1976,
+      "step": 310
+    },
+    {
+      "epoch": 0.023220375879834554,
+      "grad_norm": 1.7890625,
+      "learning_rate": 0.0004996810467247207,
+      "loss": 0.2899,
+      "step": 320
+    },
+    {
+      "epoch": 0.023946012626079385,
+      "grad_norm": 1.9296875,
+      "learning_rate": 0.0004996513988716786,
+      "loss": 0.2324,
+      "step": 330
+    },
+    {
+      "epoch": 0.024671649372324213,
+      "grad_norm": 1.59375,
+      "learning_rate": 0.0004996204345883278,
+      "loss": 0.2376,
+      "step": 340
+    },
+    {
+      "epoch": 0.025397286118569045,
+      "grad_norm": 1.734375,
+      "learning_rate": 0.0004995881540379454,
+      "loss": 0.2927,
+      "step": 350
+    },
+    {
+      "epoch": 0.026122922864813873,
+      "grad_norm": 1.9375,
+      "learning_rate": 0.0004995545573907492,
+      "loss": 0.2738,
+      "step": 360
+    },
+    {
+      "epoch": 0.026848559611058705,
+      "grad_norm": 2.421875,
+      "learning_rate": 0.0004995196448238966,
+      "loss": 0.2427,
+      "step": 370
+    },
+    {
+      "epoch": 0.027574196357303533,
+      "grad_norm": 2.671875,
+      "learning_rate": 0.0004994834165214843,
+      "loss": 0.2032,
+      "step": 380
+    },
+    {
+      "epoch": 0.028299833103548364,
+      "grad_norm": 3.328125,
+      "learning_rate": 0.0004994458726745468,
+      "loss": 0.2748,
+      "step": 390
+    },
+    {
+      "epoch": 0.029025469849793192,
+      "grad_norm": 0.498046875,
+      "learning_rate": 0.0004994070134810556,
+      "loss": 0.2275,
+      "step": 400
+    },
+    {
+      "epoch": 0.029751106596038024,
+      "grad_norm": 1.0703125,
+      "learning_rate": 0.000499366839145918,
+      "loss": 0.1725,
+      "step": 410
+    },
+    {
+      "epoch": 0.030476743342282852,
+      "grad_norm": 3.46875,
+      "learning_rate": 0.0004993253498809762,
+      "loss": 0.2298,
+      "step": 420
+    },
+    {
+      "epoch": 0.031202380088527683,
+      "grad_norm": 2.875,
+      "learning_rate": 0.0004992825459050064,
+      "loss": 0.2721,
+      "step": 430
+    },
+    {
+      "epoch": 0.03192801683477251,
+      "grad_norm": 1.6640625,
+      "learning_rate": 0.0004992384274437171,
+      "loss": 0.248,
+      "step": 440
+    },
+    {
+      "epoch": 0.03265365358101734,
+      "grad_norm": 1.8984375,
+      "learning_rate": 0.000499192994729748,
+      "loss": 0.1529,
+      "step": 450
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 13781,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 10,
+  "total_flos": 0.0,
+  "train_batch_size": 5,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-450/training_args.bin b/checkpoint-450/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..7d5e801f990cb70d84bd631ef6b3738864bdfd3d
--- /dev/null
+++ b/checkpoint-450/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3cf317b5a0322fdf5b8c610bfc4afb6ece34736fcb1bea30f92c1212b9b53d69
+size 4984
diff --git a/checkpoint-460/README.md b/checkpoint-460/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..64d55750c9c68c8ef0ccff2f65b1fd32a16578a9
--- /dev/null
+++ b/checkpoint-460/README.md
@@ -0,0 +1,202 @@
+---
+library_name: peft
+base_model: ../pretrained/MiniCPM-2B-dpo-bf16/
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.10.0
\ No newline at end of file
diff --git a/checkpoint-460/adapter_config.json b/checkpoint-460/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..d0eb9db7c6b0c827aba0e86e5057f651b0f868c9
--- /dev/null
+++ b/checkpoint-460/adapter_config.json
@@ -0,0 +1,29 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "../pretrained/MiniCPM-2B-dpo-bf16/",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": "gaussian",
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-460/adapter_model.safetensors b/checkpoint-460/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..76e4627d9e98a9bf98231b32edf5aff8065fda1f
--- /dev/null
+++ b/checkpoint-460/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f2834ab4b1835eaac8a0e441da86f580a0febbe712e8a28608e10fd2d743eaa7
+size 5919456
diff --git a/checkpoint-460/optimizer.pt b/checkpoint-460/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b995d85411e2b36c42f826eca63a51b73b93afa2
--- /dev/null
+++ b/checkpoint-460/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4de0890369364de0a4b55ee3aa969379e4357f6d02a840e40edb96d61db1e824
+size 11930938
diff --git a/checkpoint-460/rng_state_0.pth b/checkpoint-460/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..b838b908ff331437a6131d2af0e993a2bc005dd4
--- /dev/null
+++ b/checkpoint-460/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3b8845c25047974accbaa9efd358cc36a461745fbf3ab5630bbe550cb2b851c2
+size 15024
diff --git a/checkpoint-460/rng_state_1.pth b/checkpoint-460/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..33f4548ad077877a96fbee360960d1664989a387
--- /dev/null
+++ b/checkpoint-460/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b199e4e6e20c7b33b046371768306edbdced633420d13ce24bafba41b6122d29
+size 15024
diff --git a/checkpoint-460/rng_state_2.pth b/checkpoint-460/rng_state_2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..d56eefe51a3a86da4e791062a7c76f3df0331ffe
--- /dev/null
+++ b/checkpoint-460/rng_state_2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:29753073ef4a349f5fd75b836509a96e00aff64408a70ac0de1cdab063ed75fb
+size 15024
diff --git a/checkpoint-460/rng_state_3.pth b/checkpoint-460/rng_state_3.pth
new file mode 100644
index 0000000000000000000000000000000000000000..884793ddb489c0440cd3b99c10e422577e418add
--- /dev/null
+++ b/checkpoint-460/rng_state_3.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:04be3e3e127fc602bc75808f73c0de94b73bd8099b502291d7806b8705651a73
+size 15024
diff --git a/checkpoint-460/scheduler.pt b/checkpoint-460/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..595cfbbc2fdb98e90e559e71ed140f8820259ce3
--- /dev/null
+++ b/checkpoint-460/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cb3aa7fb9c421f69938febecc48b5cd937a0c7d621015d7e70c5865e96ec49c8
+size 1064
diff --git a/checkpoint-460/trainer_state.json b/checkpoint-460/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..10e4590b40bee11868b9bf78d880c4dfa03f267e
--- /dev/null
+++ b/checkpoint-460/trainer_state.json
@@ -0,0 +1,343 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.033379290327262175,
+  "eval_steps": 500,
+  "global_step": 460,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0007256367462448298,
+      "grad_norm": 12.0,
+      "learning_rate": 5e-05,
+      "loss": 3.0993,
+      "step": 10
+    },
+    {
+      "epoch": 0.0014512734924896596,
+      "grad_norm": 10.4375,
+      "learning_rate": 0.0001,
+      "loss": 2.208,
+      "step": 20
+    },
+    {
+      "epoch": 0.0021769102387344894,
+      "grad_norm": 6.5625,
+      "learning_rate": 0.00015,
+      "loss": 1.3285,
+      "step": 30
+    },
+    {
+      "epoch": 0.0029025469849793192,
+      "grad_norm": 6.21875,
+      "learning_rate": 0.0002,
+      "loss": 0.6895,
+      "step": 40
+    },
+    {
+      "epoch": 0.003628183731224149,
+      "grad_norm": 4.15625,
+      "learning_rate": 0.00025,
+      "loss": 0.714,
+      "step": 50
+    },
+    {
+      "epoch": 0.004353820477468979,
+      "grad_norm": 4.71875,
+      "learning_rate": 0.0003,
+      "loss": 0.4849,
+      "step": 60
+    },
+    {
+      "epoch": 0.005079457223713809,
+      "grad_norm": 3.828125,
+      "learning_rate": 0.00035,
+      "loss": 0.3671,
+      "step": 70
+    },
+    {
+      "epoch": 0.0058050939699586385,
+      "grad_norm": 4.03125,
+      "learning_rate": 0.0004,
+      "loss": 0.5693,
+      "step": 80
+    },
+    {
+      "epoch": 0.006530730716203468,
+      "grad_norm": 3.296875,
+      "learning_rate": 0.00045000000000000004,
+      "loss": 0.4133,
+      "step": 90
+    },
+    {
+      "epoch": 0.007256367462448298,
+      "grad_norm": 2.640625,
+      "learning_rate": 0.0005,
+      "loss": 0.2668,
+      "step": 100
+    },
+    {
+      "epoch": 0.007982004208693128,
+      "grad_norm": 1.5859375,
+      "learning_rate": 0.000499999340865746,
+      "loss": 0.2922,
+      "step": 110
+    },
+    {
+      "epoch": 0.008707640954937958,
+      "grad_norm": 3.8125,
+      "learning_rate": 0.0004999973634664594,
+      "loss": 0.3996,
+      "step": 120
+    },
+    {
+      "epoch": 0.009433277701182788,
+      "grad_norm": 2.0625,
+      "learning_rate": 0.0004999940678125673,
+      "loss": 0.2841,
+      "step": 130
+    },
+    {
+      "epoch": 0.010158914447427617,
+      "grad_norm": 3.390625,
+      "learning_rate": 0.000499989453921448,
+      "loss": 0.3003,
+      "step": 140
+    },
+    {
+      "epoch": 0.010884551193672447,
+      "grad_norm": 1.65625,
+      "learning_rate": 0.0004999835218174307,
+      "loss": 0.2747,
+      "step": 150
+    },
+    {
+      "epoch": 0.011610187939917277,
+      "grad_norm": 1.671875,
+      "learning_rate": 0.000499976271531796,
+      "loss": 0.37,
+      "step": 160
+    },
+    {
+      "epoch": 0.012335824686162107,
+      "grad_norm": 1.0390625,
+      "learning_rate": 0.000499967703102775,
+      "loss": 0.2163,
+      "step": 170
+    },
+    {
+      "epoch": 0.013061461432406937,
+      "grad_norm": 2.546875,
+      "learning_rate": 0.00049995781657555,
+      "loss": 0.3652,
+      "step": 180
+    },
+    {
+      "epoch": 0.013787098178651766,
+      "grad_norm": 3.71875,
+      "learning_rate": 0.000499946612002253,
+      "loss": 0.3177,
+      "step": 190
+    },
+    {
+      "epoch": 0.014512734924896596,
+      "grad_norm": 2.6875,
+      "learning_rate": 0.0004999340894419668,
+      "loss": 0.2043,
+      "step": 200
+    },
+    {
+      "epoch": 0.015238371671141426,
+      "grad_norm": 0.5,
+      "learning_rate": 0.0004999202489607236,
+      "loss": 0.2865,
+      "step": 210
+    },
+    {
+      "epoch": 0.015964008417386256,
+      "grad_norm": 3.015625,
+      "learning_rate": 0.0004999050906315055,
+      "loss": 0.2039,
+      "step": 220
+    },
+    {
+      "epoch": 0.016689645163631087,
+      "grad_norm": 0.99609375,
+      "learning_rate": 0.0004998886145342434,
+      "loss": 0.3509,
+      "step": 230
+    },
+    {
+      "epoch": 0.017415281909875915,
+      "grad_norm": 1.34375,
+      "learning_rate": 0.0004998708207558168,
+      "loss": 0.3208,
+      "step": 240
+    },
+    {
+      "epoch": 0.018140918656120747,
+      "grad_norm": 2.015625,
+      "learning_rate": 0.0004998517093900539,
+      "loss": 0.2307,
+      "step": 250
+    },
+    {
+      "epoch": 0.018866555402365575,
+      "grad_norm": 1.0703125,
+      "learning_rate": 0.0004998312805377302,
+      "loss": 0.2232,
+      "step": 260
+    },
+    {
+      "epoch": 0.019592192148610407,
+      "grad_norm": 1.7265625,
+      "learning_rate": 0.0004998095343065685,
+      "loss": 0.2587,
+      "step": 270
+    },
+    {
+      "epoch": 0.020317828894855235,
+      "grad_norm": 1.21875,
+      "learning_rate": 0.0004997864708112384,
+      "loss": 0.2175,
+      "step": 280
+    },
+    {
+      "epoch": 0.021043465641100066,
+      "grad_norm": 2.234375,
+      "learning_rate": 0.0004997620901733554,
+      "loss": 0.2185,
+      "step": 290
+    },
+    {
+      "epoch": 0.021769102387344894,
+      "grad_norm": 1.2734375,
+      "learning_rate": 0.0004997363925214804,
+      "loss": 0.2409,
+      "step": 300
+    },
+    {
+      "epoch": 0.022494739133589726,
+      "grad_norm": 1.7109375,
+      "learning_rate": 0.000499709377991119,
+      "loss": 0.1976,
+      "step": 310
+    },
+    {
+      "epoch": 0.023220375879834554,
+      "grad_norm": 1.7890625,
+      "learning_rate": 0.0004996810467247207,
+      "loss": 0.2899,
+      "step": 320
+    },
+    {
+      "epoch": 0.023946012626079385,
+      "grad_norm": 1.9296875,
+      "learning_rate": 0.0004996513988716786,
+      "loss": 0.2324,
+      "step": 330
+    },
+    {
+      "epoch": 0.024671649372324213,
+      "grad_norm": 1.59375,
+      "learning_rate": 0.0004996204345883278,
+      "loss": 0.2376,
+      "step": 340
+    },
+    {
+      "epoch": 0.025397286118569045,
+      "grad_norm": 1.734375,
+      "learning_rate": 0.0004995881540379454,
+      "loss": 0.2927,
+      "step": 350
+    },
+    {
+      "epoch": 0.026122922864813873,
+      "grad_norm": 1.9375,
+      "learning_rate": 0.0004995545573907492,
+      "loss": 0.2738,
+      "step": 360
+    },
+    {
+      "epoch": 0.026848559611058705,
+      "grad_norm": 2.421875,
+      "learning_rate": 0.0004995196448238966,
+      "loss": 0.2427,
+      "step": 370
+    },
+    {
+      "epoch": 0.027574196357303533,
+      "grad_norm": 2.671875,
+      "learning_rate": 0.0004994834165214843,
+      "loss": 0.2032,
+      "step": 380
+    },
+    {
+      "epoch": 0.028299833103548364,
+      "grad_norm": 3.328125,
+      "learning_rate": 0.0004994458726745468,
+      "loss": 0.2748,
+      "step": 390
+    },
+    {
+      "epoch": 0.029025469849793192,
+      "grad_norm": 0.498046875,
+      "learning_rate": 0.0004994070134810556,
+      "loss": 0.2275,
+      "step": 400
+    },
+    {
+      "epoch": 0.029751106596038024,
+      "grad_norm": 1.0703125,
+      "learning_rate": 0.000499366839145918,
+      "loss": 0.1725,
+      "step": 410
+    },
+    {
+      "epoch": 0.030476743342282852,
+      "grad_norm": 3.46875,
+      "learning_rate": 0.0004993253498809762,
+      "loss": 0.2298,
+      "step": 420
+    },
+    {
+      "epoch": 0.031202380088527683,
+      "grad_norm": 2.875,
+      "learning_rate": 0.0004992825459050064,
+      "loss": 0.2721,
+      "step": 430
+    },
+    {
+      "epoch": 0.03192801683477251,
+      "grad_norm": 1.6640625,
+      "learning_rate": 0.0004992384274437171,
+      "loss": 0.248,
+      "step": 440
+    },
+    {
+      "epoch": 0.03265365358101734,
+      "grad_norm": 1.8984375,
+      "learning_rate": 0.000499192994729748,
+      "loss": 0.1529,
+      "step": 450
+    },
+    {
+      "epoch": 0.033379290327262175,
+      "grad_norm": 2.234375,
+      "learning_rate": 0.0004991462480026693,
+      "loss": 0.2584,
+      "step": 460
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 13781,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 10,
+  "total_flos": 0.0,
+  "train_batch_size": 5,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-460/training_args.bin b/checkpoint-460/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..7d5e801f990cb70d84bd631ef6b3738864bdfd3d
--- /dev/null
+++ b/checkpoint-460/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3cf317b5a0322fdf5b8c610bfc4afb6ece34736fcb1bea30f92c1212b9b53d69
+size 4984
diff --git a/checkpoint-470/README.md b/checkpoint-470/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..64d55750c9c68c8ef0ccff2f65b1fd32a16578a9
--- /dev/null
+++ b/checkpoint-470/README.md
@@ -0,0 +1,202 @@
+---
+library_name: peft
+base_model: ../pretrained/MiniCPM-2B-dpo-bf16/
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.10.0
\ No newline at end of file
diff --git a/checkpoint-470/adapter_config.json b/checkpoint-470/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..d0eb9db7c6b0c827aba0e86e5057f651b0f868c9
--- /dev/null
+++ b/checkpoint-470/adapter_config.json
@@ -0,0 +1,29 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "../pretrained/MiniCPM-2B-dpo-bf16/",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": "gaussian",
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-470/adapter_model.safetensors b/checkpoint-470/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..a60310f78f7362da8cca0a9dc8fe75b41a5afaff
--- /dev/null
+++ b/checkpoint-470/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:26b0fc6c1077fa23c5494d25aaa540602afd3833bb75ab368993d0e9955acf47
+size 5919456
diff --git a/checkpoint-470/optimizer.pt b/checkpoint-470/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d1a96295a37518359ac6908800d8ccb637dab21b
--- /dev/null
+++ b/checkpoint-470/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bee701e618d96e0e3cc866adf457dddc951a2ae06e8011a54ece1879398389ac
+size 11930938
diff --git a/checkpoint-470/rng_state_0.pth b/checkpoint-470/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..0688ed11d1c135deff86938945f90db7ed5ddbd1
--- /dev/null
+++ b/checkpoint-470/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:adb32e48094a0dac7955fa2f09f16dac717c26183a1658a52b7573a5de626d15
+size 15024
diff --git a/checkpoint-470/rng_state_1.pth b/checkpoint-470/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..0b6938c07f65a9809de4d22b9e0318498658493c
--- /dev/null
+++ b/checkpoint-470/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7ff83bd39b36fad96801a968f07d8d1238dc9b1457c73621fec74fa93d13654d
+size 15024
diff --git a/checkpoint-470/rng_state_2.pth b/checkpoint-470/rng_state_2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..77d6acbeeaf667480e511fe2883124a02cc9a734
--- /dev/null
+++ b/checkpoint-470/rng_state_2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2d3d504b68e122f85d20d5aeb96e17473c5eb042edd256a20bcf11ddca8efc63
+size 15024
diff --git a/checkpoint-470/rng_state_3.pth b/checkpoint-470/rng_state_3.pth
new file mode 100644
index 0000000000000000000000000000000000000000..7d06f172f522dc80c377b02a019e6c0c67aae400
--- /dev/null
+++ b/checkpoint-470/rng_state_3.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:adb16d739758dd771f1f612ef8e6c47be299b5accb7714b043af6f5c8e3e86ca
+size 15024
diff --git a/checkpoint-470/scheduler.pt b/checkpoint-470/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..67f4cf86e6c2deff75884de217841989b55198ad
--- /dev/null
+++ b/checkpoint-470/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c0523cfc11926da1570a12c32004d2b725e98d80e610da83d4ce8dffb1062870
+size 1064
diff --git a/checkpoint-470/trainer_state.json b/checkpoint-470/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..bb805c6e972541f7b17dd7ef64ea232618aa1564
--- /dev/null
+++ b/checkpoint-470/trainer_state.json
@@ -0,0 +1,350 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.034104927073507,
+  "eval_steps": 500,
+  "global_step": 470,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0007256367462448298,
+      "grad_norm": 12.0,
+      "learning_rate": 5e-05,
+      "loss": 3.0993,
+      "step": 10
+    },
+    {
+      "epoch": 0.0014512734924896596,
+      "grad_norm": 10.4375,
+      "learning_rate": 0.0001,
+      "loss": 2.208,
+      "step": 20
+    },
+    {
+      "epoch": 0.0021769102387344894,
+      "grad_norm": 6.5625,
+      "learning_rate": 0.00015,
+      "loss": 1.3285,
+      "step": 30
+    },
+    {
+      "epoch": 0.0029025469849793192,
+      "grad_norm": 6.21875,
+      "learning_rate": 0.0002,
+      "loss": 0.6895,
+      "step": 40
+    },
+    {
+      "epoch": 0.003628183731224149,
+      "grad_norm": 4.15625,
+      "learning_rate": 0.00025,
+      "loss": 0.714,
+      "step": 50
+    },
+    {
+      "epoch": 0.004353820477468979,
+      "grad_norm": 4.71875,
+      "learning_rate": 0.0003,
+      "loss": 0.4849,
+      "step": 60
+    },
+    {
+      "epoch": 0.005079457223713809,
+      "grad_norm": 3.828125,
+      "learning_rate": 0.00035,
+      "loss": 0.3671,
+      "step": 70
+    },
+    {
+      "epoch": 0.0058050939699586385,
+      "grad_norm": 4.03125,
+      "learning_rate": 0.0004,
+      "loss": 0.5693,
+      "step": 80
+    },
+    {
+      "epoch": 0.006530730716203468,
+      "grad_norm": 3.296875,
+      "learning_rate": 0.00045000000000000004,
+      "loss": 0.4133,
+      "step": 90
+    },
+    {
+      "epoch": 0.007256367462448298,
+      "grad_norm": 2.640625,
+      "learning_rate": 0.0005,
+      "loss": 0.2668,
+      "step": 100
+    },
+    {
+      "epoch": 0.007982004208693128,
+      "grad_norm": 1.5859375,
+      "learning_rate": 0.000499999340865746,
+      "loss": 0.2922,
+      "step": 110
+    },
+    {
+      "epoch": 0.008707640954937958,
+      "grad_norm": 3.8125,
+      "learning_rate": 0.0004999973634664594,
+      "loss": 0.3996,
+      "step": 120
+    },
+    {
+      "epoch": 0.009433277701182788,
+      "grad_norm": 2.0625,
+      "learning_rate": 0.0004999940678125673,
+      "loss": 0.2841,
+      "step": 130
+    },
+    {
+      "epoch": 0.010158914447427617,
+      "grad_norm": 3.390625,
+      "learning_rate": 0.000499989453921448,
+      "loss": 0.3003,
+      "step": 140
+    },
+    {
+      "epoch": 0.010884551193672447,
+      "grad_norm": 1.65625,
+      "learning_rate": 0.0004999835218174307,
+      "loss": 0.2747,
+      "step": 150
+    },
+    {
+      "epoch": 0.011610187939917277,
+      "grad_norm": 1.671875,
+      "learning_rate": 0.000499976271531796,
+      "loss": 0.37,
+      "step": 160
+    },
+    {
+      "epoch": 0.012335824686162107,
+      "grad_norm": 1.0390625,
+      "learning_rate": 0.000499967703102775,
+      "loss": 0.2163,
+      "step": 170
+    },
+    {
+      "epoch": 0.013061461432406937,
+      "grad_norm": 2.546875,
+      "learning_rate": 0.00049995781657555,
+      "loss": 0.3652,
+      "step": 180
+    },
+    {
+      "epoch": 0.013787098178651766,
+      "grad_norm": 3.71875,
+      "learning_rate": 0.000499946612002253,
+      "loss": 0.3177,
+      "step": 190
+    },
+    {
+      "epoch": 0.014512734924896596,
+      "grad_norm": 2.6875,
+      "learning_rate": 0.0004999340894419668,
+      "loss": 0.2043,
+      "step": 200
+    },
+    {
+      "epoch": 0.015238371671141426,
+      "grad_norm": 0.5,
+      "learning_rate": 0.0004999202489607236,
+      "loss": 0.2865,
+      "step": 210
+    },
+    {
+      "epoch": 0.015964008417386256,
+      "grad_norm": 3.015625,
+      "learning_rate": 0.0004999050906315055,
+      "loss": 0.2039,
+      "step": 220
+    },
+    {
+      "epoch": 0.016689645163631087,
+      "grad_norm": 0.99609375,
+      "learning_rate": 0.0004998886145342434,
+      "loss": 0.3509,
+      "step": 230
+    },
+    {
+      "epoch": 0.017415281909875915,
+      "grad_norm": 1.34375,
+      "learning_rate": 0.0004998708207558168,
+      "loss": 0.3208,
+      "step": 240
+    },
+    {
+      "epoch": 0.018140918656120747,
+      "grad_norm": 2.015625,
+      "learning_rate": 0.0004998517093900539,
+      "loss": 0.2307,
+      "step": 250
+    },
+    {
+      "epoch": 0.018866555402365575,
+      "grad_norm": 1.0703125,
+      "learning_rate": 0.0004998312805377302,
+      "loss": 0.2232,
+      "step": 260
+    },
+    {
+      "epoch": 0.019592192148610407,
+      "grad_norm": 1.7265625,
+      "learning_rate": 0.0004998095343065685,
+      "loss": 0.2587,
+      "step": 270
+    },
+    {
+      "epoch": 0.020317828894855235,
+      "grad_norm": 1.21875,
+      "learning_rate": 0.0004997864708112384,
+      "loss": 0.2175,
+      "step": 280
+    },
+    {
+      "epoch": 0.021043465641100066,
+      "grad_norm": 2.234375,
+      "learning_rate": 0.0004997620901733554,
+      "loss": 0.2185,
+      "step": 290
+    },
+    {
+      "epoch": 0.021769102387344894,
+      "grad_norm": 1.2734375,
+      "learning_rate": 0.0004997363925214804,
+      "loss": 0.2409,
+      "step": 300
+    },
+    {
+      "epoch": 0.022494739133589726,
+      "grad_norm": 1.7109375,
+      "learning_rate": 0.000499709377991119,
+      "loss": 0.1976,
+      "step": 310
+    },
+    {
+      "epoch": 0.023220375879834554,
+      "grad_norm": 1.7890625,
+      "learning_rate": 0.0004996810467247207,
+      "loss": 0.2899,
+      "step": 320
+    },
+    {
+      "epoch": 0.023946012626079385,
+      "grad_norm": 1.9296875,
+      "learning_rate": 0.0004996513988716786,
+      "loss": 0.2324,
+      "step": 330
+    },
+    {
+      "epoch": 0.024671649372324213,
+      "grad_norm": 1.59375,
+      "learning_rate": 0.0004996204345883278,
+      "loss": 0.2376,
+      "step": 340
+    },
+    {
+      "epoch": 0.025397286118569045,
+      "grad_norm": 1.734375,
+      "learning_rate": 0.0004995881540379454,
+      "loss": 0.2927,
+      "step": 350
+    },
+    {
+      "epoch": 0.026122922864813873,
+      "grad_norm": 1.9375,
+      "learning_rate": 0.0004995545573907492,
+      "loss": 0.2738,
+      "step": 360
+    },
+    {
+      "epoch": 0.026848559611058705,
+      "grad_norm": 2.421875,
+      "learning_rate": 0.0004995196448238966,
+      "loss": 0.2427,
+      "step": 370
+    },
+    {
+      "epoch": 0.027574196357303533,
+      "grad_norm": 2.671875,
+      "learning_rate": 0.0004994834165214843,
+      "loss": 0.2032,
+      "step": 380
+    },
+    {
+      "epoch": 0.028299833103548364,
+      "grad_norm": 3.328125,
+      "learning_rate": 0.0004994458726745468,
+      "loss": 0.2748,
+      "step": 390
+    },
+    {
+      "epoch": 0.029025469849793192,
+      "grad_norm": 0.498046875,
+      "learning_rate": 0.0004994070134810556,
+      "loss": 0.2275,
+      "step": 400
+    },
+    {
+      "epoch": 0.029751106596038024,
+      "grad_norm": 1.0703125,
+      "learning_rate": 0.000499366839145918,
+      "loss": 0.1725,
+      "step": 410
+    },
+    {
+      "epoch": 0.030476743342282852,
+      "grad_norm": 3.46875,
+      "learning_rate": 0.0004993253498809762,
+      "loss": 0.2298,
+      "step": 420
+    },
+    {
+      "epoch": 0.031202380088527683,
+      "grad_norm": 2.875,
+      "learning_rate": 0.0004992825459050064,
+      "loss": 0.2721,
+      "step": 430
+    },
+    {
+      "epoch": 0.03192801683477251,
+      "grad_norm": 1.6640625,
+      "learning_rate": 0.0004992384274437171,
+      "loss": 0.248,
+      "step": 440
+    },
+    {
+      "epoch": 0.03265365358101734,
+      "grad_norm": 1.8984375,
+      "learning_rate": 0.000499192994729748,
+      "loss": 0.1529,
+      "step": 450
+    },
+    {
+      "epoch": 0.033379290327262175,
+      "grad_norm": 2.234375,
+      "learning_rate": 0.0004991462480026693,
+      "loss": 0.2584,
+      "step": 460
+    },
+    {
+      "epoch": 0.034104927073507,
+      "grad_norm": 0.984375,
+      "learning_rate": 0.0004990981875089799,
+      "loss": 0.25,
+      "step": 470
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 13781,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 10,
+  "total_flos": 0.0,
+  "train_batch_size": 5,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-470/training_args.bin b/checkpoint-470/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..7d5e801f990cb70d84bd631ef6b3738864bdfd3d
--- /dev/null
+++ b/checkpoint-470/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3cf317b5a0322fdf5b8c610bfc4afb6ece34736fcb1bea30f92c1212b9b53d69
+size 4984
diff --git a/checkpoint-480/README.md b/checkpoint-480/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..64d55750c9c68c8ef0ccff2f65b1fd32a16578a9
--- /dev/null
+++ b/checkpoint-480/README.md
@@ -0,0 +1,202 @@
+---
+library_name: peft
+base_model: ../pretrained/MiniCPM-2B-dpo-bf16/
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.10.0
\ No newline at end of file
diff --git a/checkpoint-480/adapter_config.json b/checkpoint-480/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..d0eb9db7c6b0c827aba0e86e5057f651b0f868c9
--- /dev/null
+++ b/checkpoint-480/adapter_config.json
@@ -0,0 +1,29 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "../pretrained/MiniCPM-2B-dpo-bf16/",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": "gaussian",
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-480/adapter_model.safetensors b/checkpoint-480/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..bf3ca02f3fa7e3aa6dab3c96413f3c6eb8eff182
--- /dev/null
+++ b/checkpoint-480/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bf33e3c14cf50b78fbb7dcfc64027cf4e041d3fb4812697d9259565cda33a4f6
+size 5919456
diff --git a/checkpoint-480/optimizer.pt b/checkpoint-480/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..80d16c724f7ee137d3b300f18478828d3499b31f
--- /dev/null
+++ b/checkpoint-480/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3bd77339c7ffd197c288485b49fe620ededb7b7339e9fd25972956d5667f0896
+size 11930938
diff --git a/checkpoint-480/rng_state_0.pth b/checkpoint-480/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..dd695f5ec0a9b100368676106e8249dca09000cb
--- /dev/null
+++ b/checkpoint-480/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1bdd7fe83304da1de4b6e029b064d38818f9e09a77fa7b1837c391d69a4db80d
+size 15024
diff --git a/checkpoint-480/rng_state_1.pth b/checkpoint-480/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..4395d4574ab705608389a4096ac7429c17391b99
--- /dev/null
+++ b/checkpoint-480/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a43726b12c49859b617cf6ad2e9ce7c229b70cdbac211acd7ab9fb98f436d792
+size 15024
diff --git a/checkpoint-480/rng_state_2.pth b/checkpoint-480/rng_state_2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..4596e3f7fa806e23ac18d17854250bf36b96348f
--- /dev/null
+++ b/checkpoint-480/rng_state_2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5b073c32a283cc7db429d91fad9c6c3b3750532a54d7bac5b9d921554f64c6e2
+size 15024
diff --git a/checkpoint-480/rng_state_3.pth b/checkpoint-480/rng_state_3.pth
new file mode 100644
index 0000000000000000000000000000000000000000..24813190ba7d7663d2de299dc64912c3742e2731
--- /dev/null
+++ b/checkpoint-480/rng_state_3.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:32de2571eae3fed510c12c3aee5f09bc3d176d5edc3a8b9c5d855f87f2d04e83
+size 15024
diff --git a/checkpoint-480/scheduler.pt b/checkpoint-480/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d9e58ae6e473763a9a9db15012ffb71ab364e454
--- /dev/null
+++ b/checkpoint-480/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b5e2d764a10c06d0d5a1aa9ddcaccf3ed5c6afba87acb1d79aa4b4b6752df610
+size 1064
diff --git a/checkpoint-480/trainer_state.json b/checkpoint-480/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..88f1fd458b867450ed751590ac5947913fb247d8
--- /dev/null
+++ b/checkpoint-480/trainer_state.json
@@ -0,0 +1,357 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.03483056381975183,
+  "eval_steps": 500,
+  "global_step": 480,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0007256367462448298,
+      "grad_norm": 12.0,
+      "learning_rate": 5e-05,
+      "loss": 3.0993,
+      "step": 10
+    },
+    {
+      "epoch": 0.0014512734924896596,
+      "grad_norm": 10.4375,
+      "learning_rate": 0.0001,
+      "loss": 2.208,
+      "step": 20
+    },
+    {
+      "epoch": 0.0021769102387344894,
+      "grad_norm": 6.5625,
+      "learning_rate": 0.00015,
+      "loss": 1.3285,
+      "step": 30
+    },
+    {
+      "epoch": 0.0029025469849793192,
+      "grad_norm": 6.21875,
+      "learning_rate": 0.0002,
+      "loss": 0.6895,
+      "step": 40
+    },
+    {
+      "epoch": 0.003628183731224149,
+      "grad_norm": 4.15625,
+      "learning_rate": 0.00025,
+      "loss": 0.714,
+      "step": 50
+    },
+    {
+      "epoch": 0.004353820477468979,
+      "grad_norm": 4.71875,
+      "learning_rate": 0.0003,
+      "loss": 0.4849,
+      "step": 60
+    },
+    {
+      "epoch": 0.005079457223713809,
+      "grad_norm": 3.828125,
+      "learning_rate": 0.00035,
+      "loss": 0.3671,
+      "step": 70
+    },
+    {
+      "epoch": 0.0058050939699586385,
+      "grad_norm": 4.03125,
+      "learning_rate": 0.0004,
+      "loss": 0.5693,
+      "step": 80
+    },
+    {
+      "epoch": 0.006530730716203468,
+      "grad_norm": 3.296875,
+      "learning_rate": 0.00045000000000000004,
+      "loss": 0.4133,
+      "step": 90
+    },
+    {
+      "epoch": 0.007256367462448298,
+      "grad_norm": 2.640625,
+      "learning_rate": 0.0005,
+      "loss": 0.2668,
+      "step": 100
+    },
+    {
+      "epoch": 0.007982004208693128,
+      "grad_norm": 1.5859375,
+      "learning_rate": 0.000499999340865746,
+      "loss": 0.2922,
+      "step": 110
+    },
+    {
+      "epoch": 0.008707640954937958,
+      "grad_norm": 3.8125,
+      "learning_rate": 0.0004999973634664594,
+      "loss": 0.3996,
+      "step": 120
+    },
+    {
+      "epoch": 0.009433277701182788,
+      "grad_norm": 2.0625,
+      "learning_rate": 0.0004999940678125673,
+      "loss": 0.2841,
+      "step": 130
+    },
+    {
+      "epoch": 0.010158914447427617,
+      "grad_norm": 3.390625,
+      "learning_rate": 0.000499989453921448,
+      "loss": 0.3003,
+      "step": 140
+    },
+    {
+      "epoch": 0.010884551193672447,
+      "grad_norm": 1.65625,
+      "learning_rate": 0.0004999835218174307,
+      "loss": 0.2747,
+      "step": 150
+    },
+    {
+      "epoch": 0.011610187939917277,
+      "grad_norm": 1.671875,
+      "learning_rate": 0.000499976271531796,
+      "loss": 0.37,
+      "step": 160
+    },
+    {
+      "epoch": 0.012335824686162107,
+      "grad_norm": 1.0390625,
+      "learning_rate": 0.000499967703102775,
+      "loss": 0.2163,
+      "step": 170
+    },
+    {
+      "epoch": 0.013061461432406937,
+      "grad_norm": 2.546875,
+      "learning_rate": 0.00049995781657555,
+      "loss": 0.3652,
+      "step": 180
+    },
+    {
+      "epoch": 0.013787098178651766,
+      "grad_norm": 3.71875,
+      "learning_rate": 0.000499946612002253,
+      "loss": 0.3177,
+      "step": 190
+    },
+    {
+      "epoch": 0.014512734924896596,
+      "grad_norm": 2.6875,
+      "learning_rate": 0.0004999340894419668,
+      "loss": 0.2043,
+      "step": 200
+    },
+    {
+      "epoch": 0.015238371671141426,
+      "grad_norm": 0.5,
+      "learning_rate": 0.0004999202489607236,
+      "loss": 0.2865,
+      "step": 210
+    },
+    {
+      "epoch": 0.015964008417386256,
+      "grad_norm": 3.015625,
+      "learning_rate": 0.0004999050906315055,
+      "loss": 0.2039,
+      "step": 220
+    },
+    {
+      "epoch": 0.016689645163631087,
+      "grad_norm": 0.99609375,
+      "learning_rate": 0.0004998886145342434,
+      "loss": 0.3509,
+      "step": 230
+    },
+    {
+      "epoch": 0.017415281909875915,
+      "grad_norm": 1.34375,
+      "learning_rate": 0.0004998708207558168,
+      "loss": 0.3208,
+      "step": 240
+    },
+    {
+      "epoch": 0.018140918656120747,
+      "grad_norm": 2.015625,
+      "learning_rate": 0.0004998517093900539,
+      "loss": 0.2307,
+      "step": 250
+    },
+    {
+      "epoch": 0.018866555402365575,
+      "grad_norm": 1.0703125,
+      "learning_rate": 0.0004998312805377302,
+      "loss": 0.2232,
+      "step": 260
+    },
+    {
+      "epoch": 0.019592192148610407,
+      "grad_norm": 1.7265625,
+      "learning_rate": 0.0004998095343065685,
+      "loss": 0.2587,
+      "step": 270
+    },
+    {
+      "epoch": 0.020317828894855235,
+      "grad_norm": 1.21875,
+      "learning_rate": 0.0004997864708112384,
+      "loss": 0.2175,
+      "step": 280
+    },
+    {
+      "epoch": 0.021043465641100066,
+      "grad_norm": 2.234375,
+      "learning_rate": 0.0004997620901733554,
+      "loss": 0.2185,
+      "step": 290
+    },
+    {
+      "epoch": 0.021769102387344894,
+      "grad_norm": 1.2734375,
+      "learning_rate": 0.0004997363925214804,
+      "loss": 0.2409,
+      "step": 300
+    },
+    {
+      "epoch": 0.022494739133589726,
+      "grad_norm": 1.7109375,
+      "learning_rate": 0.000499709377991119,
+      "loss": 0.1976,
+      "step": 310
+    },
+    {
+      "epoch": 0.023220375879834554,
+      "grad_norm": 1.7890625,
+      "learning_rate": 0.0004996810467247207,
+      "loss": 0.2899,
+      "step": 320
+    },
+    {
+      "epoch": 0.023946012626079385,
+      "grad_norm": 1.9296875,
+      "learning_rate": 0.0004996513988716786,
+      "loss": 0.2324,
+      "step": 330
+    },
+    {
+      "epoch": 0.024671649372324213,
+      "grad_norm": 1.59375,
+      "learning_rate": 0.0004996204345883278,
+      "loss": 0.2376,
+      "step": 340
+    },
+    {
+      "epoch": 0.025397286118569045,
+      "grad_norm": 1.734375,
+      "learning_rate": 0.0004995881540379454,
+      "loss": 0.2927,
+      "step": 350
+    },
+    {
+      "epoch": 0.026122922864813873,
+      "grad_norm": 1.9375,
+      "learning_rate": 0.0004995545573907492,
+      "loss": 0.2738,
+      "step": 360
+    },
+    {
+      "epoch": 0.026848559611058705,
+      "grad_norm": 2.421875,
+      "learning_rate": 0.0004995196448238966,
+      "loss": 0.2427,
+      "step": 370
+    },
+    {
+      "epoch": 0.027574196357303533,
+      "grad_norm": 2.671875,
+      "learning_rate": 0.0004994834165214843,
+      "loss": 0.2032,
+      "step": 380
+    },
+    {
+      "epoch": 0.028299833103548364,
+      "grad_norm": 3.328125,
+      "learning_rate": 0.0004994458726745468,
+      "loss": 0.2748,
+      "step": 390
+    },
+    {
+      "epoch": 0.029025469849793192,
+      "grad_norm": 0.498046875,
+      "learning_rate": 0.0004994070134810556,
+      "loss": 0.2275,
+      "step": 400
+    },
+    {
+      "epoch": 0.029751106596038024,
+      "grad_norm": 1.0703125,
+      "learning_rate": 0.000499366839145918,
+      "loss": 0.1725,
+      "step": 410
+    },
+    {
+      "epoch": 0.030476743342282852,
+      "grad_norm": 3.46875,
+      "learning_rate": 0.0004993253498809762,
+      "loss": 0.2298,
+      "step": 420
+    },
+    {
+      "epoch": 0.031202380088527683,
+      "grad_norm": 2.875,
+      "learning_rate": 0.0004992825459050064,
+      "loss": 0.2721,
+      "step": 430
+    },
+    {
+      "epoch": 0.03192801683477251,
+      "grad_norm": 1.6640625,
+      "learning_rate": 0.0004992384274437171,
+      "loss": 0.248,
+      "step": 440
+    },
+    {
+      "epoch": 0.03265365358101734,
+      "grad_norm": 1.8984375,
+      "learning_rate": 0.000499192994729748,
+      "loss": 0.1529,
+      "step": 450
+    },
+    {
+      "epoch": 0.033379290327262175,
+      "grad_norm": 2.234375,
+      "learning_rate": 0.0004991462480026693,
+      "loss": 0.2584,
+      "step": 460
+    },
+    {
+      "epoch": 0.034104927073507,
+      "grad_norm": 0.984375,
+      "learning_rate": 0.0004990981875089799,
+      "loss": 0.25,
+      "step": 470
+    },
+    {
+      "epoch": 0.03483056381975183,
+      "grad_norm": 1.75,
+      "learning_rate": 0.0004990488135021065,
+      "loss": 0.199,
+      "step": 480
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 13781,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 10,
+  "total_flos": 0.0,
+  "train_batch_size": 5,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-480/training_args.bin b/checkpoint-480/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..7d5e801f990cb70d84bd631ef6b3738864bdfd3d
--- /dev/null
+++ b/checkpoint-480/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3cf317b5a0322fdf5b8c610bfc4afb6ece34736fcb1bea30f92c1212b9b53d69
+size 4984
diff --git a/checkpoint-490/README.md b/checkpoint-490/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..64d55750c9c68c8ef0ccff2f65b1fd32a16578a9
--- /dev/null
+++ b/checkpoint-490/README.md
@@ -0,0 +1,202 @@
+---
+library_name: peft
+base_model: ../pretrained/MiniCPM-2B-dpo-bf16/
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.10.0
\ No newline at end of file
diff --git a/checkpoint-490/adapter_config.json b/checkpoint-490/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..d0eb9db7c6b0c827aba0e86e5057f651b0f868c9
--- /dev/null
+++ b/checkpoint-490/adapter_config.json
@@ -0,0 +1,29 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "../pretrained/MiniCPM-2B-dpo-bf16/",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": "gaussian",
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-490/adapter_model.safetensors b/checkpoint-490/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..67f80bc1d238d10f3a9e9e36d00295410cc01ed1
--- /dev/null
+++ b/checkpoint-490/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:761f254ef81f89d66777e09d6fa4f9fdc40fabd10f8c4bfe0a3b9cd10c71dc6b
+size 5919456
diff --git a/checkpoint-490/optimizer.pt b/checkpoint-490/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..917e1a9ba6e064cdb67d94fd828941d91d6fdb7f
--- /dev/null
+++ b/checkpoint-490/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:94bc36be2beadf776b25b7f51528e29c65491db21e6fadb96e97b11f7cabf145
+size 11930938
diff --git a/checkpoint-490/rng_state_0.pth b/checkpoint-490/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..7317b2a071f4d4c6f8928de009a47d5a279b4a3f
--- /dev/null
+++ b/checkpoint-490/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c39e6dd933a981504f685d9eeda547754279c4ea34bdbe961ca8cac2e3da23ed
+size 15024
diff --git a/checkpoint-490/rng_state_1.pth b/checkpoint-490/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..e17a24288d670453b9309de7536e766bc4466cfc
--- /dev/null
+++ b/checkpoint-490/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:da140232ee2cf7677bd8bc3ae61146529b06bddde62889ba5a140503cefa7f75
+size 15024
diff --git a/checkpoint-490/rng_state_2.pth b/checkpoint-490/rng_state_2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..b49ca76f7aa7d5ae52dc6fd8ddba0c7a228405ad
--- /dev/null
+++ b/checkpoint-490/rng_state_2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f899414b9c868c63ca69be28e7db7a82431120a8f303fc775bd158aae7551a82
+size 15024
diff --git a/checkpoint-490/rng_state_3.pth b/checkpoint-490/rng_state_3.pth
new file mode 100644
index 0000000000000000000000000000000000000000..d18c9fa1a3167e5106bbd251fa488da008916302
--- /dev/null
+++ b/checkpoint-490/rng_state_3.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b9c53a8c775f3fc0ce9f5674678dfd36ef6d99a7137fcf28bd9a8052397714b4
+size 15024
diff --git a/checkpoint-490/scheduler.pt b/checkpoint-490/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8cdfec0f6109d7e4bb4087a740cf80607caeda21
--- /dev/null
+++ b/checkpoint-490/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:21c2fd858056da6465c46e0ae54dfdcd4bfb137881aacc68bfd730d7e7827eec
+size 1064
diff --git a/checkpoint-490/trainer_state.json b/checkpoint-490/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..43a9f741465969a010cc669cd49c67099a3bd42e
--- /dev/null
+++ b/checkpoint-490/trainer_state.json
@@ -0,0 +1,364 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.03555620056599666,
+  "eval_steps": 500,
+  "global_step": 490,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0007256367462448298,
+      "grad_norm": 12.0,
+      "learning_rate": 5e-05,
+      "loss": 3.0993,
+      "step": 10
+    },
+    {
+      "epoch": 0.0014512734924896596,
+      "grad_norm": 10.4375,
+      "learning_rate": 0.0001,
+      "loss": 2.208,
+      "step": 20
+    },
+    {
+      "epoch": 0.0021769102387344894,
+      "grad_norm": 6.5625,
+      "learning_rate": 0.00015,
+      "loss": 1.3285,
+      "step": 30
+    },
+    {
+      "epoch": 0.0029025469849793192,
+      "grad_norm": 6.21875,
+      "learning_rate": 0.0002,
+      "loss": 0.6895,
+      "step": 40
+    },
+    {
+      "epoch": 0.003628183731224149,
+      "grad_norm": 4.15625,
+      "learning_rate": 0.00025,
+      "loss": 0.714,
+      "step": 50
+    },
+    {
+      "epoch": 0.004353820477468979,
+      "grad_norm": 4.71875,
+      "learning_rate": 0.0003,
+      "loss": 0.4849,
+      "step": 60
+    },
+    {
+      "epoch": 0.005079457223713809,
+      "grad_norm": 3.828125,
+      "learning_rate": 0.00035,
+      "loss": 0.3671,
+      "step": 70
+    },
+    {
+      "epoch": 0.0058050939699586385,
+      "grad_norm": 4.03125,
+      "learning_rate": 0.0004,
+      "loss": 0.5693,
+      "step": 80
+    },
+    {
+      "epoch": 0.006530730716203468,
+      "grad_norm": 3.296875,
+      "learning_rate": 0.00045000000000000004,
+      "loss": 0.4133,
+      "step": 90
+    },
+    {
+      "epoch": 0.007256367462448298,
+      "grad_norm": 2.640625,
+      "learning_rate": 0.0005,
+      "loss": 0.2668,
+      "step": 100
+    },
+    {
+      "epoch": 0.007982004208693128,
+      "grad_norm": 1.5859375,
+      "learning_rate": 0.000499999340865746,
+      "loss": 0.2922,
+      "step": 110
+    },
+    {
+      "epoch": 0.008707640954937958,
+      "grad_norm": 3.8125,
+      "learning_rate": 0.0004999973634664594,
+      "loss": 0.3996,
+      "step": 120
+    },
+    {
+      "epoch": 0.009433277701182788,
+      "grad_norm": 2.0625,
+      "learning_rate": 0.0004999940678125673,
+      "loss": 0.2841,
+      "step": 130
+    },
+    {
+      "epoch": 0.010158914447427617,
+      "grad_norm": 3.390625,
+      "learning_rate": 0.000499989453921448,
+      "loss": 0.3003,
+      "step": 140
+    },
+    {
+      "epoch": 0.010884551193672447,
+      "grad_norm": 1.65625,
+      "learning_rate": 0.0004999835218174307,
+      "loss": 0.2747,
+      "step": 150
+    },
+    {
+      "epoch": 0.011610187939917277,
+      "grad_norm": 1.671875,
+      "learning_rate": 0.000499976271531796,
+      "loss": 0.37,
+      "step": 160
+    },
+    {
+      "epoch": 0.012335824686162107,
+      "grad_norm": 1.0390625,
+      "learning_rate": 0.000499967703102775,
+      "loss": 0.2163,
+      "step": 170
+    },
+    {
+      "epoch": 0.013061461432406937,
+      "grad_norm": 2.546875,
+      "learning_rate": 0.00049995781657555,
+      "loss": 0.3652,
+      "step": 180
+    },
+    {
+      "epoch": 0.013787098178651766,
+      "grad_norm": 3.71875,
+      "learning_rate": 0.000499946612002253,
+      "loss": 0.3177,
+      "step": 190
+    },
+    {
+      "epoch": 0.014512734924896596,
+      "grad_norm": 2.6875,
+      "learning_rate": 0.0004999340894419668,
+      "loss": 0.2043,
+      "step": 200
+    },
+    {
+      "epoch": 0.015238371671141426,
+      "grad_norm": 0.5,
+      "learning_rate": 0.0004999202489607236,
+      "loss": 0.2865,
+      "step": 210
+    },
+    {
+      "epoch": 0.015964008417386256,
+      "grad_norm": 3.015625,
+      "learning_rate": 0.0004999050906315055,
+      "loss": 0.2039,
+      "step": 220
+    },
+    {
+      "epoch": 0.016689645163631087,
+      "grad_norm": 0.99609375,
+      "learning_rate": 0.0004998886145342434,
+      "loss": 0.3509,
+      "step": 230
+    },
+    {
+      "epoch": 0.017415281909875915,
+      "grad_norm": 1.34375,
+      "learning_rate": 0.0004998708207558168,
+      "loss": 0.3208,
+      "step": 240
+    },
+    {
+      "epoch": 0.018140918656120747,
+      "grad_norm": 2.015625,
+      "learning_rate": 0.0004998517093900539,
+      "loss": 0.2307,
+      "step": 250
+    },
+    {
+      "epoch": 0.018866555402365575,
+      "grad_norm": 1.0703125,
+      "learning_rate": 0.0004998312805377302,
+      "loss": 0.2232,
+      "step": 260
+    },
+    {
+      "epoch": 0.019592192148610407,
+      "grad_norm": 1.7265625,
+      "learning_rate": 0.0004998095343065685,
+      "loss": 0.2587,
+      "step": 270
+    },
+    {
+      "epoch": 0.020317828894855235,
+      "grad_norm": 1.21875,
+      "learning_rate": 0.0004997864708112384,
+      "loss": 0.2175,
+      "step": 280
+    },
+    {
+      "epoch": 0.021043465641100066,
+      "grad_norm": 2.234375,
+      "learning_rate": 0.0004997620901733554,
+      "loss": 0.2185,
+      "step": 290
+    },
+    {
+      "epoch": 0.021769102387344894,
+      "grad_norm": 1.2734375,
+      "learning_rate": 0.0004997363925214804,
+      "loss": 0.2409,
+      "step": 300
+    },
+    {
+      "epoch": 0.022494739133589726,
+      "grad_norm": 1.7109375,
+      "learning_rate": 0.000499709377991119,
+      "loss": 0.1976,
+      "step": 310
+    },
+    {
+      "epoch": 0.023220375879834554,
+      "grad_norm": 1.7890625,
+      "learning_rate": 0.0004996810467247207,
+      "loss": 0.2899,
+      "step": 320
+    },
+    {
+      "epoch": 0.023946012626079385,
+      "grad_norm": 1.9296875,
+      "learning_rate": 0.0004996513988716786,
+      "loss": 0.2324,
+      "step": 330
+    },
+    {
+      "epoch": 0.024671649372324213,
+      "grad_norm": 1.59375,
+      "learning_rate": 0.0004996204345883278,
+      "loss": 0.2376,
+      "step": 340
+    },
+    {
+      "epoch": 0.025397286118569045,
+      "grad_norm": 1.734375,
+      "learning_rate": 0.0004995881540379454,
+      "loss": 0.2927,
+      "step": 350
+    },
+    {
+      "epoch": 0.026122922864813873,
+      "grad_norm": 1.9375,
+      "learning_rate": 0.0004995545573907492,
+      "loss": 0.2738,
+      "step": 360
+    },
+    {
+      "epoch": 0.026848559611058705,
+      "grad_norm": 2.421875,
+      "learning_rate": 0.0004995196448238966,
+      "loss": 0.2427,
+      "step": 370
+    },
+    {
+      "epoch": 0.027574196357303533,
+      "grad_norm": 2.671875,
+      "learning_rate": 0.0004994834165214843,
+      "loss": 0.2032,
+      "step": 380
+    },
+    {
+      "epoch": 0.028299833103548364,
+      "grad_norm": 3.328125,
+      "learning_rate": 0.0004994458726745468,
+      "loss": 0.2748,
+      "step": 390
+    },
+    {
+      "epoch": 0.029025469849793192,
+      "grad_norm": 0.498046875,
+      "learning_rate": 0.0004994070134810556,
+      "loss": 0.2275,
+      "step": 400
+    },
+    {
+      "epoch": 0.029751106596038024,
+      "grad_norm": 1.0703125,
+      "learning_rate": 0.000499366839145918,
+      "loss": 0.1725,
+      "step": 410
+    },
+    {
+      "epoch": 0.030476743342282852,
+      "grad_norm": 3.46875,
+      "learning_rate": 0.0004993253498809762,
+      "loss": 0.2298,
+      "step": 420
+    },
+    {
+      "epoch": 0.031202380088527683,
+      "grad_norm": 2.875,
+      "learning_rate": 0.0004992825459050064,
+      "loss": 0.2721,
+      "step": 430
+    },
+    {
+      "epoch": 0.03192801683477251,
+      "grad_norm": 1.6640625,
+      "learning_rate": 0.0004992384274437171,
+      "loss": 0.248,
+      "step": 440
+    },
+    {
+      "epoch": 0.03265365358101734,
+      "grad_norm": 1.8984375,
+      "learning_rate": 0.000499192994729748,
+      "loss": 0.1529,
+      "step": 450
+    },
+    {
+      "epoch": 0.033379290327262175,
+      "grad_norm": 2.234375,
+      "learning_rate": 0.0004991462480026693,
+      "loss": 0.2584,
+      "step": 460
+    },
+    {
+      "epoch": 0.034104927073507,
+      "grad_norm": 0.984375,
+      "learning_rate": 0.0004990981875089799,
+      "loss": 0.25,
+      "step": 470
+    },
+    {
+      "epoch": 0.03483056381975183,
+      "grad_norm": 1.75,
+      "learning_rate": 0.0004990488135021065,
+      "loss": 0.199,
+      "step": 480
+    },
+    {
+      "epoch": 0.03555620056599666,
+      "grad_norm": 0.7421875,
+      "learning_rate": 0.0004989981262424017,
+      "loss": 0.2546,
+      "step": 490
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 13781,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 10,
+  "total_flos": 0.0,
+  "train_batch_size": 5,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-490/training_args.bin b/checkpoint-490/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..7d5e801f990cb70d84bd631ef6b3738864bdfd3d
--- /dev/null
+++ b/checkpoint-490/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3cf317b5a0322fdf5b8c610bfc4afb6ece34736fcb1bea30f92c1212b9b53d69
+size 4984
diff --git a/checkpoint-50/README.md b/checkpoint-50/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..64d55750c9c68c8ef0ccff2f65b1fd32a16578a9
--- /dev/null
+++ b/checkpoint-50/README.md
@@ -0,0 +1,202 @@
+---
+library_name: peft
+base_model: ../pretrained/MiniCPM-2B-dpo-bf16/
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.10.0
\ No newline at end of file
diff --git a/checkpoint-50/adapter_config.json b/checkpoint-50/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..d0eb9db7c6b0c827aba0e86e5057f651b0f868c9
--- /dev/null
+++ b/checkpoint-50/adapter_config.json
@@ -0,0 +1,29 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "../pretrained/MiniCPM-2B-dpo-bf16/",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": "gaussian",
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-50/adapter_model.safetensors b/checkpoint-50/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..5e6a9b9552e5a2ccea2bf4487e44003901d244e1
--- /dev/null
+++ b/checkpoint-50/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9239459365c06454c18bc74520ff0515077d1c797a056ffd794db24883822360
+size 5919456
diff --git a/checkpoint-50/optimizer.pt b/checkpoint-50/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..08aa7b1b5d7066e1ff64b2b64b8f103eb329aa67
--- /dev/null
+++ b/checkpoint-50/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5ead541c143a630eebb1e44d25520f3a860da3503268c825d1695161d8436d1a
+size 11930938
diff --git a/checkpoint-50/rng_state_0.pth b/checkpoint-50/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..c22151a1209c798e2edd9f72d7c214a21b84a5e7
--- /dev/null
+++ b/checkpoint-50/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1ca2453cc0e6f21b27dd55161c5ceb9f93dba9a60fdcc2334b54014fdebd27f1
+size 15024
diff --git a/checkpoint-50/rng_state_1.pth b/checkpoint-50/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..125a8e581dc62840fccde0304035473fa67aea68
--- /dev/null
+++ b/checkpoint-50/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fccf4bd170cfbbcb8f5cb475846f3d9201f4812f364f4429f76430941f4474e7
+size 15024
diff --git a/checkpoint-50/rng_state_2.pth b/checkpoint-50/rng_state_2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..ae9d38fdb5e2b50484f98af2342b6994395c3c48
--- /dev/null
+++ b/checkpoint-50/rng_state_2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:44870a1cd57a925d8984c35d5ca35418b5efaf6a7e4ddb35fef82771a7a8657e
+size 15024
diff --git a/checkpoint-50/rng_state_3.pth b/checkpoint-50/rng_state_3.pth
new file mode 100644
index 0000000000000000000000000000000000000000..e4c8704fd79cbe7b2ab10d373a0dc386263bdad4
--- /dev/null
+++ b/checkpoint-50/rng_state_3.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c2db40412024102d43c49ec94e638c45b8466c0d3c5054616de33a12991ef0e0
+size 15024
diff --git a/checkpoint-50/scheduler.pt b/checkpoint-50/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..76674340cd93aeed006ce4a3f2959b7b29f72f2f
--- /dev/null
+++ b/checkpoint-50/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:66cc3a8945cf00872adf33d73b92f08a7d789c454f681ddafb55574830e54498
+size 1064
diff --git a/checkpoint-50/trainer_state.json b/checkpoint-50/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..66a9ec812f0eb5f681ec9676e9612366dfaffe36
--- /dev/null
+++ b/checkpoint-50/trainer_state.json
@@ -0,0 +1,56 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.003628183731224149,
+  "eval_steps": 500,
+  "global_step": 50,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0007256367462448298,
+      "grad_norm": 12.0,
+      "learning_rate": 5e-05,
+      "loss": 3.0993,
+      "step": 10
+    },
+    {
+      "epoch": 0.0014512734924896596,
+      "grad_norm": 10.4375,
+      "learning_rate": 0.0001,
+      "loss": 2.208,
+      "step": 20
+    },
+    {
+      "epoch": 0.0021769102387344894,
+      "grad_norm": 6.5625,
+      "learning_rate": 0.00015,
+      "loss": 1.3285,
+      "step": 30
+    },
+    {
+      "epoch": 0.0029025469849793192,
+      "grad_norm": 6.21875,
+      "learning_rate": 0.0002,
+      "loss": 0.6895,
+      "step": 40
+    },
+    {
+      "epoch": 0.003628183731224149,
+      "grad_norm": 4.15625,
+      "learning_rate": 0.00025,
+      "loss": 0.714,
+      "step": 50
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 13781,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 10,
+  "total_flos": 0.0,
+  "train_batch_size": 5,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-50/training_args.bin b/checkpoint-50/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..7d5e801f990cb70d84bd631ef6b3738864bdfd3d
--- /dev/null
+++ b/checkpoint-50/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3cf317b5a0322fdf5b8c610bfc4afb6ece34736fcb1bea30f92c1212b9b53d69
+size 4984
diff --git a/checkpoint-500/README.md b/checkpoint-500/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..64d55750c9c68c8ef0ccff2f65b1fd32a16578a9
--- /dev/null
+++ b/checkpoint-500/README.md
@@ -0,0 +1,202 @@
+---
+library_name: peft
+base_model: ../pretrained/MiniCPM-2B-dpo-bf16/
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.10.0
\ No newline at end of file
diff --git a/checkpoint-500/adapter_config.json b/checkpoint-500/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..d0eb9db7c6b0c827aba0e86e5057f651b0f868c9
--- /dev/null
+++ b/checkpoint-500/adapter_config.json
@@ -0,0 +1,29 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "../pretrained/MiniCPM-2B-dpo-bf16/",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": "gaussian",
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-500/adapter_model.safetensors b/checkpoint-500/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..5a75e953305e5a688c2905001e60604061ef9a9c
--- /dev/null
+++ b/checkpoint-500/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4a94673d632a8c141dcc87075bdee31a94323637f802ce23606f05eb8d8a64af
+size 5919456
diff --git a/checkpoint-500/optimizer.pt b/checkpoint-500/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..092b6268634b722292670997c5478d9f7277030b
--- /dev/null
+++ b/checkpoint-500/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3729ec9b3365e00f31193defab02be28cb8aa2942f31455c44334485860fbf08
+size 11930938
diff --git a/checkpoint-500/rng_state_0.pth b/checkpoint-500/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..ecfd871d7ca3dac10b6fbf240674f00e3bbbb3e5
--- /dev/null
+++ b/checkpoint-500/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:61fcabab618aa920263cc05562edc63ab379bbc2af6214bd9b50f4d82d1baa88
+size 15024
diff --git a/checkpoint-500/rng_state_1.pth b/checkpoint-500/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..cf80463aba5b1dcde23e73ae610d777aeba81574
--- /dev/null
+++ b/checkpoint-500/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3bed45a2ce03911ebbd9f3617ca35a520640d1441b923c5de1a5b3d0899274d3
+size 15024
diff --git a/checkpoint-500/rng_state_2.pth b/checkpoint-500/rng_state_2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..0e060fff7b5c3f86838f3bca421e2c0eae7e88bb
--- /dev/null
+++ b/checkpoint-500/rng_state_2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:39b3770ffe7b1e1c70bb3442c8631010c8c7abec9f87214214c5cfb8e07e3ce6
+size 15024
diff --git a/checkpoint-500/rng_state_3.pth b/checkpoint-500/rng_state_3.pth
new file mode 100644
index 0000000000000000000000000000000000000000..b08e4ee1790ce402cb107a08f52a184200d23dab
--- /dev/null
+++ b/checkpoint-500/rng_state_3.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0242c29973a37a9bb180209978d26830ada301aa01aebf1057682da5a647fa69
+size 15024
diff --git a/checkpoint-500/scheduler.pt b/checkpoint-500/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d2504b4e6ae56933419528a76a7e402a64258783
--- /dev/null
+++ b/checkpoint-500/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:859e1b013fbe30b238bc68d254020323d7f4f3f490318d5dfbb4f24bed32db2a
+size 1064
diff --git a/checkpoint-500/trainer_state.json b/checkpoint-500/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..b98374223ea6a1a1639c644448799389a7c61ced
--- /dev/null
+++ b/checkpoint-500/trainer_state.json
@@ -0,0 +1,371 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.036281837312241494,
+  "eval_steps": 500,
+  "global_step": 500,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0007256367462448298,
+      "grad_norm": 12.0,
+      "learning_rate": 5e-05,
+      "loss": 3.0993,
+      "step": 10
+    },
+    {
+      "epoch": 0.0014512734924896596,
+      "grad_norm": 10.4375,
+      "learning_rate": 0.0001,
+      "loss": 2.208,
+      "step": 20
+    },
+    {
+      "epoch": 0.0021769102387344894,
+      "grad_norm": 6.5625,
+      "learning_rate": 0.00015,
+      "loss": 1.3285,
+      "step": 30
+    },
+    {
+      "epoch": 0.0029025469849793192,
+      "grad_norm": 6.21875,
+      "learning_rate": 0.0002,
+      "loss": 0.6895,
+      "step": 40
+    },
+    {
+      "epoch": 0.003628183731224149,
+      "grad_norm": 4.15625,
+      "learning_rate": 0.00025,
+      "loss": 0.714,
+      "step": 50
+    },
+    {
+      "epoch": 0.004353820477468979,
+      "grad_norm": 4.71875,
+      "learning_rate": 0.0003,
+      "loss": 0.4849,
+      "step": 60
+    },
+    {
+      "epoch": 0.005079457223713809,
+      "grad_norm": 3.828125,
+      "learning_rate": 0.00035,
+      "loss": 0.3671,
+      "step": 70
+    },
+    {
+      "epoch": 0.0058050939699586385,
+      "grad_norm": 4.03125,
+      "learning_rate": 0.0004,
+      "loss": 0.5693,
+      "step": 80
+    },
+    {
+      "epoch": 0.006530730716203468,
+      "grad_norm": 3.296875,
+      "learning_rate": 0.00045000000000000004,
+      "loss": 0.4133,
+      "step": 90
+    },
+    {
+      "epoch": 0.007256367462448298,
+      "grad_norm": 2.640625,
+      "learning_rate": 0.0005,
+      "loss": 0.2668,
+      "step": 100
+    },
+    {
+      "epoch": 0.007982004208693128,
+      "grad_norm": 1.5859375,
+      "learning_rate": 0.000499999340865746,
+      "loss": 0.2922,
+      "step": 110
+    },
+    {
+      "epoch": 0.008707640954937958,
+      "grad_norm": 3.8125,
+      "learning_rate": 0.0004999973634664594,
+      "loss": 0.3996,
+      "step": 120
+    },
+    {
+      "epoch": 0.009433277701182788,
+      "grad_norm": 2.0625,
+      "learning_rate": 0.0004999940678125673,
+      "loss": 0.2841,
+      "step": 130
+    },
+    {
+      "epoch": 0.010158914447427617,
+      "grad_norm": 3.390625,
+      "learning_rate": 0.000499989453921448,
+      "loss": 0.3003,
+      "step": 140
+    },
+    {
+      "epoch": 0.010884551193672447,
+      "grad_norm": 1.65625,
+      "learning_rate": 0.0004999835218174307,
+      "loss": 0.2747,
+      "step": 150
+    },
+    {
+      "epoch": 0.011610187939917277,
+      "grad_norm": 1.671875,
+      "learning_rate": 0.000499976271531796,
+      "loss": 0.37,
+      "step": 160
+    },
+    {
+      "epoch": 0.012335824686162107,
+      "grad_norm": 1.0390625,
+      "learning_rate": 0.000499967703102775,
+      "loss": 0.2163,
+      "step": 170
+    },
+    {
+      "epoch": 0.013061461432406937,
+      "grad_norm": 2.546875,
+      "learning_rate": 0.00049995781657555,
+      "loss": 0.3652,
+      "step": 180
+    },
+    {
+      "epoch": 0.013787098178651766,
+      "grad_norm": 3.71875,
+      "learning_rate": 0.000499946612002253,
+      "loss": 0.3177,
+      "step": 190
+    },
+    {
+      "epoch": 0.014512734924896596,
+      "grad_norm": 2.6875,
+      "learning_rate": 0.0004999340894419668,
+      "loss": 0.2043,
+      "step": 200
+    },
+    {
+      "epoch": 0.015238371671141426,
+      "grad_norm": 0.5,
+      "learning_rate": 0.0004999202489607236,
+      "loss": 0.2865,
+      "step": 210
+    },
+    {
+      "epoch": 0.015964008417386256,
+      "grad_norm": 3.015625,
+      "learning_rate": 0.0004999050906315055,
+      "loss": 0.2039,
+      "step": 220
+    },
+    {
+      "epoch": 0.016689645163631087,
+      "grad_norm": 0.99609375,
+      "learning_rate": 0.0004998886145342434,
+      "loss": 0.3509,
+      "step": 230
+    },
+    {
+      "epoch": 0.017415281909875915,
+      "grad_norm": 1.34375,
+      "learning_rate": 0.0004998708207558168,
+      "loss": 0.3208,
+      "step": 240
+    },
+    {
+      "epoch": 0.018140918656120747,
+      "grad_norm": 2.015625,
+      "learning_rate": 0.0004998517093900539,
+      "loss": 0.2307,
+      "step": 250
+    },
+    {
+      "epoch": 0.018866555402365575,
+      "grad_norm": 1.0703125,
+      "learning_rate": 0.0004998312805377302,
+      "loss": 0.2232,
+      "step": 260
+    },
+    {
+      "epoch": 0.019592192148610407,
+      "grad_norm": 1.7265625,
+      "learning_rate": 0.0004998095343065685,
+      "loss": 0.2587,
+      "step": 270
+    },
+    {
+      "epoch": 0.020317828894855235,
+      "grad_norm": 1.21875,
+      "learning_rate": 0.0004997864708112384,
+      "loss": 0.2175,
+      "step": 280
+    },
+    {
+      "epoch": 0.021043465641100066,
+      "grad_norm": 2.234375,
+      "learning_rate": 0.0004997620901733554,
+      "loss": 0.2185,
+      "step": 290
+    },
+    {
+      "epoch": 0.021769102387344894,
+      "grad_norm": 1.2734375,
+      "learning_rate": 0.0004997363925214804,
+      "loss": 0.2409,
+      "step": 300
+    },
+    {
+      "epoch": 0.022494739133589726,
+      "grad_norm": 1.7109375,
+      "learning_rate": 0.000499709377991119,
+      "loss": 0.1976,
+      "step": 310
+    },
+    {
+      "epoch": 0.023220375879834554,
+      "grad_norm": 1.7890625,
+      "learning_rate": 0.0004996810467247207,
+      "loss": 0.2899,
+      "step": 320
+    },
+    {
+      "epoch": 0.023946012626079385,
+      "grad_norm": 1.9296875,
+      "learning_rate": 0.0004996513988716786,
+      "loss": 0.2324,
+      "step": 330
+    },
+    {
+      "epoch": 0.024671649372324213,
+      "grad_norm": 1.59375,
+      "learning_rate": 0.0004996204345883278,
+      "loss": 0.2376,
+      "step": 340
+    },
+    {
+      "epoch": 0.025397286118569045,
+      "grad_norm": 1.734375,
+      "learning_rate": 0.0004995881540379454,
+      "loss": 0.2927,
+      "step": 350
+    },
+    {
+      "epoch": 0.026122922864813873,
+      "grad_norm": 1.9375,
+      "learning_rate": 0.0004995545573907492,
+      "loss": 0.2738,
+      "step": 360
+    },
+    {
+      "epoch": 0.026848559611058705,
+      "grad_norm": 2.421875,
+      "learning_rate": 0.0004995196448238966,
+      "loss": 0.2427,
+      "step": 370
+    },
+    {
+      "epoch": 0.027574196357303533,
+      "grad_norm": 2.671875,
+      "learning_rate": 0.0004994834165214843,
+      "loss": 0.2032,
+      "step": 380
+    },
+    {
+      "epoch": 0.028299833103548364,
+      "grad_norm": 3.328125,
+      "learning_rate": 0.0004994458726745468,
+      "loss": 0.2748,
+      "step": 390
+    },
+    {
+      "epoch": 0.029025469849793192,
+      "grad_norm": 0.498046875,
+      "learning_rate": 0.0004994070134810556,
+      "loss": 0.2275,
+      "step": 400
+    },
+    {
+      "epoch": 0.029751106596038024,
+      "grad_norm": 1.0703125,
+      "learning_rate": 0.000499366839145918,
+      "loss": 0.1725,
+      "step": 410
+    },
+    {
+      "epoch": 0.030476743342282852,
+      "grad_norm": 3.46875,
+      "learning_rate": 0.0004993253498809762,
+      "loss": 0.2298,
+      "step": 420
+    },
+    {
+      "epoch": 0.031202380088527683,
+      "grad_norm": 2.875,
+      "learning_rate": 0.0004992825459050064,
+      "loss": 0.2721,
+      "step": 430
+    },
+    {
+      "epoch": 0.03192801683477251,
+      "grad_norm": 1.6640625,
+      "learning_rate": 0.0004992384274437171,
+      "loss": 0.248,
+      "step": 440
+    },
+    {
+      "epoch": 0.03265365358101734,
+      "grad_norm": 1.8984375,
+      "learning_rate": 0.000499192994729748,
+      "loss": 0.1529,
+      "step": 450
+    },
+    {
+      "epoch": 0.033379290327262175,
+      "grad_norm": 2.234375,
+      "learning_rate": 0.0004991462480026693,
+      "loss": 0.2584,
+      "step": 460
+    },
+    {
+      "epoch": 0.034104927073507,
+      "grad_norm": 0.984375,
+      "learning_rate": 0.0004990981875089799,
+      "loss": 0.25,
+      "step": 470
+    },
+    {
+      "epoch": 0.03483056381975183,
+      "grad_norm": 1.75,
+      "learning_rate": 0.0004990488135021065,
+      "loss": 0.199,
+      "step": 480
+    },
+    {
+      "epoch": 0.03555620056599666,
+      "grad_norm": 0.7421875,
+      "learning_rate": 0.0004989981262424017,
+      "loss": 0.2546,
+      "step": 490
+    },
+    {
+      "epoch": 0.036281837312241494,
+      "grad_norm": 5.6875,
+      "learning_rate": 0.0004989461259971432,
+      "loss": 0.1523,
+      "step": 500
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 13781,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 10,
+  "total_flos": 0.0,
+  "train_batch_size": 5,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-500/training_args.bin b/checkpoint-500/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..7d5e801f990cb70d84bd631ef6b3738864bdfd3d
--- /dev/null
+++ b/checkpoint-500/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3cf317b5a0322fdf5b8c610bfc4afb6ece34736fcb1bea30f92c1212b9b53d69
+size 4984
diff --git a/checkpoint-60/README.md b/checkpoint-60/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..64d55750c9c68c8ef0ccff2f65b1fd32a16578a9
--- /dev/null
+++ b/checkpoint-60/README.md
@@ -0,0 +1,202 @@
+---
+library_name: peft
+base_model: ../pretrained/MiniCPM-2B-dpo-bf16/
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.10.0
\ No newline at end of file
diff --git a/checkpoint-60/adapter_config.json b/checkpoint-60/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..d0eb9db7c6b0c827aba0e86e5057f651b0f868c9
--- /dev/null
+++ b/checkpoint-60/adapter_config.json
@@ -0,0 +1,29 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "../pretrained/MiniCPM-2B-dpo-bf16/",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": "gaussian",
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-60/adapter_model.safetensors b/checkpoint-60/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..95deea04eebde18c4ef5073d0f233474ba8f6e2b
--- /dev/null
+++ b/checkpoint-60/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2ffc0b1e40df309488de28bc639669333a6fa23fefc9e18bd482709b0dab7dda
+size 5919456
diff --git a/checkpoint-60/optimizer.pt b/checkpoint-60/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8ab9376d66481f17508233c2c418166ab7f12406
--- /dev/null
+++ b/checkpoint-60/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b8e70fd7afe846461dfae557dbbcd5824ef2509e37536a7052d65cde3b041954
+size 11930938
diff --git a/checkpoint-60/rng_state_0.pth b/checkpoint-60/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..2b20205969f8ceab0cefd399d3b970db10ce340f
--- /dev/null
+++ b/checkpoint-60/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6f1f1853bd0289e0c2e286eebf30fcab4a60b8920f48c47f77d26ce7d8bceb68
+size 15024
diff --git a/checkpoint-60/rng_state_1.pth b/checkpoint-60/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..2c1773fb49b04c4d9f7577c4db77aefc1b0088f6
--- /dev/null
+++ b/checkpoint-60/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c70ca697246aed9cd7431fef50ae27f427404f1a9c5b2b804ce1be7c6f1d21e9
+size 15024
diff --git a/checkpoint-60/rng_state_2.pth b/checkpoint-60/rng_state_2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..49e9cc7554ed5cdab4a255dc022f887b1f39056f
--- /dev/null
+++ b/checkpoint-60/rng_state_2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:aedcb19fea85bf51e8855f549e5c3c5a5674194d15e5bb6c9d9f240c26b75a27
+size 15024
diff --git a/checkpoint-60/rng_state_3.pth b/checkpoint-60/rng_state_3.pth
new file mode 100644
index 0000000000000000000000000000000000000000..1abec23db3478fa5c839adcffe62874068160577
--- /dev/null
+++ b/checkpoint-60/rng_state_3.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:20e5f3f26883631500b94d11d5efc3f8a6d767ce8d700d55ba632033c3586e02
+size 15024
diff --git a/checkpoint-60/scheduler.pt b/checkpoint-60/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..458fba09c157a71cbbdde2b9613e797a5e2d65cf
--- /dev/null
+++ b/checkpoint-60/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:df166ed3d15765988caace624e48790c1828a0e504ddc5f4f32088517d27e31f
+size 1064
diff --git a/checkpoint-60/trainer_state.json b/checkpoint-60/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..22563423b8e5615dccd63913d581a61d1655801f
--- /dev/null
+++ b/checkpoint-60/trainer_state.json
@@ -0,0 +1,63 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.004353820477468979,
+  "eval_steps": 500,
+  "global_step": 60,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0007256367462448298,
+      "grad_norm": 12.0,
+      "learning_rate": 5e-05,
+      "loss": 3.0993,
+      "step": 10
+    },
+    {
+      "epoch": 0.0014512734924896596,
+      "grad_norm": 10.4375,
+      "learning_rate": 0.0001,
+      "loss": 2.208,
+      "step": 20
+    },
+    {
+      "epoch": 0.0021769102387344894,
+      "grad_norm": 6.5625,
+      "learning_rate": 0.00015,
+      "loss": 1.3285,
+      "step": 30
+    },
+    {
+      "epoch": 0.0029025469849793192,
+      "grad_norm": 6.21875,
+      "learning_rate": 0.0002,
+      "loss": 0.6895,
+      "step": 40
+    },
+    {
+      "epoch": 0.003628183731224149,
+      "grad_norm": 4.15625,
+      "learning_rate": 0.00025,
+      "loss": 0.714,
+      "step": 50
+    },
+    {
+      "epoch": 0.004353820477468979,
+      "grad_norm": 4.71875,
+      "learning_rate": 0.0003,
+      "loss": 0.4849,
+      "step": 60
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 13781,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 10,
+  "total_flos": 0.0,
+  "train_batch_size": 5,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-60/training_args.bin b/checkpoint-60/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..7d5e801f990cb70d84bd631ef6b3738864bdfd3d
--- /dev/null
+++ b/checkpoint-60/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3cf317b5a0322fdf5b8c610bfc4afb6ece34736fcb1bea30f92c1212b9b53d69
+size 4984
diff --git a/checkpoint-70/README.md b/checkpoint-70/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..64d55750c9c68c8ef0ccff2f65b1fd32a16578a9
--- /dev/null
+++ b/checkpoint-70/README.md
@@ -0,0 +1,202 @@
+---
+library_name: peft
+base_model: ../pretrained/MiniCPM-2B-dpo-bf16/
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.10.0
\ No newline at end of file
diff --git a/checkpoint-70/adapter_config.json b/checkpoint-70/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..d0eb9db7c6b0c827aba0e86e5057f651b0f868c9
--- /dev/null
+++ b/checkpoint-70/adapter_config.json
@@ -0,0 +1,29 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "../pretrained/MiniCPM-2B-dpo-bf16/",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": "gaussian",
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-70/adapter_model.safetensors b/checkpoint-70/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..4dd87c67a93a56195522dbc87ebfdc326acdbaa3
--- /dev/null
+++ b/checkpoint-70/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:198c64f96d0ab62d9a32aa716c07c7778b3f188bdf362c286ad485f3ea391446
+size 5919456
diff --git a/checkpoint-70/optimizer.pt b/checkpoint-70/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9877539eef920cfa781fda1728a861a7b27858b9
--- /dev/null
+++ b/checkpoint-70/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3c9e85ad0fac24ae041b6c25e8f46c0c574f9bf60c2e996e8b22cd66acce496e
+size 11930938
diff --git a/checkpoint-70/rng_state_0.pth b/checkpoint-70/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..b63151224da344101d7c5f9d3b321970ae59624c
--- /dev/null
+++ b/checkpoint-70/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3bd9a76af42011b67455136c010822d73652cc5a1906ab585a0dc85da705df4f
+size 15024
diff --git a/checkpoint-70/rng_state_1.pth b/checkpoint-70/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..7315b6da721c316e141e62dbe52f6e4f22167db4
--- /dev/null
+++ b/checkpoint-70/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:30a1fff25571e8a55aa610c430d18e6cd45f9c001032401ace33bd0acfca569c
+size 15024
diff --git a/checkpoint-70/rng_state_2.pth b/checkpoint-70/rng_state_2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..9881002aee7fa02076c4b828d615a04b6fd62976
--- /dev/null
+++ b/checkpoint-70/rng_state_2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b01196072c8cc109737dd5b3b63d9d7ca023ea19e3fc3cf6940d7b2ce8212c3d
+size 15024
diff --git a/checkpoint-70/rng_state_3.pth b/checkpoint-70/rng_state_3.pth
new file mode 100644
index 0000000000000000000000000000000000000000..a7559c9f3227d79c916332a1fb9fd2062183b44b
--- /dev/null
+++ b/checkpoint-70/rng_state_3.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:35448ff23a607170874ca44851806012e8ac2b41960a520b5afed0aaa3c68904
+size 15024
diff --git a/checkpoint-70/scheduler.pt b/checkpoint-70/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..408eb7322e25bf1b3c41df31ebb74dd436d585a7
--- /dev/null
+++ b/checkpoint-70/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b552e7344582191a3d8cdc4914ffe2a2ff20c71614d40f449e861bb8dd86cbfe
+size 1064
diff --git a/checkpoint-70/trainer_state.json b/checkpoint-70/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..354f59554883fe8438f611a515e03ac45a226043
--- /dev/null
+++ b/checkpoint-70/trainer_state.json
@@ -0,0 +1,70 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.005079457223713809,
+  "eval_steps": 500,
+  "global_step": 70,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0007256367462448298,
+      "grad_norm": 12.0,
+      "learning_rate": 5e-05,
+      "loss": 3.0993,
+      "step": 10
+    },
+    {
+      "epoch": 0.0014512734924896596,
+      "grad_norm": 10.4375,
+      "learning_rate": 0.0001,
+      "loss": 2.208,
+      "step": 20
+    },
+    {
+      "epoch": 0.0021769102387344894,
+      "grad_norm": 6.5625,
+      "learning_rate": 0.00015,
+      "loss": 1.3285,
+      "step": 30
+    },
+    {
+      "epoch": 0.0029025469849793192,
+      "grad_norm": 6.21875,
+      "learning_rate": 0.0002,
+      "loss": 0.6895,
+      "step": 40
+    },
+    {
+      "epoch": 0.003628183731224149,
+      "grad_norm": 4.15625,
+      "learning_rate": 0.00025,
+      "loss": 0.714,
+      "step": 50
+    },
+    {
+      "epoch": 0.004353820477468979,
+      "grad_norm": 4.71875,
+      "learning_rate": 0.0003,
+      "loss": 0.4849,
+      "step": 60
+    },
+    {
+      "epoch": 0.005079457223713809,
+      "grad_norm": 3.828125,
+      "learning_rate": 0.00035,
+      "loss": 0.3671,
+      "step": 70
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 13781,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 10,
+  "total_flos": 0.0,
+  "train_batch_size": 5,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-70/training_args.bin b/checkpoint-70/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..7d5e801f990cb70d84bd631ef6b3738864bdfd3d
--- /dev/null
+++ b/checkpoint-70/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3cf317b5a0322fdf5b8c610bfc4afb6ece34736fcb1bea30f92c1212b9b53d69
+size 4984
diff --git a/checkpoint-80/README.md b/checkpoint-80/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..64d55750c9c68c8ef0ccff2f65b1fd32a16578a9
--- /dev/null
+++ b/checkpoint-80/README.md
@@ -0,0 +1,202 @@
+---
+library_name: peft
+base_model: ../pretrained/MiniCPM-2B-dpo-bf16/
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.10.0
\ No newline at end of file
diff --git a/checkpoint-80/adapter_config.json b/checkpoint-80/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..d0eb9db7c6b0c827aba0e86e5057f651b0f868c9
--- /dev/null
+++ b/checkpoint-80/adapter_config.json
@@ -0,0 +1,29 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "../pretrained/MiniCPM-2B-dpo-bf16/",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": "gaussian",
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-80/adapter_model.safetensors b/checkpoint-80/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..ae302485718d6be06ecf148383fc31fdebfd9346
--- /dev/null
+++ b/checkpoint-80/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:34643a7560339a2c4d619df91d45e8601a26690da83abae7d69c835ed8999b85
+size 5919456
diff --git a/checkpoint-80/optimizer.pt b/checkpoint-80/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..68b8cabd0d108cd1b58fe0f28ea6f64ffdf5be41
--- /dev/null
+++ b/checkpoint-80/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:87e271d3f5250f29dd0b9c0d156f6d264b4c3a4fd22680cdc9058fb94960b604
+size 11930938
diff --git a/checkpoint-80/rng_state_0.pth b/checkpoint-80/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..76720b9437dc08ee3e12964d6811f69c89fd7c79
--- /dev/null
+++ b/checkpoint-80/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:af950c724214e25a469ed811cc52a5d554e829ceb6c1cf7a03bdc24c86d6e0b9
+size 15024
diff --git a/checkpoint-80/rng_state_1.pth b/checkpoint-80/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..004394f1e8517ab729806995011b19aad2a89178
--- /dev/null
+++ b/checkpoint-80/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ed86885179845003190a32cdc33e19f0053060bbdfa9a5b23e5b48eff7e4e9c8
+size 15024
diff --git a/checkpoint-80/rng_state_2.pth b/checkpoint-80/rng_state_2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..de54d59c561ea09a5172f38ed86e037fdccbf896
--- /dev/null
+++ b/checkpoint-80/rng_state_2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:24aec4517e40fd3b85c1bd78e397be1f786eaa07c086070755f93d9be01a8484
+size 15024
diff --git a/checkpoint-80/rng_state_3.pth b/checkpoint-80/rng_state_3.pth
new file mode 100644
index 0000000000000000000000000000000000000000..30240adedb98a17f51025bcf6635dd0cf7b25d2d
--- /dev/null
+++ b/checkpoint-80/rng_state_3.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c01947a5eaede74837243ff3c9d13af7d57cff8f3a4d3c5f34561778bb348151
+size 15024
diff --git a/checkpoint-80/scheduler.pt b/checkpoint-80/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d3283a05efc3be9a1bf147fbc9460fb834ebf396
--- /dev/null
+++ b/checkpoint-80/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a67bb0768bae7351b61f7be209327f4546a91ea0abaf1837ae1bf9461be78b9c
+size 1064
diff --git a/checkpoint-80/trainer_state.json b/checkpoint-80/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..357923f14ce654b59953f8270b1617da5dd8759c
--- /dev/null
+++ b/checkpoint-80/trainer_state.json
@@ -0,0 +1,77 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.0058050939699586385,
+  "eval_steps": 500,
+  "global_step": 80,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0007256367462448298,
+      "grad_norm": 12.0,
+      "learning_rate": 5e-05,
+      "loss": 3.0993,
+      "step": 10
+    },
+    {
+      "epoch": 0.0014512734924896596,
+      "grad_norm": 10.4375,
+      "learning_rate": 0.0001,
+      "loss": 2.208,
+      "step": 20
+    },
+    {
+      "epoch": 0.0021769102387344894,
+      "grad_norm": 6.5625,
+      "learning_rate": 0.00015,
+      "loss": 1.3285,
+      "step": 30
+    },
+    {
+      "epoch": 0.0029025469849793192,
+      "grad_norm": 6.21875,
+      "learning_rate": 0.0002,
+      "loss": 0.6895,
+      "step": 40
+    },
+    {
+      "epoch": 0.003628183731224149,
+      "grad_norm": 4.15625,
+      "learning_rate": 0.00025,
+      "loss": 0.714,
+      "step": 50
+    },
+    {
+      "epoch": 0.004353820477468979,
+      "grad_norm": 4.71875,
+      "learning_rate": 0.0003,
+      "loss": 0.4849,
+      "step": 60
+    },
+    {
+      "epoch": 0.005079457223713809,
+      "grad_norm": 3.828125,
+      "learning_rate": 0.00035,
+      "loss": 0.3671,
+      "step": 70
+    },
+    {
+      "epoch": 0.0058050939699586385,
+      "grad_norm": 4.03125,
+      "learning_rate": 0.0004,
+      "loss": 0.5693,
+      "step": 80
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 13781,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 10,
+  "total_flos": 0.0,
+  "train_batch_size": 5,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-80/training_args.bin b/checkpoint-80/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..7d5e801f990cb70d84bd631ef6b3738864bdfd3d
--- /dev/null
+++ b/checkpoint-80/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3cf317b5a0322fdf5b8c610bfc4afb6ece34736fcb1bea30f92c1212b9b53d69
+size 4984
diff --git a/checkpoint-90/README.md b/checkpoint-90/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..64d55750c9c68c8ef0ccff2f65b1fd32a16578a9
--- /dev/null
+++ b/checkpoint-90/README.md
@@ -0,0 +1,202 @@
+---
+library_name: peft
+base_model: ../pretrained/MiniCPM-2B-dpo-bf16/
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.10.0
\ No newline at end of file
diff --git a/checkpoint-90/adapter_config.json b/checkpoint-90/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..d0eb9db7c6b0c827aba0e86e5057f651b0f868c9
--- /dev/null
+++ b/checkpoint-90/adapter_config.json
@@ -0,0 +1,29 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "../pretrained/MiniCPM-2B-dpo-bf16/",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": "gaussian",
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
\ No newline at end of file
diff --git a/checkpoint-90/adapter_model.safetensors b/checkpoint-90/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..5fcc0de2919c72c3454209860ce6d360daf871b0
--- /dev/null
+++ b/checkpoint-90/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4bbf9ce9b405563dd9a84786e65579a092d69fbdb56b107c2a8ef28613980869
+size 5919456
diff --git a/checkpoint-90/optimizer.pt b/checkpoint-90/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..13aa7d206ccb6f469164d96b0f72e0b453a44f01
--- /dev/null
+++ b/checkpoint-90/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:249c6c912c16a077aa4fd73fdcce2c503a390894dc58df9a223157f8486c682f
+size 11930938
diff --git a/checkpoint-90/rng_state_0.pth b/checkpoint-90/rng_state_0.pth
new file mode 100644
index 0000000000000000000000000000000000000000..19720e2b18076fa2ead67de04ee9c96964f4ace1
--- /dev/null
+++ b/checkpoint-90/rng_state_0.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3959da6672a208373721817cf0e3832571aa7268aa9e524fa17dfd5d50cbed40
+size 15024
diff --git a/checkpoint-90/rng_state_1.pth b/checkpoint-90/rng_state_1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..24d076615f4330fab138043f520f9f161babe2ab
--- /dev/null
+++ b/checkpoint-90/rng_state_1.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3f3b85410c8672b9af1749593b68135a96800eca07b66320e42e9061f80da5f2
+size 15024
diff --git a/checkpoint-90/rng_state_2.pth b/checkpoint-90/rng_state_2.pth
new file mode 100644
index 0000000000000000000000000000000000000000..4437274ab7ddcbd85c29a0856b3f9c375d14acac
--- /dev/null
+++ b/checkpoint-90/rng_state_2.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ef04fc5cb5439ad39bd641e4aa7ed5be94ac766d2b3a825174e947dab98b4585
+size 15024
diff --git a/checkpoint-90/rng_state_3.pth b/checkpoint-90/rng_state_3.pth
new file mode 100644
index 0000000000000000000000000000000000000000..3efbeb495f36c856b57dd8fadae99936c837c806
--- /dev/null
+++ b/checkpoint-90/rng_state_3.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0b0ee1ee170078fd3882035945fc254ebb8766d78dfd4d44f2075cedab8774df
+size 15024
diff --git a/checkpoint-90/scheduler.pt b/checkpoint-90/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d888473538678f691ae72340b59cf830322ef3be
--- /dev/null
+++ b/checkpoint-90/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8b432d77df5e792101e943919fb0caf7f4ec7b95e573274866242a3e2703f1bb
+size 1064
diff --git a/checkpoint-90/trainer_state.json b/checkpoint-90/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..2612b8b27bb29800aea023e05ba3ef2fd681e9bc
--- /dev/null
+++ b/checkpoint-90/trainer_state.json
@@ -0,0 +1,84 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.006530730716203468,
+  "eval_steps": 500,
+  "global_step": 90,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0007256367462448298,
+      "grad_norm": 12.0,
+      "learning_rate": 5e-05,
+      "loss": 3.0993,
+      "step": 10
+    },
+    {
+      "epoch": 0.0014512734924896596,
+      "grad_norm": 10.4375,
+      "learning_rate": 0.0001,
+      "loss": 2.208,
+      "step": 20
+    },
+    {
+      "epoch": 0.0021769102387344894,
+      "grad_norm": 6.5625,
+      "learning_rate": 0.00015,
+      "loss": 1.3285,
+      "step": 30
+    },
+    {
+      "epoch": 0.0029025469849793192,
+      "grad_norm": 6.21875,
+      "learning_rate": 0.0002,
+      "loss": 0.6895,
+      "step": 40
+    },
+    {
+      "epoch": 0.003628183731224149,
+      "grad_norm": 4.15625,
+      "learning_rate": 0.00025,
+      "loss": 0.714,
+      "step": 50
+    },
+    {
+      "epoch": 0.004353820477468979,
+      "grad_norm": 4.71875,
+      "learning_rate": 0.0003,
+      "loss": 0.4849,
+      "step": 60
+    },
+    {
+      "epoch": 0.005079457223713809,
+      "grad_norm": 3.828125,
+      "learning_rate": 0.00035,
+      "loss": 0.3671,
+      "step": 70
+    },
+    {
+      "epoch": 0.0058050939699586385,
+      "grad_norm": 4.03125,
+      "learning_rate": 0.0004,
+      "loss": 0.5693,
+      "step": 80
+    },
+    {
+      "epoch": 0.006530730716203468,
+      "grad_norm": 3.296875,
+      "learning_rate": 0.00045000000000000004,
+      "loss": 0.4133,
+      "step": 90
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 13781,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 10,
+  "total_flos": 0.0,
+  "train_batch_size": 5,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/checkpoint-90/training_args.bin b/checkpoint-90/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..7d5e801f990cb70d84bd631ef6b3738864bdfd3d
--- /dev/null
+++ b/checkpoint-90/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3cf317b5a0322fdf5b8c610bfc4afb6ece34736fcb1bea30f92c1212b9b53d69
+size 4984