diff --git a/llama-factory/config/models/Qwen2.5-0.5B-Instruct.yaml b/llama-factory/config/models/Qwen2.5-0.5B-Instruct.yaml new file mode 100644 index 0000000000000000000000000000000000000000..492d7bbd5e816d44cec0df4a5fcd0d35a765eebb --- /dev/null +++ b/llama-factory/config/models/Qwen2.5-0.5B-Instruct.yaml @@ -0,0 +1,43 @@ +### model +model_name_or_path: Qwen/Qwen2.5-0.5B-Instruct + +### method +stage: sft +do_train: true +finetuning_type: lora +lora_target: all + +### dataset +dataset: alpaca_mgtv_p2 +template: qwen +cutoff_len: 8192 +max_samples: 25000 +overwrite_cache: true +preprocessing_num_workers: 16 + +### output +output_dir: saves/Qwen2.5-0.5B-Instruct +logging_steps: 5 +save_steps: 35 +plot_loss: true +# overwrite_output_dir: true + +### train +per_device_train_batch_size: 16 +gradient_accumulation_steps: 8 +learning_rate: 1.0e-4 +num_train_epochs: 2.0 +lr_scheduler_type: cosine +warmup_ratio: 0.1 +bf16: true +ddp_timeout: 180000000 + +### eval +val_size: 0.1 +per_device_eval_batch_size: 1 +eval_strategy: steps +eval_steps: 35 + +# report_to: wandb +report_to: none +run_name: Qwen2.5-0.5B-Instruct_lora_sft diff --git a/llama-factory/config/models/Qwen2.5-1.5B-Instruct.yaml b/llama-factory/config/models/Qwen2.5-1.5B-Instruct.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f4d3fff3174736cd9712575865af44670ba29caa --- /dev/null +++ b/llama-factory/config/models/Qwen2.5-1.5B-Instruct.yaml @@ -0,0 +1,43 @@ +### model +model_name_or_path: Qwen/Qwen2.5-1.5B-Instruct + +### method +stage: sft +do_train: true +finetuning_type: lora +lora_target: all + +### dataset +dataset: alpaca_mgtv_p2 +template: qwen +cutoff_len: 8192 +max_samples: 25000 +overwrite_cache: true +preprocessing_num_workers: 16 + +### output +output_dir: saves/Qwen2.5-1.5B-Instruct +logging_steps: 5 +save_steps: 35 +plot_loss: true +# overwrite_output_dir: true + +### train +per_device_train_batch_size: 16 +gradient_accumulation_steps: 8 +learning_rate: 1.0e-4 +num_train_epochs: 2.0 +lr_scheduler_type: cosine +warmup_ratio: 0.1 +bf16: true +ddp_timeout: 180000000 + +### eval +val_size: 0.1 +per_device_eval_batch_size: 1 +eval_strategy: steps +eval_steps: 35 + +# report_to: wandb +report_to: none +run_name: Qwen2.5-1.5B-Instruct_lora_sft diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/README.md b/llama-factory/saves/Qwen2.5-0.5B-Instruct/README.md new file mode 100644 index 0000000000000000000000000000000000000000..37b6ebeb2fc9947dae1afa316369584e1674c3af --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/README.md @@ -0,0 +1,73 @@ +--- +license: other +library_name: peft +tags: +- llama-factory +- lora +- generated_from_trainer +base_model: Qwen/Qwen2.5-0.5B-Instruct +model-index: +- name: Qwen2.5-0.5B-Instruct + results: [] +--- + + + +# Qwen2.5-0.5B-Instruct + +This model is a fine-tuned version of [Qwen/Qwen2.5-0.5B-Instruct](https://huggingface.co/Qwen/Qwen2.5-0.5B-Instruct) on the alpaca_mgtv_p2 dataset. 
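The two YAML configs above describe the same LoRA SFT recipe applied to two base models; in LLaMA-Factory such a file is typically passed to the trainer as `llamafactory-cli train <config>.yaml`. A minimal inference sketch for the resulting adapter — the model ID and `saves/` path are taken from this diff, the batch arithmetic from the config, and the rest assumes the standard `transformers`/`peft` APIs:

```python
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base_id = "Qwen/Qwen2.5-0.5B-Instruct"

# Schedule arithmetic implied by the config: per_device_train_batch_size (16)
# x gradient_accumulation_steps (8) = 128 effective batch size. With
# max_samples=25000 and val_size=0.1, one epoch is ~22500 / 128 ≈ 176
# optimizer steps, consistent with the ~175-step epochs and the 35-step
# eval/save cadence in the results below.

tokenizer = AutoTokenizer.from_pretrained(base_id)
base_model = AutoModelForCausalLM.from_pretrained(base_id)

# Attach the LoRA adapter written by the training run (path from this diff).
model = PeftModel.from_pretrained(base_model, "llama-factory/saves/Qwen2.5-0.5B-Instruct")
model.eval()
```

The same sketch applies to the 1.5B run with the base model ID and save path swapped.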
+It achieves the following results on the evaluation set: +- Loss: 0.2634 + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 0.0001 +- train_batch_size: 16 +- eval_batch_size: 1 +- seed: 42 +- gradient_accumulation_steps: 8 +- total_train_batch_size: 128 +- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08 +- lr_scheduler_type: cosine +- lr_scheduler_warmup_ratio: 0.1 +- num_epochs: 2.0 + +### Training results + +| Training Loss | Epoch | Step | Validation Loss | +|:-------------:|:------:|:----:|:---------------:| +| 0.4381 | 0.1990 | 35 | 0.3907 | +| 0.3556 | 0.3980 | 70 | 0.3124 | +| 0.3228 | 0.5970 | 105 | 0.3359 | +| 0.3026 | 0.7960 | 140 | 0.2879 | +| 0.2802 | 0.9950 | 175 | 0.2749 | +| 0.2645 | 1.1940 | 210 | 0.2682 | +| 0.3012 | 1.3930 | 245 | 0.2593 | +| 0.2628 | 1.5920 | 280 | 0.2647 | +| 0.2493 | 1.7910 | 315 | 0.2653 | +| 0.2642 | 1.9900 | 350 | 0.2634 | + + +### Framework versions + +- PEFT 0.11.1 +- Transformers 4.43.3 +- Pytorch 2.3.1+cu121 +- Datasets 2.20.0 +- Tokenizers 0.19.1 \ No newline at end of file diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/adapter_config.json b/llama-factory/saves/Qwen2.5-0.5B-Instruct/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8c7f1beb900e9e2385a8fc1a8be43468cedbcd94 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/adapter_config.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50274fc633d35196b94331c0c7bdb7fd9014c611d430da0631ef5848ba676fb3 +size 727 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/adapter_model.safetensors b/llama-factory/saves/Qwen2.5-0.5B-Instruct/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ac863e080a61e13f1190d801a9ebc1b333105c99 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c29ee2e16f2701ba7d8d0366fda6ad2724e99b9a20b1ed1a14d65bd4ac979e3f +size 17640136 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/added_tokens.json b/llama-factory/saves/Qwen2.5-0.5B-Instruct/added_tokens.json new file mode 100644 index 0000000000000000000000000000000000000000..cc13ea617def6b99e484e257e68cda43fa549161 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/added_tokens.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58b54bbe36fc752f79a24a271ef66a0a0830054b4dfad94bde757d851968060b +size 605 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/all_results.json b/llama-factory/saves/Qwen2.5-0.5B-Instruct/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..f7378cd19b6704f308fd2153db9ac1d1a071bc3e --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/all_results.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c53b468ad3f40129c611e94b4f9be7284a845ced74b4a6e446e34a856d633d2 +size 367 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-105/README.md b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-105/README.md new file mode 100644 index 0000000000000000000000000000000000000000..30886db19aae11d43ebfe252b3f3b41e5b22a57b --- /dev/null +++ 
b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-105/README.md @@ -0,0 +1,202 @@ +--- +library_name: peft +base_model: Qwen/Qwen2.5-0.5B-Instruct +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.11.1 \ No newline at end of file diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-105/adapter_config.json b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-105/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8c7f1beb900e9e2385a8fc1a8be43468cedbcd94 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-105/adapter_config.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50274fc633d35196b94331c0c7bdb7fd9014c611d430da0631ef5848ba676fb3 +size 727 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-105/adapter_model.safetensors b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-105/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7634e2cfe40d985f774e741d0b7d68c649a1dd74 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-105/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f682162275d78ace47a9f495319632c3611876a5208c5699d2c7f29626f0e147 +size 17640136 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-105/added_tokens.json b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-105/added_tokens.json new file mode 100644 index 0000000000000000000000000000000000000000..cc13ea617def6b99e484e257e68cda43fa549161 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-105/added_tokens.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58b54bbe36fc752f79a24a271ef66a0a0830054b4dfad94bde757d851968060b +size 605 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-105/merges.txt b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-105/merges.txt new file mode 100644 index 0000000000000000000000000000000000000000..80c1a19fae38f8f4c9ab32cc9d4e145c241147e6 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-105/merges.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8831e4f1a044471340f7c0a83d7bd71306a5b867e95fd870f74d0c5308a904d5 +size 1671853 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-105/optimizer.pt b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-105/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..10ccd0857ce5a3ebb89aab35fce927310454663b --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-105/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e469c532d00f4d489f4955771c78f47d9254823843e9decfb65d3972a0d154b +size 35474626 diff --git 
a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-105/rng_state.pth b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-105/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..0a11d850e4f9ddde03041fb0f3247ef616fbda77 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-105/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d138cfe3a4adf21f048848ee35837c9a757a0a3616ff7adbb45b69aac247435 +size 14244 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-105/scheduler.pt b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-105/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..31341807bd31628f4c98a98fdf9521811486cae0 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-105/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1df35574ae0ca5dc85d2b83a77932ee5ca941a642bbe53370339fdfdc60d04fe +size 1064 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-105/special_tokens_map.json b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-105/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..ee3f6af3ba18b7d721f02bdf3fbca2814b7eda35 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-105/special_tokens_map.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76862e765266b85aa9459767e33cbaf13970f327a0e88d1c65846c2ddd3a1ecd +size 613 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-105/tokenizer.json b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-105/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..7ebe2829a5954e029cfeb28ed919a1bd4096886a --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-105/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8506e7111b80c6d8635951a02eab0f4e1a8e4e5772da83846579e97b16f61bf +size 7031673 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-105/tokenizer_config.json b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-105/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e808236ca479658882170545cee38202d32ac5d7 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-105/tokenizer_config.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a7d46802920503f03ac0eef1adbb2254bf8898baac973f1333265b18dd2b890 +size 5333 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-105/trainer_state.json b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-105/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..579ad7d2ca11d667dd22396ff9867aefffa2e500 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-105/trainer_state.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e409c536ad2cf452b2e558b6bccd59dc83eedc5e585c6e9b933cce42bf0ee631 +size 5006 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-105/training_args.bin b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-105/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..3e3ff63bef726a38b8bcd657b66513ca67d16b62 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-105/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:8a464f2a1e0bd5fe1bb03e81fe1e5f27c94ac4a2182358860ddb4b5363339161 +size 5368 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-105/vocab.json b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-105/vocab.json new file mode 100644 index 0000000000000000000000000000000000000000..6c49fc63bcb109de13abe49e58f85a4cdba7b679 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-105/vocab.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca10d7e9fb3ed18575dd1e277a2579c16d108e32f27439684afa0e10b1440910 +size 2776833 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-140/README.md b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-140/README.md new file mode 100644 index 0000000000000000000000000000000000000000..30886db19aae11d43ebfe252b3f3b41e5b22a57b --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-140/README.md @@ -0,0 +1,202 @@ +--- +library_name: peft +base_model: Qwen/Qwen2.5-0.5B-Instruct +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.11.1 \ No newline at end of file diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-140/adapter_config.json b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-140/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8c7f1beb900e9e2385a8fc1a8be43468cedbcd94 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-140/adapter_config.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50274fc633d35196b94331c0c7bdb7fd9014c611d430da0631ef5848ba676fb3 +size 727 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-140/adapter_model.safetensors b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-140/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7f8922c6769525b02dd4af03a9ec298f07accd11 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-140/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3581f41fe78b0c0dbc3d27f75bc86b68bfd524d48062d21d5b23e7e3c90f6f48 +size 17640136 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-140/added_tokens.json b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-140/added_tokens.json new file mode 100644 index 0000000000000000000000000000000000000000..cc13ea617def6b99e484e257e68cda43fa549161 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-140/added_tokens.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58b54bbe36fc752f79a24a271ef66a0a0830054b4dfad94bde757d851968060b +size 605 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-140/merges.txt b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-140/merges.txt new file mode 100644 index 0000000000000000000000000000000000000000..80c1a19fae38f8f4c9ab32cc9d4e145c241147e6 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-140/merges.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8831e4f1a044471340f7c0a83d7bd71306a5b867e95fd870f74d0c5308a904d5 +size 1671853 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-140/optimizer.pt b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-140/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..9765703e1f47b785266c1303914157ad4ad79843 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-140/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:65470cfb110d0b028222348f87fed3f5819fdf3940ea571539c1624926dedf0b +size 35474626 diff --git 
a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-140/rng_state.pth b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-140/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..06c25b71551e4830f82f7687d2345c7fbd987daa --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-140/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b3ee827a7a00012c0a116546df467feee35e70376d81a7a85b1a70eb90414d3 +size 14244 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-140/scheduler.pt b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-140/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..61cce90db59762d086e6d3b6852a405fa6be19b9 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-140/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d84219d2b98872e152b7eefd22ea5f06fe28a80c80055cd7d460c7c6e20ea10 +size 1064 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-140/special_tokens_map.json b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-140/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..ee3f6af3ba18b7d721f02bdf3fbca2814b7eda35 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-140/special_tokens_map.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76862e765266b85aa9459767e33cbaf13970f327a0e88d1c65846c2ddd3a1ecd +size 613 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-140/tokenizer.json b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-140/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..7ebe2829a5954e029cfeb28ed919a1bd4096886a --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-140/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8506e7111b80c6d8635951a02eab0f4e1a8e4e5772da83846579e97b16f61bf +size 7031673 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-140/tokenizer_config.json b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-140/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e808236ca479658882170545cee38202d32ac5d7 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-140/tokenizer_config.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a7d46802920503f03ac0eef1adbb2254bf8898baac973f1333265b18dd2b890 +size 5333 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-140/trainer_state.json b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-140/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..c701fb1eb20a57dde6546bdecabd9d4568800f37 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-140/trainer_state.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f91e33bba65e1f19225cae1ace3b63ff9461d6115f37e512601a68dc79533c6f +size 6431 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-140/training_args.bin b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-140/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..3e3ff63bef726a38b8bcd657b66513ca67d16b62 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-140/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:8a464f2a1e0bd5fe1bb03e81fe1e5f27c94ac4a2182358860ddb4b5363339161 +size 5368 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-140/vocab.json b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-140/vocab.json new file mode 100644 index 0000000000000000000000000000000000000000..6c49fc63bcb109de13abe49e58f85a4cdba7b679 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-140/vocab.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca10d7e9fb3ed18575dd1e277a2579c16d108e32f27439684afa0e10b1440910 +size 2776833 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-175/README.md b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-175/README.md new file mode 100644 index 0000000000000000000000000000000000000000..30886db19aae11d43ebfe252b3f3b41e5b22a57b --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-175/README.md @@ -0,0 +1,202 @@ +--- +library_name: peft +base_model: Qwen/Qwen2.5-0.5B-Instruct +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.11.1 \ No newline at end of file diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-175/adapter_config.json b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-175/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8c7f1beb900e9e2385a8fc1a8be43468cedbcd94 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-175/adapter_config.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50274fc633d35196b94331c0c7bdb7fd9014c611d430da0631ef5848ba676fb3 +size 727 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-175/adapter_model.safetensors b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-175/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f8b9b5dae6c597abdee623a71df24c31d3735df1 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-175/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b395e0fe356354657b39f2aa75e26b73d6daf9b8d9cd2b55d03cf24db23c4288 +size 17640136 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-175/added_tokens.json b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-175/added_tokens.json new file mode 100644 index 0000000000000000000000000000000000000000..cc13ea617def6b99e484e257e68cda43fa549161 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-175/added_tokens.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58b54bbe36fc752f79a24a271ef66a0a0830054b4dfad94bde757d851968060b +size 605 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-175/merges.txt b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-175/merges.txt new file mode 100644 index 0000000000000000000000000000000000000000..80c1a19fae38f8f4c9ab32cc9d4e145c241147e6 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-175/merges.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8831e4f1a044471340f7c0a83d7bd71306a5b867e95fd870f74d0c5308a904d5 +size 1671853 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-175/optimizer.pt b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-175/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..79b62af1f67bb23b176fd50389ec5dea0de0b537 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-175/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:98cebd0da09edee76848c84a525c55272c2e0ee5ae00b10e19c9b989ba289808 +size 35474626 diff --git 
a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-175/rng_state.pth b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-175/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..41dfa7d7903dea42d227bad638c2c750928d590c --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-175/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c062f7f375beded48b5337f5a3f3a5cb38807fa3e85dbf3e294c0ab6b627bfc2 +size 14244 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-175/scheduler.pt b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-175/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..caf175aaec2f1527a696c358e4aa29e516403eb7 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-175/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c52b9ccde85457c1de9dfacdf0248d968713afb8256de2ba990f67de54c96d00 +size 1064 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-175/special_tokens_map.json b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-175/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..ee3f6af3ba18b7d721f02bdf3fbca2814b7eda35 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-175/special_tokens_map.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76862e765266b85aa9459767e33cbaf13970f327a0e88d1c65846c2ddd3a1ecd +size 613 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-175/tokenizer.json b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-175/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..7ebe2829a5954e029cfeb28ed919a1bd4096886a --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-175/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8506e7111b80c6d8635951a02eab0f4e1a8e4e5772da83846579e97b16f61bf +size 7031673 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-175/tokenizer_config.json b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-175/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e808236ca479658882170545cee38202d32ac5d7 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-175/tokenizer_config.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a7d46802920503f03ac0eef1adbb2254bf8898baac973f1333265b18dd2b890 +size 5333 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-175/trainer_state.json b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-175/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..0062e5f8e9ece94558509b9567a5ce1a51820f7a --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-175/trainer_state.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eeb8c3cf58f45591e26488cfd44f99ef8fbdbb7cb1b7ed9013556091126638e5 +size 7857 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-175/training_args.bin b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-175/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..3e3ff63bef726a38b8bcd657b66513ca67d16b62 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-175/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:8a464f2a1e0bd5fe1bb03e81fe1e5f27c94ac4a2182358860ddb4b5363339161 +size 5368 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-175/vocab.json b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-175/vocab.json new file mode 100644 index 0000000000000000000000000000000000000000..6c49fc63bcb109de13abe49e58f85a4cdba7b679 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-175/vocab.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca10d7e9fb3ed18575dd1e277a2579c16d108e32f27439684afa0e10b1440910 +size 2776833 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-210/README.md b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-210/README.md new file mode 100644 index 0000000000000000000000000000000000000000..30886db19aae11d43ebfe252b3f3b41e5b22a57b --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-210/README.md @@ -0,0 +1,202 @@ +--- +library_name: peft +base_model: Qwen/Qwen2.5-0.5B-Instruct +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.11.1 \ No newline at end of file diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-210/adapter_config.json b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-210/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8c7f1beb900e9e2385a8fc1a8be43468cedbcd94 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-210/adapter_config.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50274fc633d35196b94331c0c7bdb7fd9014c611d430da0631ef5848ba676fb3 +size 727 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-210/adapter_model.safetensors b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-210/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c7c80b489fea8cca2ef82e3a61f691ad9f659370 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-210/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:685f5e5f6e9cf7f5b4aa06bc7f59d230f3204cbdc1427c94bed8969d1d4f269e +size 17640136 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-210/added_tokens.json b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-210/added_tokens.json new file mode 100644 index 0000000000000000000000000000000000000000..cc13ea617def6b99e484e257e68cda43fa549161 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-210/added_tokens.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58b54bbe36fc752f79a24a271ef66a0a0830054b4dfad94bde757d851968060b +size 605 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-210/merges.txt b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-210/merges.txt new file mode 100644 index 0000000000000000000000000000000000000000..80c1a19fae38f8f4c9ab32cc9d4e145c241147e6 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-210/merges.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8831e4f1a044471340f7c0a83d7bd71306a5b867e95fd870f74d0c5308a904d5 +size 1671853 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-210/optimizer.pt b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-210/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..fef53038cb4c50012b19024e3fbcd1bbd411b93b --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-210/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1f4dd2d5770271613057aa34ec8636d8770b04ae1762b4fdc04b614e26f7c2e +size 35474626 diff --git 
a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-210/rng_state.pth b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-210/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..407e47abbdfb90afd3e1f979b5c0260135d2050e --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-210/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9899ccda7f0d8d9511991180b93aab508ce6e8489de708c88ad1188e7e1d90d6 +size 14244 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-210/scheduler.pt b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-210/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..419b1e14bcc9207421651855b0ff80e2794ef324 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-210/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:899dadd74ba016d8a701a0e5dc593ac31e21ddf1b80316db30ef9fa873894423 +size 1064 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-210/special_tokens_map.json b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-210/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..ee3f6af3ba18b7d721f02bdf3fbca2814b7eda35 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-210/special_tokens_map.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76862e765266b85aa9459767e33cbaf13970f327a0e88d1c65846c2ddd3a1ecd +size 613 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-210/tokenizer.json b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-210/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..7ebe2829a5954e029cfeb28ed919a1bd4096886a --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-210/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8506e7111b80c6d8635951a02eab0f4e1a8e4e5772da83846579e97b16f61bf +size 7031673 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-210/tokenizer_config.json b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-210/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e808236ca479658882170545cee38202d32ac5d7 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-210/tokenizer_config.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a7d46802920503f03ac0eef1adbb2254bf8898baac973f1333265b18dd2b890 +size 5333 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-210/trainer_state.json b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-210/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..122b6a79417caed55d3c6a02fe82d4d81adb010f --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-210/trainer_state.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:458116da27d68dee377da30737897373f8e7c60bc60203a6f33a4b44c07a018d +size 9280 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-210/training_args.bin b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-210/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..3e3ff63bef726a38b8bcd657b66513ca67d16b62 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-210/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:8a464f2a1e0bd5fe1bb03e81fe1e5f27c94ac4a2182358860ddb4b5363339161 +size 5368 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-210/vocab.json b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-210/vocab.json new file mode 100644 index 0000000000000000000000000000000000000000..6c49fc63bcb109de13abe49e58f85a4cdba7b679 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-210/vocab.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca10d7e9fb3ed18575dd1e277a2579c16d108e32f27439684afa0e10b1440910 +size 2776833 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-245/README.md b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-245/README.md new file mode 100644 index 0000000000000000000000000000000000000000..30886db19aae11d43ebfe252b3f3b41e5b22a57b --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-245/README.md @@ -0,0 +1,202 @@ +--- +library_name: peft +base_model: Qwen/Qwen2.5-0.5B-Instruct +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.11.1 \ No newline at end of file diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-245/adapter_config.json b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-245/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8c7f1beb900e9e2385a8fc1a8be43468cedbcd94 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-245/adapter_config.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50274fc633d35196b94331c0c7bdb7fd9014c611d430da0631ef5848ba676fb3 +size 727 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-245/adapter_model.safetensors b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-245/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fc0d10adbddcaf30dbbf1745f08bfd9428db5a49 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-245/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4be4ea527017822fc5a63d5b3bbc4efd12fcbff368dc5c57a470b1bbe7ce0b60 +size 17640136 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-245/added_tokens.json b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-245/added_tokens.json new file mode 100644 index 0000000000000000000000000000000000000000..cc13ea617def6b99e484e257e68cda43fa549161 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-245/added_tokens.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58b54bbe36fc752f79a24a271ef66a0a0830054b4dfad94bde757d851968060b +size 605 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-245/merges.txt b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-245/merges.txt new file mode 100644 index 0000000000000000000000000000000000000000..80c1a19fae38f8f4c9ab32cc9d4e145c241147e6 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-245/merges.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8831e4f1a044471340f7c0a83d7bd71306a5b867e95fd870f74d0c5308a904d5 +size 1671853 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-245/optimizer.pt b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-245/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..57625c5d46f0183fadb99650e3d5b1cccefd27f0 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-245/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfc9e70697b9c2ddab26919e8101af3c3bf9b4b63367669008850a0ae7f434fd +size 35474626 diff --git 
a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-245/rng_state.pth b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-245/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..585f2a6a28a143f55b2a0574f949d7f87bb22a65 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-245/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48ee9b73399c28d7e668360bf1d5a4d11095c4738bf96c13f7bb6fbff59f8ccb +size 14244 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-245/scheduler.pt b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-245/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..06881b899f23cfe38e114beffd6a2057ea8fadf6 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-245/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bce9fd37ac7a73d2e522f6aa056fe3582cfe6e7367584d378837991de3ebbf16 +size 1064 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-245/special_tokens_map.json b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-245/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..ee3f6af3ba18b7d721f02bdf3fbca2814b7eda35 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-245/special_tokens_map.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76862e765266b85aa9459767e33cbaf13970f327a0e88d1c65846c2ddd3a1ecd +size 613 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-245/tokenizer.json b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-245/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..7ebe2829a5954e029cfeb28ed919a1bd4096886a --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-245/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8506e7111b80c6d8635951a02eab0f4e1a8e4e5772da83846579e97b16f61bf +size 7031673 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-245/tokenizer_config.json b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-245/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e808236ca479658882170545cee38202d32ac5d7 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-245/tokenizer_config.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a7d46802920503f03ac0eef1adbb2254bf8898baac973f1333265b18dd2b890 +size 5333 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-245/trainer_state.json b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-245/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..a38a814023579b16d39bc43720f9609cadeb7428 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-245/trainer_state.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:021b124ad9c2f9de4b7022a7617925ca3b3981931461f41cbc8859cdf499b5da +size 10707 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-245/training_args.bin b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-245/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..3e3ff63bef726a38b8bcd657b66513ca67d16b62 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-245/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:8a464f2a1e0bd5fe1bb03e81fe1e5f27c94ac4a2182358860ddb4b5363339161 +size 5368 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-245/vocab.json b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-245/vocab.json new file mode 100644 index 0000000000000000000000000000000000000000..6c49fc63bcb109de13abe49e58f85a4cdba7b679 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-245/vocab.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca10d7e9fb3ed18575dd1e277a2579c16d108e32f27439684afa0e10b1440910 +size 2776833 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-280/README.md b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-280/README.md new file mode 100644 index 0000000000000000000000000000000000000000..30886db19aae11d43ebfe252b3f3b41e5b22a57b --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-280/README.md @@ -0,0 +1,202 @@ +--- +library_name: peft +base_model: Qwen/Qwen2.5-0.5B-Instruct +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.11.1 \ No newline at end of file diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-280/adapter_config.json b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-280/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8c7f1beb900e9e2385a8fc1a8be43468cedbcd94 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-280/adapter_config.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50274fc633d35196b94331c0c7bdb7fd9014c611d430da0631ef5848ba676fb3 +size 727 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-280/adapter_model.safetensors b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-280/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cc7fb68abc5081c6f094733618c27ce2fdf1c5f4 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-280/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03b0186c52e11381e4875e7815e62f7e1eb7f03a42222b152f64703f80b6ed3c +size 17640136 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-280/added_tokens.json b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-280/added_tokens.json new file mode 100644 index 0000000000000000000000000000000000000000..cc13ea617def6b99e484e257e68cda43fa549161 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-280/added_tokens.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58b54bbe36fc752f79a24a271ef66a0a0830054b4dfad94bde757d851968060b +size 605 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-280/merges.txt b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-280/merges.txt new file mode 100644 index 0000000000000000000000000000000000000000..80c1a19fae38f8f4c9ab32cc9d4e145c241147e6 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-280/merges.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8831e4f1a044471340f7c0a83d7bd71306a5b867e95fd870f74d0c5308a904d5 +size 1671853 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-280/optimizer.pt b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-280/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..25e217480aaa27cdd5362008ea09b82c54b911a6 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-280/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51a8d5195b96e28a650767d560b45acf0df143851ddfcfb9f007898c9daa5f9e +size 35474626 diff --git 
a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-280/rng_state.pth b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-280/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..87383f4346e5c6eea0e725de97c392797ed938b5 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-280/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d245e05e72192c132e0f2edb6fdcae0c578c890f0fe912f17ec7b0bba2d38cc3 +size 14244 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-280/scheduler.pt b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-280/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..8524c54f7e80c306ab69001ab3e5ce83c067a269 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-280/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48872d731120fe18674f751953b72c38d9a966768719eb9601a17faaf31a5957 +size 1064 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-280/special_tokens_map.json b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-280/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..ee3f6af3ba18b7d721f02bdf3fbca2814b7eda35 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-280/special_tokens_map.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76862e765266b85aa9459767e33cbaf13970f327a0e88d1c65846c2ddd3a1ecd +size 613 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-280/tokenizer.json b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-280/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..7ebe2829a5954e029cfeb28ed919a1bd4096886a --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-280/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8506e7111b80c6d8635951a02eab0f4e1a8e4e5772da83846579e97b16f61bf +size 7031673 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-280/tokenizer_config.json b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-280/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e808236ca479658882170545cee38202d32ac5d7 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-280/tokenizer_config.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a7d46802920503f03ac0eef1adbb2254bf8898baac973f1333265b18dd2b890 +size 5333 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-280/trainer_state.json b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-280/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..906237eed908c6c40065cf78b7fc850e85895375 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-280/trainer_state.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38d882b22a0d90ed255235dfeaba0ac596ee5f8ecad1b941b80e17863334444c +size 12135 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-280/training_args.bin b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-280/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..3e3ff63bef726a38b8bcd657b66513ca67d16b62 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-280/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:8a464f2a1e0bd5fe1bb03e81fe1e5f27c94ac4a2182358860ddb4b5363339161 +size 5368 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-280/vocab.json b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-280/vocab.json new file mode 100644 index 0000000000000000000000000000000000000000..6c49fc63bcb109de13abe49e58f85a4cdba7b679 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-280/vocab.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca10d7e9fb3ed18575dd1e277a2579c16d108e32f27439684afa0e10b1440910 +size 2776833 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-315/README.md b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-315/README.md new file mode 100644 index 0000000000000000000000000000000000000000..30886db19aae11d43ebfe252b3f3b41e5b22a57b --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-315/README.md @@ -0,0 +1,202 @@ +--- +library_name: peft +base_model: Qwen/Qwen2.5-0.5B-Instruct +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.11.1 \ No newline at end of file diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-315/adapter_config.json b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-315/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8c7f1beb900e9e2385a8fc1a8be43468cedbcd94 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-315/adapter_config.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50274fc633d35196b94331c0c7bdb7fd9014c611d430da0631ef5848ba676fb3 +size 727 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-315/adapter_model.safetensors b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-315/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..218df542ffdf5b69c52774fab180f1a554791cb5 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-315/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:901cec5fad99fee7917a9165895530097ee1f9ad02616068204c4646c0cbba09 +size 17640136 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-315/added_tokens.json b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-315/added_tokens.json new file mode 100644 index 0000000000000000000000000000000000000000..cc13ea617def6b99e484e257e68cda43fa549161 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-315/added_tokens.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58b54bbe36fc752f79a24a271ef66a0a0830054b4dfad94bde757d851968060b +size 605 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-315/merges.txt b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-315/merges.txt new file mode 100644 index 0000000000000000000000000000000000000000..80c1a19fae38f8f4c9ab32cc9d4e145c241147e6 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-315/merges.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8831e4f1a044471340f7c0a83d7bd71306a5b867e95fd870f74d0c5308a904d5 +size 1671853 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-315/optimizer.pt b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-315/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..01df6423440c94a1171cc7a74c7e13a86975da8f --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-315/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99c62f0dfa6745a8f685a3f33d1dea2127b30052f4273e8fd0bb676136558db3 +size 35474626 diff --git 
a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-315/rng_state.pth b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-315/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..87571ba044576778d1d6e555eff20ea04c20bbab --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-315/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5edb34d031c0c2b447f3eaadb401a4c1e7e7e6d8c096e28b7092e01a8bd48c92 +size 14244 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-315/scheduler.pt b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-315/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..e862fd38e2c7cdddf014a017679cfb606782bbfd --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-315/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d3743edaf33aa6d682b5e3cd55c73686d21dd7b15b0ef47ae54db03259173ce +size 1064 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-315/special_tokens_map.json b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-315/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..ee3f6af3ba18b7d721f02bdf3fbca2814b7eda35 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-315/special_tokens_map.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76862e765266b85aa9459767e33cbaf13970f327a0e88d1c65846c2ddd3a1ecd +size 613 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-315/tokenizer.json b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-315/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..7ebe2829a5954e029cfeb28ed919a1bd4096886a --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-315/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8506e7111b80c6d8635951a02eab0f4e1a8e4e5772da83846579e97b16f61bf +size 7031673 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-315/tokenizer_config.json b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-315/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e808236ca479658882170545cee38202d32ac5d7 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-315/tokenizer_config.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a7d46802920503f03ac0eef1adbb2254bf8898baac973f1333265b18dd2b890 +size 5333 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-315/trainer_state.json b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-315/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..b948dc97be3036beab7371e092db4d5677e7b267 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-315/trainer_state.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6ee30111f0f899279f0249fee458b99a841e30d32d13bf70aa34370725d3d44 +size 13560 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-315/training_args.bin b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-315/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..3e3ff63bef726a38b8bcd657b66513ca67d16b62 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-315/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:8a464f2a1e0bd5fe1bb03e81fe1e5f27c94ac4a2182358860ddb4b5363339161 +size 5368 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-315/vocab.json b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-315/vocab.json new file mode 100644 index 0000000000000000000000000000000000000000..6c49fc63bcb109de13abe49e58f85a4cdba7b679 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-315/vocab.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca10d7e9fb3ed18575dd1e277a2579c16d108e32f27439684afa0e10b1440910 +size 2776833 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-35/README.md b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-35/README.md new file mode 100644 index 0000000000000000000000000000000000000000..30886db19aae11d43ebfe252b3f3b41e5b22a57b --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-35/README.md @@ -0,0 +1,202 @@ +--- +library_name: peft +base_model: Qwen/Qwen2.5-0.5B-Instruct +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.11.1 \ No newline at end of file diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-35/adapter_config.json b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-35/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8c7f1beb900e9e2385a8fc1a8be43468cedbcd94 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-35/adapter_config.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50274fc633d35196b94331c0c7bdb7fd9014c611d430da0631ef5848ba676fb3 +size 727 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-35/adapter_model.safetensors b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-35/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..462d6d533675faedea43a368999e264f4b6d5ff0 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-35/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:608b63c3df40830b70e9809e4e6b9e5bcb84416c4af09d8ea38f85d59dc4e8a9 +size 17640136 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-35/added_tokens.json b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-35/added_tokens.json new file mode 100644 index 0000000000000000000000000000000000000000..cc13ea617def6b99e484e257e68cda43fa549161 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-35/added_tokens.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58b54bbe36fc752f79a24a271ef66a0a0830054b4dfad94bde757d851968060b +size 605 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-35/merges.txt b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-35/merges.txt new file mode 100644 index 0000000000000000000000000000000000000000..80c1a19fae38f8f4c9ab32cc9d4e145c241147e6 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-35/merges.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8831e4f1a044471340f7c0a83d7bd71306a5b867e95fd870f74d0c5308a904d5 +size 1671853 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-35/optimizer.pt b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-35/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..634f340329e926dcca7adbd302a73a286b58c2a3 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-35/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9f7fe54e8ccf277ff52737356488592ce0528b2e331c3e9b266f2428b30ae87 +size 35474626 diff --git 
a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-35/rng_state.pth b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-35/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..33cefe6919222ddfa3c3946df69b8e5c5a17a0fc --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-35/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ff264f99d31b522cc7e2a4eac9d38606d0c58a34c0adc74d71e0ca8b371dc36 +size 14244 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-35/scheduler.pt b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-35/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..73bbef1c4091057200c13a251b858c508a5f73cf --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-35/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:713eaf28d14874341960ad352a5374454ad5735a0a3ccca502696adbb018f8cd +size 1064 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-35/special_tokens_map.json b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-35/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..ee3f6af3ba18b7d721f02bdf3fbca2814b7eda35 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-35/special_tokens_map.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76862e765266b85aa9459767e33cbaf13970f327a0e88d1c65846c2ddd3a1ecd +size 613 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-35/tokenizer.json b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-35/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..7ebe2829a5954e029cfeb28ed919a1bd4096886a --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-35/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8506e7111b80c6d8635951a02eab0f4e1a8e4e5772da83846579e97b16f61bf +size 7031673 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-35/tokenizer_config.json b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-35/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e808236ca479658882170545cee38202d32ac5d7 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-35/tokenizer_config.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a7d46802920503f03ac0eef1adbb2254bf8898baac973f1333265b18dd2b890 +size 5333 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-35/trainer_state.json b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-35/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..462fd2d49c4430837a3a105a994679c232fa61ed --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-35/trainer_state.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:849f8f298cc74303a67a6ca31d605cf21c6fe1a9142b800cfdee09b00b224971 +size 2165 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-35/training_args.bin b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-35/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..3e3ff63bef726a38b8bcd657b66513ca67d16b62 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-35/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:8a464f2a1e0bd5fe1bb03e81fe1e5f27c94ac4a2182358860ddb4b5363339161 +size 5368 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-35/vocab.json b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-35/vocab.json new file mode 100644 index 0000000000000000000000000000000000000000..6c49fc63bcb109de13abe49e58f85a4cdba7b679 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-35/vocab.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca10d7e9fb3ed18575dd1e277a2579c16d108e32f27439684afa0e10b1440910 +size 2776833 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-350/README.md b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-350/README.md new file mode 100644 index 0000000000000000000000000000000000000000..30886db19aae11d43ebfe252b3f3b41e5b22a57b --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-350/README.md @@ -0,0 +1,202 @@ +--- +library_name: peft +base_model: Qwen/Qwen2.5-0.5B-Instruct +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.11.1 \ No newline at end of file diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-350/adapter_config.json b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-350/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8c7f1beb900e9e2385a8fc1a8be43468cedbcd94 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-350/adapter_config.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50274fc633d35196b94331c0c7bdb7fd9014c611d430da0631ef5848ba676fb3 +size 727 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-350/adapter_model.safetensors b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-350/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ac863e080a61e13f1190d801a9ebc1b333105c99 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-350/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c29ee2e16f2701ba7d8d0366fda6ad2724e99b9a20b1ed1a14d65bd4ac979e3f +size 17640136 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-350/added_tokens.json b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-350/added_tokens.json new file mode 100644 index 0000000000000000000000000000000000000000..cc13ea617def6b99e484e257e68cda43fa549161 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-350/added_tokens.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58b54bbe36fc752f79a24a271ef66a0a0830054b4dfad94bde757d851968060b +size 605 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-350/merges.txt b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-350/merges.txt new file mode 100644 index 0000000000000000000000000000000000000000..80c1a19fae38f8f4c9ab32cc9d4e145c241147e6 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-350/merges.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8831e4f1a044471340f7c0a83d7bd71306a5b867e95fd870f74d0c5308a904d5 +size 1671853 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-350/optimizer.pt b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-350/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..9bf86e111964d8638a60554a5939d05c030fd64d --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-350/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6ecb9a5ad0feea62777b8450844108bab95eea07bcdcbf3e51799d10941962c +size 35474626 diff --git 
a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-350/rng_state.pth b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-350/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..58d17c224a6607097f486f1f48f823a1a17c6ef9 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-350/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3e5d946241df2516b06d7074d8779088eae7607173ad780df56583910a9589b +size 14244 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-350/scheduler.pt b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-350/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..f76c07a3f0e64c9417333cce47e5b470958a860d --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-350/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70732dec4f14211b78555ce1acfc091b59c12a05595924c42e5da10d3a62cd6d +size 1064 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-350/special_tokens_map.json b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-350/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..ee3f6af3ba18b7d721f02bdf3fbca2814b7eda35 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-350/special_tokens_map.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76862e765266b85aa9459767e33cbaf13970f327a0e88d1c65846c2ddd3a1ecd +size 613 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-350/tokenizer.json b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-350/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..7ebe2829a5954e029cfeb28ed919a1bd4096886a --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-350/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8506e7111b80c6d8635951a02eab0f4e1a8e4e5772da83846579e97b16f61bf +size 7031673 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-350/tokenizer_config.json b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-350/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e808236ca479658882170545cee38202d32ac5d7 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-350/tokenizer_config.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a7d46802920503f03ac0eef1adbb2254bf8898baac973f1333265b18dd2b890 +size 5333 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-350/trainer_state.json b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-350/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..dfdf52c97b08b853e327b9c8ccfb0eb63d9a8a3b --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-350/trainer_state.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48f27c6e648665110f66c7dcc5176f44cc76fa40c9bc4637e893cdab01626e72 +size 14967 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-350/training_args.bin b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-350/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..3e3ff63bef726a38b8bcd657b66513ca67d16b62 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-350/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:8a464f2a1e0bd5fe1bb03e81fe1e5f27c94ac4a2182358860ddb4b5363339161 +size 5368 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-350/vocab.json b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-350/vocab.json new file mode 100644 index 0000000000000000000000000000000000000000..6c49fc63bcb109de13abe49e58f85a4cdba7b679 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-350/vocab.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca10d7e9fb3ed18575dd1e277a2579c16d108e32f27439684afa0e10b1440910 +size 2776833 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-70/README.md b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-70/README.md new file mode 100644 index 0000000000000000000000000000000000000000..30886db19aae11d43ebfe252b3f3b41e5b22a57b --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-70/README.md @@ -0,0 +1,202 @@ +--- +library_name: peft +base_model: Qwen/Qwen2.5-0.5B-Instruct +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
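Aside: the save directories added in this diff ship a `trainer_log.jsonl` next to `training_loss.png`. A minimal sketch for re-plotting the loss curve from it, assuming one JSON object per line with `current_steps`/`loss` fields as in typical LLaMA-Factory logs:

```python
import json
import matplotlib.pyplot as plt

# Field names ("current_steps", "loss") are assumed from typical LLaMA-Factory logs.
steps, losses = [], []
with open("llama-factory/saves/Qwen2.5-0.5B-Instruct/trainer_log.jsonl") as f:
    for line in f:
        record = json.loads(line)
        if "loss" in record:
            steps.append(record["current_steps"])
            losses.append(record["loss"])

plt.plot(steps, losses)
plt.xlabel("step")
plt.ylabel("training loss")
plt.savefig("training_loss_replot.png")
```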
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.11.1 \ No newline at end of file diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-70/adapter_config.json b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-70/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8c7f1beb900e9e2385a8fc1a8be43468cedbcd94 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-70/adapter_config.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50274fc633d35196b94331c0c7bdb7fd9014c611d430da0631ef5848ba676fb3 +size 727 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-70/adapter_model.safetensors b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-70/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..536ae9b218d038e3d8d242a015627c9b046fe6f8 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-70/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:665b37dd475ad6e5fba914526fd0ca965a86150098f2a239fe4ddf73ebae97e6 +size 17640136 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-70/added_tokens.json b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-70/added_tokens.json new file mode 100644 index 0000000000000000000000000000000000000000..cc13ea617def6b99e484e257e68cda43fa549161 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-70/added_tokens.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58b54bbe36fc752f79a24a271ef66a0a0830054b4dfad94bde757d851968060b +size 605 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-70/merges.txt b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-70/merges.txt new file mode 100644 index 0000000000000000000000000000000000000000..80c1a19fae38f8f4c9ab32cc9d4e145c241147e6 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-70/merges.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8831e4f1a044471340f7c0a83d7bd71306a5b867e95fd870f74d0c5308a904d5 +size 1671853 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-70/optimizer.pt b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-70/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..57682441600c4bdfa648b33a0ad3ac6e41f54995 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-70/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f9d2931290a49969149e0e9e63bc6eb55151a7a97a1bc765298a4211b8fc310 +size 35474626 diff --git 
a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-70/rng_state.pth b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-70/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..323d589b0258d085c7a07b0c4f97b26796e865c5 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-70/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9196a1e708bf24d6abba41cce3f8558820acc3e50f9394c5955e29eb41ffea3d +size 14244 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-70/scheduler.pt b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-70/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..43271f9b860a5a3e1ca9e072a5f84047795b51de --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-70/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1a6560ee67f398abb4366caa0f28dce005bfe2be169777509fc1f459e63edc6 +size 1064 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-70/special_tokens_map.json b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-70/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..ee3f6af3ba18b7d721f02bdf3fbca2814b7eda35 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-70/special_tokens_map.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76862e765266b85aa9459767e33cbaf13970f327a0e88d1c65846c2ddd3a1ecd +size 613 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-70/tokenizer.json b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-70/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..7ebe2829a5954e029cfeb28ed919a1bd4096886a --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-70/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8506e7111b80c6d8635951a02eab0f4e1a8e4e5772da83846579e97b16f61bf +size 7031673 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-70/tokenizer_config.json b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-70/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e808236ca479658882170545cee38202d32ac5d7 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-70/tokenizer_config.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a7d46802920503f03ac0eef1adbb2254bf8898baac973f1333265b18dd2b890 +size 5333 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-70/trainer_state.json b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-70/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..c6e6afba9c590d35430e837b08e8cd6f032e464e --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-70/trainer_state.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb0f97d33dc1ec00a5e5e9d1be6acc61a36348793e9df9eb40fd72b626c29762 +size 3586 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-70/training_args.bin b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-70/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..3e3ff63bef726a38b8bcd657b66513ca67d16b62 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-70/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:8a464f2a1e0bd5fe1bb03e81fe1e5f27c94ac4a2182358860ddb4b5363339161 +size 5368 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-70/vocab.json b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-70/vocab.json new file mode 100644 index 0000000000000000000000000000000000000000..6c49fc63bcb109de13abe49e58f85a4cdba7b679 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/checkpoint-70/vocab.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca10d7e9fb3ed18575dd1e277a2579c16d108e32f27439684afa0e10b1440910 +size 2776833 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/eval_results.json b/llama-factory/saves/Qwen2.5-0.5B-Instruct/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..c9f865f5c3e7cc3ccf1cac5cd83ba8ee0fceb69a --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/eval_results.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9d836f4b8b543706f5cd6ae3720ca9118ee1be66d9af7824e797bf3ce3fbec3 +size 178 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/merges.txt b/llama-factory/saves/Qwen2.5-0.5B-Instruct/merges.txt new file mode 100644 index 0000000000000000000000000000000000000000..80c1a19fae38f8f4c9ab32cc9d4e145c241147e6 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/merges.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8831e4f1a044471340f7c0a83d7bd71306a5b867e95fd870f74d0c5308a904d5 +size 1671853 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/special_tokens_map.json b/llama-factory/saves/Qwen2.5-0.5B-Instruct/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..ee3f6af3ba18b7d721f02bdf3fbca2814b7eda35 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/special_tokens_map.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76862e765266b85aa9459767e33cbaf13970f327a0e88d1c65846c2ddd3a1ecd +size 613 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/tokenizer.json b/llama-factory/saves/Qwen2.5-0.5B-Instruct/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..7ebe2829a5954e029cfeb28ed919a1bd4096886a --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8506e7111b80c6d8635951a02eab0f4e1a8e4e5772da83846579e97b16f61bf +size 7031673 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/tokenizer_config.json b/llama-factory/saves/Qwen2.5-0.5B-Instruct/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e808236ca479658882170545cee38202d32ac5d7 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/tokenizer_config.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a7d46802920503f03ac0eef1adbb2254bf8898baac973f1333265b18dd2b890 +size 5333 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/train_results.json b/llama-factory/saves/Qwen2.5-0.5B-Instruct/train_results.json new file mode 100644 index 0000000000000000000000000000000000000000..e0d8bbb99c98378136b77ee75dbcaa2b87dd1d92 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/train_results.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d92356e4b38df148193e3366520fd39fcc6294c541ce573aca093ce31a3e4a50 +size 224 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/trainer_log.jsonl 
b/llama-factory/saves/Qwen2.5-0.5B-Instruct/trainer_log.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..2602cc75cd10bf46ff449b07d08c05c962dec486 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/trainer_log.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db1e28a0f5aa5b88a065159bbceff44be0981e2d35b7b5f88a21c26d4c81091c +size 16667 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/trainer_state.json b/llama-factory/saves/Qwen2.5-0.5B-Instruct/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..437cef7eb2669eb7d8556eb59ecb8bf6e92463c2 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/trainer_state.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9700ec49ff4cbe848da4e41410bb5668064ccee529df6394842a15a28dec7459 +size 15232 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/training_args.bin b/llama-factory/saves/Qwen2.5-0.5B-Instruct/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..3e3ff63bef726a38b8bcd657b66513ca67d16b62 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a464f2a1e0bd5fe1bb03e81fe1e5f27c94ac4a2182358860ddb4b5363339161 +size 5368 diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/training_eval_loss.png b/llama-factory/saves/Qwen2.5-0.5B-Instruct/training_eval_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..ecc8e4e665925ffc8bbcad74c1a1f0022732370a Binary files /dev/null and b/llama-factory/saves/Qwen2.5-0.5B-Instruct/training_eval_loss.png differ diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/training_loss.png b/llama-factory/saves/Qwen2.5-0.5B-Instruct/training_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..374c784e55838fd65a068c562ba50f3a576405db Binary files /dev/null and b/llama-factory/saves/Qwen2.5-0.5B-Instruct/training_loss.png differ diff --git a/llama-factory/saves/Qwen2.5-0.5B-Instruct/vocab.json b/llama-factory/saves/Qwen2.5-0.5B-Instruct/vocab.json new file mode 100644 index 0000000000000000000000000000000000000000..6c49fc63bcb109de13abe49e58f85a4cdba7b679 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-0.5B-Instruct/vocab.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca10d7e9fb3ed18575dd1e277a2579c16d108e32f27439684afa0e10b1440910 +size 2776833 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/README.md b/llama-factory/saves/Qwen2.5-1.5B-Instruct/README.md new file mode 100644 index 0000000000000000000000000000000000000000..3e28479b5f17af0143516049ecd7534eab35fa11 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/README.md @@ -0,0 +1,73 @@ +--- +license: other +library_name: peft +tags: +- llama-factory +- lora +- generated_from_trainer +base_model: Qwen/Qwen2.5-1.5B-Instruct +model-index: +- name: Qwen2.5-1.5B-Instruct + results: [] +--- + + + +# Qwen2.5-1.5B-Instruct + +This model is a fine-tuned version of [Qwen/Qwen2.5-1.5B-Instruct](https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct) on the alpaca_mgtv_p2 dataset. 
+It achieves the following results on the evaluation set: +- Loss: 0.2388 + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 0.0001 +- train_batch_size: 16 +- eval_batch_size: 1 +- seed: 42 +- gradient_accumulation_steps: 8 +- total_train_batch_size: 128 +- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08 +- lr_scheduler_type: cosine +- lr_scheduler_warmup_ratio: 0.1 +- num_epochs: 2.0 + +### Training results + +| Training Loss | Epoch | Step | Validation Loss | +|:-------------:|:------:|:----:|:---------------:| +| 0.4191 | 0.1990 | 35 | 0.3819 | +| 0.3251 | 0.3980 | 70 | 0.2896 | +| 0.3081 | 0.5970 | 105 | 0.2995 | +| 0.2825 | 0.7960 | 140 | 0.2568 | +| 0.2693 | 0.9950 | 175 | 0.2491 | +| 0.2466 | 1.1940 | 210 | 0.2544 | +| 0.2733 | 1.3930 | 245 | 0.2386 | +| 0.2396 | 1.5920 | 280 | 0.2390 | +| 0.2373 | 1.7910 | 315 | 0.2412 | +| 0.2413 | 1.9900 | 350 | 0.2388 | + + +### Framework versions + +- PEFT 0.11.1 +- Transformers 4.43.3 +- Pytorch 2.3.1+cu121 +- Datasets 2.20.0 +- Tokenizers 0.19.1 \ No newline at end of file diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/adapter_config.json b/llama-factory/saves/Qwen2.5-1.5B-Instruct/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..6a0f1f3765fdd9d8d1d20411cc0649b1e819d4e9 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/adapter_config.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:297724093628fd96dcff09194a9bfece227f9834572607cec9130dd1dd50c91f +size 727 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/adapter_model.safetensors b/llama-factory/saves/Qwen2.5-1.5B-Instruct/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9ee6f5e2276c193004c17d84b4f666bd86c5f054 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8cf2dff6f30153732831ae688716fdcd40224275cf9c87e1ffda80e695f17fb6 +size 36981072 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/added_tokens.json b/llama-factory/saves/Qwen2.5-1.5B-Instruct/added_tokens.json new file mode 100644 index 0000000000000000000000000000000000000000..cc13ea617def6b99e484e257e68cda43fa549161 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/added_tokens.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58b54bbe36fc752f79a24a271ef66a0a0830054b4dfad94bde757d851968060b +size 605 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/all_results.json b/llama-factory/saves/Qwen2.5-1.5B-Instruct/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..5d16651920e052920f5a26ea87ff23ebdbfc1467 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/all_results.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3198538fb8e5055380dd8bbe57bd5e19742b12bd1dcc9a429c41dd5b4edffb86 +size 368 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-105/README.md b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-105/README.md new file mode 100644 index 0000000000000000000000000000000000000000..df1e6579f8514067746f100036eee293925136db --- /dev/null +++ 
b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-105/README.md @@ -0,0 +1,202 @@ +--- +library_name: peft +base_model: Qwen/Qwen2.5-1.5B-Instruct +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
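+
+One programmatic alternative is the third-party `codecarbon` package (shown here as an illustrative assumption; it was not used for this run):
+
+```python
+from codecarbon import EmissionsTracker
+
+tracker = EmissionsTracker()  # samples power draw of local CPU/GPU hardware
+tracker.start()
+# ... run the training or inference workload here ...
+emissions_kg = tracker.stop()  # estimated emissions in kg of CO2-equivalent
+print(f"Estimated emissions: {emissions_kg:.4f} kg CO2eq")
+```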
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.11.1 \ No newline at end of file diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-105/adapter_config.json b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-105/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..6a0f1f3765fdd9d8d1d20411cc0649b1e819d4e9 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-105/adapter_config.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:297724093628fd96dcff09194a9bfece227f9834572607cec9130dd1dd50c91f +size 727 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-105/adapter_model.safetensors b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-105/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3c170d835a39985b0ee799617c68d1524ad1de9a --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-105/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c045abb5c262c7edc8210e5f4441a79f67bc09433996c527d802b89c35b5bf62 +size 36981072 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-105/added_tokens.json b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-105/added_tokens.json new file mode 100644 index 0000000000000000000000000000000000000000..cc13ea617def6b99e484e257e68cda43fa549161 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-105/added_tokens.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58b54bbe36fc752f79a24a271ef66a0a0830054b4dfad94bde757d851968060b +size 605 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-105/merges.txt b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-105/merges.txt new file mode 100644 index 0000000000000000000000000000000000000000..80c1a19fae38f8f4c9ab32cc9d4e145c241147e6 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-105/merges.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8831e4f1a044471340f7c0a83d7bd71306a5b867e95fd870f74d0c5308a904d5 +size 1671853 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-105/optimizer.pt b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-105/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..2f0f6eb1a353aa7c496e2c108463d60ce7f6e7d2 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-105/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0bcec2b3a0fc00aa5d1a97ca4e3f61ef88e5e197e0ca86a09ae57c776351a5b +size 74188650 diff --git 
a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-105/rng_state.pth b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-105/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..0a11d850e4f9ddde03041fb0f3247ef616fbda77 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-105/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d138cfe3a4adf21f048848ee35837c9a757a0a3616ff7adbb45b69aac247435 +size 14244 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-105/scheduler.pt b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-105/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..31341807bd31628f4c98a98fdf9521811486cae0 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-105/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1df35574ae0ca5dc85d2b83a77932ee5ca941a642bbe53370339fdfdc60d04fe +size 1064 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-105/special_tokens_map.json b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-105/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..ee3f6af3ba18b7d721f02bdf3fbca2814b7eda35 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-105/special_tokens_map.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76862e765266b85aa9459767e33cbaf13970f327a0e88d1c65846c2ddd3a1ecd +size 613 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-105/tokenizer.json b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-105/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..7ebe2829a5954e029cfeb28ed919a1bd4096886a --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-105/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8506e7111b80c6d8635951a02eab0f4e1a8e4e5772da83846579e97b16f61bf +size 7031673 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-105/tokenizer_config.json b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-105/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e808236ca479658882170545cee38202d32ac5d7 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-105/tokenizer_config.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a7d46802920503f03ac0eef1adbb2254bf8898baac973f1333265b18dd2b890 +size 5333 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-105/trainer_state.json b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-105/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..9f743f54e39d9b6f8fdd613f60b7164ed101752e --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-105/trainer_state.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b6dcf7a2986fce160a1f9491c32bbc2c36d57cf1ff1a36c1332bad0266c64e5 +size 5008 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-105/training_args.bin b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-105/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..ebf1dace1dcc1910d72f500623fa5b6d16a55c7e --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-105/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:bcac0eab69de7e7d1596125825d3cbcfd9d548b70dea2145474207fda49a701f +size 5368 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-105/vocab.json b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-105/vocab.json new file mode 100644 index 0000000000000000000000000000000000000000..6c49fc63bcb109de13abe49e58f85a4cdba7b679 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-105/vocab.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca10d7e9fb3ed18575dd1e277a2579c16d108e32f27439684afa0e10b1440910 +size 2776833 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-140/README.md b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-140/README.md new file mode 100644 index 0000000000000000000000000000000000000000..df1e6579f8514067746f100036eee293925136db --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-140/README.md @@ -0,0 +1,202 @@ +--- +library_name: peft +base_model: Qwen/Qwen2.5-1.5B-Instruct +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.11.1 \ No newline at end of file diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-140/adapter_config.json b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-140/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..6a0f1f3765fdd9d8d1d20411cc0649b1e819d4e9 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-140/adapter_config.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:297724093628fd96dcff09194a9bfece227f9834572607cec9130dd1dd50c91f +size 727 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-140/adapter_model.safetensors b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-140/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fe33c273f9251dd96c4b4d65bbda6f40dd995dc3 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-140/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:054f8cc03ae28a58a96d22ead58f88aeb114cce9df3f5b5d55c7e0c4e8c39cd1 +size 36981072 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-140/added_tokens.json b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-140/added_tokens.json new file mode 100644 index 0000000000000000000000000000000000000000..cc13ea617def6b99e484e257e68cda43fa549161 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-140/added_tokens.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58b54bbe36fc752f79a24a271ef66a0a0830054b4dfad94bde757d851968060b +size 605 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-140/merges.txt b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-140/merges.txt new file mode 100644 index 0000000000000000000000000000000000000000..80c1a19fae38f8f4c9ab32cc9d4e145c241147e6 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-140/merges.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8831e4f1a044471340f7c0a83d7bd71306a5b867e95fd870f74d0c5308a904d5 +size 1671853 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-140/optimizer.pt b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-140/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..46453ad79cf2ef86b7ce034b2dcb41ec63910fef --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-140/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b72ee1603c8662bf956f6c4b3d41d569b2b05454f748a0e433ace4fb8c1a0ad +size 74188650 diff --git 
a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-140/rng_state.pth b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-140/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..06c25b71551e4830f82f7687d2345c7fbd987daa --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-140/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b3ee827a7a00012c0a116546df467feee35e70376d81a7a85b1a70eb90414d3 +size 14244 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-140/scheduler.pt b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-140/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..61cce90db59762d086e6d3b6852a405fa6be19b9 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-140/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d84219d2b98872e152b7eefd22ea5f06fe28a80c80055cd7d460c7c6e20ea10 +size 1064 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-140/special_tokens_map.json b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-140/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..ee3f6af3ba18b7d721f02bdf3fbca2814b7eda35 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-140/special_tokens_map.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76862e765266b85aa9459767e33cbaf13970f327a0e88d1c65846c2ddd3a1ecd +size 613 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-140/tokenizer.json b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-140/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..7ebe2829a5954e029cfeb28ed919a1bd4096886a --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-140/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8506e7111b80c6d8635951a02eab0f4e1a8e4e5772da83846579e97b16f61bf +size 7031673 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-140/tokenizer_config.json b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-140/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e808236ca479658882170545cee38202d32ac5d7 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-140/tokenizer_config.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a7d46802920503f03ac0eef1adbb2254bf8898baac973f1333265b18dd2b890 +size 5333 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-140/trainer_state.json b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-140/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..03fc3046a5274a5f8c7231d58fd0b18d1b9ba404 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-140/trainer_state.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9788965475844f6c096aac027cc58c86af912874023f3a72303a7f151f0020f9 +size 6429 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-140/training_args.bin b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-140/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..ebf1dace1dcc1910d72f500623fa5b6d16a55c7e --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-140/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:bcac0eab69de7e7d1596125825d3cbcfd9d548b70dea2145474207fda49a701f +size 5368 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-140/vocab.json b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-140/vocab.json new file mode 100644 index 0000000000000000000000000000000000000000..6c49fc63bcb109de13abe49e58f85a4cdba7b679 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-140/vocab.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca10d7e9fb3ed18575dd1e277a2579c16d108e32f27439684afa0e10b1440910 +size 2776833 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-175/README.md b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-175/README.md new file mode 100644 index 0000000000000000000000000000000000000000..df1e6579f8514067746f100036eee293925136db --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-175/README.md @@ -0,0 +1,202 @@ +--- +library_name: peft +base_model: Qwen/Qwen2.5-1.5B-Instruct +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.11.1 \ No newline at end of file diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-175/adapter_config.json b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-175/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..6a0f1f3765fdd9d8d1d20411cc0649b1e819d4e9 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-175/adapter_config.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:297724093628fd96dcff09194a9bfece227f9834572607cec9130dd1dd50c91f +size 727 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-175/adapter_model.safetensors b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-175/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..43296e6db69617a21de2bc6f33465efa0878c838 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-175/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ffbb1ea6de2f83f04194e1e453e77d05f3af9e371c6add37dca603178ae644d1 +size 36981072 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-175/added_tokens.json b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-175/added_tokens.json new file mode 100644 index 0000000000000000000000000000000000000000..cc13ea617def6b99e484e257e68cda43fa549161 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-175/added_tokens.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58b54bbe36fc752f79a24a271ef66a0a0830054b4dfad94bde757d851968060b +size 605 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-175/merges.txt b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-175/merges.txt new file mode 100644 index 0000000000000000000000000000000000000000..80c1a19fae38f8f4c9ab32cc9d4e145c241147e6 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-175/merges.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8831e4f1a044471340f7c0a83d7bd71306a5b867e95fd870f74d0c5308a904d5 +size 1671853 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-175/optimizer.pt b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-175/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..ef4744d535d52396b67d4d5e4454c63d14ca27e4 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-175/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08e4368bb57350a79a5a7cd2a4b3d78980705e02633dc9f5ee6550345cf28bca +size 74188650 diff --git 
a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-175/rng_state.pth b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-175/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..41dfa7d7903dea42d227bad638c2c750928d590c --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-175/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c062f7f375beded48b5337f5a3f3a5cb38807fa3e85dbf3e294c0ab6b627bfc2 +size 14244 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-175/scheduler.pt b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-175/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..caf175aaec2f1527a696c358e4aa29e516403eb7 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-175/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c52b9ccde85457c1de9dfacdf0248d968713afb8256de2ba990f67de54c96d00 +size 1064 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-175/special_tokens_map.json b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-175/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..ee3f6af3ba18b7d721f02bdf3fbca2814b7eda35 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-175/special_tokens_map.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76862e765266b85aa9459767e33cbaf13970f327a0e88d1c65846c2ddd3a1ecd +size 613 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-175/tokenizer.json b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-175/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..7ebe2829a5954e029cfeb28ed919a1bd4096886a --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-175/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8506e7111b80c6d8635951a02eab0f4e1a8e4e5772da83846579e97b16f61bf +size 7031673 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-175/tokenizer_config.json b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-175/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e808236ca479658882170545cee38202d32ac5d7 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-175/tokenizer_config.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a7d46802920503f03ac0eef1adbb2254bf8898baac973f1333265b18dd2b890 +size 5333 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-175/trainer_state.json b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-175/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..cb2348a40f8eba7e412fa4127d6c342b12396378 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-175/trainer_state.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2edc1e61fedf7f7bb47cf19a966bbe99901ecc0e7b47913db22a58065f8bcc73 +size 7856 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-175/training_args.bin b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-175/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..ebf1dace1dcc1910d72f500623fa5b6d16a55c7e --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-175/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:bcac0eab69de7e7d1596125825d3cbcfd9d548b70dea2145474207fda49a701f +size 5368 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-175/vocab.json b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-175/vocab.json new file mode 100644 index 0000000000000000000000000000000000000000..6c49fc63bcb109de13abe49e58f85a4cdba7b679 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-175/vocab.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca10d7e9fb3ed18575dd1e277a2579c16d108e32f27439684afa0e10b1440910 +size 2776833 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-210/README.md b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-210/README.md new file mode 100644 index 0000000000000000000000000000000000000000..df1e6579f8514067746f100036eee293925136db --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-210/README.md @@ -0,0 +1,202 @@ +--- +library_name: peft +base_model: Qwen/Qwen2.5-1.5B-Instruct +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.11.1 \ No newline at end of file diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-210/adapter_config.json b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-210/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..6a0f1f3765fdd9d8d1d20411cc0649b1e819d4e9 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-210/adapter_config.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:297724093628fd96dcff09194a9bfece227f9834572607cec9130dd1dd50c91f +size 727 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-210/adapter_model.safetensors b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-210/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b9cf3304a42a2200da26b507780e9c5e29b8b26f --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-210/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ad92a8d2cae429c6187074eec53fda9dd913f95d2a263a687a865843ac6170b +size 36981072 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-210/added_tokens.json b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-210/added_tokens.json new file mode 100644 index 0000000000000000000000000000000000000000..cc13ea617def6b99e484e257e68cda43fa549161 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-210/added_tokens.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58b54bbe36fc752f79a24a271ef66a0a0830054b4dfad94bde757d851968060b +size 605 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-210/merges.txt b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-210/merges.txt new file mode 100644 index 0000000000000000000000000000000000000000..80c1a19fae38f8f4c9ab32cc9d4e145c241147e6 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-210/merges.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8831e4f1a044471340f7c0a83d7bd71306a5b867e95fd870f74d0c5308a904d5 +size 1671853 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-210/optimizer.pt b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-210/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..20bd0f92e75fd3cb6023637fd879818d3ba3705f --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-210/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f35afbe4ff6348b15e65447edde948c4ee972562948654f8a99fe73cc9c51dc7 +size 74188650 diff --git 
a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-210/rng_state.pth b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-210/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..407e47abbdfb90afd3e1f979b5c0260135d2050e --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-210/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9899ccda7f0d8d9511991180b93aab508ce6e8489de708c88ad1188e7e1d90d6 +size 14244 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-210/scheduler.pt b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-210/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..419b1e14bcc9207421651855b0ff80e2794ef324 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-210/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:899dadd74ba016d8a701a0e5dc593ac31e21ddf1b80316db30ef9fa873894423 +size 1064 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-210/special_tokens_map.json b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-210/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..ee3f6af3ba18b7d721f02bdf3fbca2814b7eda35 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-210/special_tokens_map.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76862e765266b85aa9459767e33cbaf13970f327a0e88d1c65846c2ddd3a1ecd +size 613 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-210/tokenizer.json b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-210/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..7ebe2829a5954e029cfeb28ed919a1bd4096886a --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-210/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8506e7111b80c6d8635951a02eab0f4e1a8e4e5772da83846579e97b16f61bf +size 7031673 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-210/tokenizer_config.json b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-210/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e808236ca479658882170545cee38202d32ac5d7 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-210/tokenizer_config.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a7d46802920503f03ac0eef1adbb2254bf8898baac973f1333265b18dd2b890 +size 5333 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-210/trainer_state.json b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-210/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..d74e3765d5e9c33c582bfaf08b69c9edd7db9438 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-210/trainer_state.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:606c9d30292831f53b609ec74d7201c425f937c558a48bbd377da42e5c12dc24 +size 9281 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-210/training_args.bin b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-210/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..ebf1dace1dcc1910d72f500623fa5b6d16a55c7e --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-210/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:bcac0eab69de7e7d1596125825d3cbcfd9d548b70dea2145474207fda49a701f +size 5368 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-210/vocab.json b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-210/vocab.json new file mode 100644 index 0000000000000000000000000000000000000000..6c49fc63bcb109de13abe49e58f85a4cdba7b679 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-210/vocab.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca10d7e9fb3ed18575dd1e277a2579c16d108e32f27439684afa0e10b1440910 +size 2776833 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-245/README.md b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-245/README.md new file mode 100644 index 0000000000000000000000000000000000000000..df1e6579f8514067746f100036eee293925136db --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-245/README.md @@ -0,0 +1,202 @@ +--- +library_name: peft +base_model: Qwen/Qwen2.5-1.5B-Instruct +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.11.1 \ No newline at end of file diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-245/adapter_config.json b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-245/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..6a0f1f3765fdd9d8d1d20411cc0649b1e819d4e9 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-245/adapter_config.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:297724093628fd96dcff09194a9bfece227f9834572607cec9130dd1dd50c91f +size 727 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-245/adapter_model.safetensors b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-245/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..982a74b91b96607e06274539a1746ef0f8d42196 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-245/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b203fcc1b210c21cc5c1dfe877563cd48ae9357e840c777824a3cbeda9297124 +size 36981072 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-245/added_tokens.json b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-245/added_tokens.json new file mode 100644 index 0000000000000000000000000000000000000000..cc13ea617def6b99e484e257e68cda43fa549161 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-245/added_tokens.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58b54bbe36fc752f79a24a271ef66a0a0830054b4dfad94bde757d851968060b +size 605 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-245/merges.txt b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-245/merges.txt new file mode 100644 index 0000000000000000000000000000000000000000..80c1a19fae38f8f4c9ab32cc9d4e145c241147e6 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-245/merges.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8831e4f1a044471340f7c0a83d7bd71306a5b867e95fd870f74d0c5308a904d5 +size 1671853 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-245/optimizer.pt b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-245/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..8edb4abcb186b6ed3c852388222fc5408f8077a1 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-245/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3044eeb8348b4b14950e2e04c11ff3ba99599556cb1c783be2099d1e8a39814f +size 74188650 diff --git 
a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-245/rng_state.pth b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-245/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..585f2a6a28a143f55b2a0574f949d7f87bb22a65 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-245/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48ee9b73399c28d7e668360bf1d5a4d11095c4738bf96c13f7bb6fbff59f8ccb +size 14244 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-245/scheduler.pt b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-245/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..06881b899f23cfe38e114beffd6a2057ea8fadf6 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-245/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bce9fd37ac7a73d2e522f6aa056fe3582cfe6e7367584d378837991de3ebbf16 +size 1064 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-245/special_tokens_map.json b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-245/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..ee3f6af3ba18b7d721f02bdf3fbca2814b7eda35 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-245/special_tokens_map.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76862e765266b85aa9459767e33cbaf13970f327a0e88d1c65846c2ddd3a1ecd +size 613 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-245/tokenizer.json b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-245/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..7ebe2829a5954e029cfeb28ed919a1bd4096886a --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-245/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8506e7111b80c6d8635951a02eab0f4e1a8e4e5772da83846579e97b16f61bf +size 7031673 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-245/tokenizer_config.json b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-245/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e808236ca479658882170545cee38202d32ac5d7 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-245/tokenizer_config.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a7d46802920503f03ac0eef1adbb2254bf8898baac973f1333265b18dd2b890 +size 5333 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-245/trainer_state.json b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-245/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..499326424a8f1841e38a53cb80b74c1def77e1cf --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-245/trainer_state.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9730ab2148c91c740bb14e55334f0e825ae17bd865add5938965452f59011bd9 +size 10707 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-245/training_args.bin b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-245/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..ebf1dace1dcc1910d72f500623fa5b6d16a55c7e --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-245/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:bcac0eab69de7e7d1596125825d3cbcfd9d548b70dea2145474207fda49a701f +size 5368 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-245/vocab.json b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-245/vocab.json new file mode 100644 index 0000000000000000000000000000000000000000..6c49fc63bcb109de13abe49e58f85a4cdba7b679 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-245/vocab.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca10d7e9fb3ed18575dd1e277a2579c16d108e32f27439684afa0e10b1440910 +size 2776833 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-280/README.md b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-280/README.md new file mode 100644 index 0000000000000000000000000000000000000000..df1e6579f8514067746f100036eee293925136db --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-280/README.md @@ -0,0 +1,202 @@ +--- +library_name: peft +base_model: Qwen/Qwen2.5-1.5B-Instruct +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.11.1 \ No newline at end of file diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-280/adapter_config.json b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-280/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..6a0f1f3765fdd9d8d1d20411cc0649b1e819d4e9 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-280/adapter_config.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:297724093628fd96dcff09194a9bfece227f9834572607cec9130dd1dd50c91f +size 727 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-280/adapter_model.safetensors b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-280/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..237733303bdff70899677b45e29ef32a5eddb7ab --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-280/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:369eb10a5370a0ffbec02d2808c29ccfb72d7c5e44f1c680d615a1ae80d9c537 +size 36981072 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-280/added_tokens.json b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-280/added_tokens.json new file mode 100644 index 0000000000000000000000000000000000000000..cc13ea617def6b99e484e257e68cda43fa549161 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-280/added_tokens.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58b54bbe36fc752f79a24a271ef66a0a0830054b4dfad94bde757d851968060b +size 605 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-280/merges.txt b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-280/merges.txt new file mode 100644 index 0000000000000000000000000000000000000000..80c1a19fae38f8f4c9ab32cc9d4e145c241147e6 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-280/merges.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8831e4f1a044471340f7c0a83d7bd71306a5b867e95fd870f74d0c5308a904d5 +size 1671853 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-280/optimizer.pt b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-280/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..bc5abdb8499866c4a6bd4c6b35eb15538691ca6f --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-280/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c89073687fecba080e3ae4af8b7e592e3b4bd7fea77e66308b1bff902ad9abde +size 74188650 diff --git 
a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-280/rng_state.pth b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-280/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..87383f4346e5c6eea0e725de97c392797ed938b5 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-280/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d245e05e72192c132e0f2edb6fdcae0c578c890f0fe912f17ec7b0bba2d38cc3 +size 14244 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-280/scheduler.pt b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-280/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..8524c54f7e80c306ab69001ab3e5ce83c067a269 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-280/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48872d731120fe18674f751953b72c38d9a966768719eb9601a17faaf31a5957 +size 1064 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-280/special_tokens_map.json b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-280/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..ee3f6af3ba18b7d721f02bdf3fbca2814b7eda35 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-280/special_tokens_map.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76862e765266b85aa9459767e33cbaf13970f327a0e88d1c65846c2ddd3a1ecd +size 613 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-280/tokenizer.json b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-280/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..7ebe2829a5954e029cfeb28ed919a1bd4096886a --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-280/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8506e7111b80c6d8635951a02eab0f4e1a8e4e5772da83846579e97b16f61bf +size 7031673 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-280/tokenizer_config.json b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-280/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e808236ca479658882170545cee38202d32ac5d7 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-280/tokenizer_config.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a7d46802920503f03ac0eef1adbb2254bf8898baac973f1333265b18dd2b890 +size 5333 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-280/trainer_state.json b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-280/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..4efe6e920afbe28942fdea5a2cf1219648033d6f --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-280/trainer_state.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53a9b11ecb988689490ab01e347a9a0edd5e9675347167f13514c3ae37b47427 +size 12138 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-280/training_args.bin b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-280/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..ebf1dace1dcc1910d72f500623fa5b6d16a55c7e --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-280/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:bcac0eab69de7e7d1596125825d3cbcfd9d548b70dea2145474207fda49a701f +size 5368 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-280/vocab.json b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-280/vocab.json new file mode 100644 index 0000000000000000000000000000000000000000..6c49fc63bcb109de13abe49e58f85a4cdba7b679 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-280/vocab.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca10d7e9fb3ed18575dd1e277a2579c16d108e32f27439684afa0e10b1440910 +size 2776833 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-315/README.md b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-315/README.md new file mode 100644 index 0000000000000000000000000000000000000000..df1e6579f8514067746f100036eee293925136db --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-315/README.md @@ -0,0 +1,202 @@ +--- +library_name: peft +base_model: Qwen/Qwen2.5-1.5B-Instruct +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
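The calculator referenced above reduces to a simple product of average power draw, wall-clock hours, and the grid's carbon intensity. A minimal sketch of that arithmetic, in which every number is a hypothetical placeholder rather than a measurement from this training run:

```python
# Rough CO2 estimate per Lacoste et al. (2019): energy consumed times grid intensity.
# All inputs are hypothetical placeholders, not measurements from this run.
gpu_power_kw = 0.45        # e.g. an RTX 4090 at its 450 W board-power limit
hours_used = 4.0           # assumed wall-clock training time
pue = 1.1                  # assumed power usage effectiveness of the facility
grid_kgco2_per_kwh = 0.4   # assumed carbon intensity of the local grid

energy_kwh = gpu_power_kw * hours_used * pue
co2_kg = energy_kwh * grid_kgco2_per_kwh
print(f"{energy_kwh:.2f} kWh -> {co2_kg:.2f} kg CO2-eq")
```

The fields listed below are exactly the inputs such an estimate needs, which is why the card template asks for them.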
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.11.1 \ No newline at end of file diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-315/adapter_config.json b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-315/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..6a0f1f3765fdd9d8d1d20411cc0649b1e819d4e9 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-315/adapter_config.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:297724093628fd96dcff09194a9bfece227f9834572607cec9130dd1dd50c91f +size 727 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-315/adapter_model.safetensors b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-315/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..593dc2062c0508338e21c0c846356f66811d63f0 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-315/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:078b749552d8752613e29645623d50cc7aa0766c3bdcd7604f4fadb81b61e4a3 +size 36981072 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-315/added_tokens.json b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-315/added_tokens.json new file mode 100644 index 0000000000000000000000000000000000000000..cc13ea617def6b99e484e257e68cda43fa549161 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-315/added_tokens.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58b54bbe36fc752f79a24a271ef66a0a0830054b4dfad94bde757d851968060b +size 605 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-315/merges.txt b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-315/merges.txt new file mode 100644 index 0000000000000000000000000000000000000000..80c1a19fae38f8f4c9ab32cc9d4e145c241147e6 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-315/merges.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8831e4f1a044471340f7c0a83d7bd71306a5b867e95fd870f74d0c5308a904d5 +size 1671853 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-315/optimizer.pt b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-315/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..2de806294774ee66823146844eb4651134eae0ef --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-315/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07a26def3c74c47c5723515c0c39783761588378ec7db5dba2cc49eed74736ed +size 74188650 diff --git 
a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-315/rng_state.pth b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-315/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..87571ba044576778d1d6e555eff20ea04c20bbab --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-315/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5edb34d031c0c2b447f3eaadb401a4c1e7e7e6d8c096e28b7092e01a8bd48c92 +size 14244 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-315/scheduler.pt b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-315/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..e862fd38e2c7cdddf014a017679cfb606782bbfd --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-315/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d3743edaf33aa6d682b5e3cd55c73686d21dd7b15b0ef47ae54db03259173ce +size 1064 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-315/special_tokens_map.json b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-315/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..ee3f6af3ba18b7d721f02bdf3fbca2814b7eda35 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-315/special_tokens_map.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76862e765266b85aa9459767e33cbaf13970f327a0e88d1c65846c2ddd3a1ecd +size 613 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-315/tokenizer.json b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-315/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..7ebe2829a5954e029cfeb28ed919a1bd4096886a --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-315/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8506e7111b80c6d8635951a02eab0f4e1a8e4e5772da83846579e97b16f61bf +size 7031673 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-315/tokenizer_config.json b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-315/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e808236ca479658882170545cee38202d32ac5d7 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-315/tokenizer_config.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a7d46802920503f03ac0eef1adbb2254bf8898baac973f1333265b18dd2b890 +size 5333 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-315/trainer_state.json b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-315/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..d374dea6e0400b01d3e89ae9f4b603a239c5e46a --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-315/trainer_state.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc80808a4a32c82cd9583294f7b6176b595d79b4d4be9c42ef09145581baa199 +size 13568 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-315/training_args.bin b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-315/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..ebf1dace1dcc1910d72f500623fa5b6d16a55c7e --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-315/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:bcac0eab69de7e7d1596125825d3cbcfd9d548b70dea2145474207fda49a701f +size 5368 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-315/vocab.json b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-315/vocab.json new file mode 100644 index 0000000000000000000000000000000000000000..6c49fc63bcb109de13abe49e58f85a4cdba7b679 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-315/vocab.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca10d7e9fb3ed18575dd1e277a2579c16d108e32f27439684afa0e10b1440910 +size 2776833 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-35/README.md b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-35/README.md new file mode 100644 index 0000000000000000000000000000000000000000..df1e6579f8514067746f100036eee293925136db --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-35/README.md @@ -0,0 +1,202 @@ +--- +library_name: peft +base_model: Qwen/Qwen2.5-1.5B-Instruct +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
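The "How to Get Started with the Model" section of these cards is left as a placeholder. One plausible starting point is to attach a checkpoint's LoRA adapter to the Qwen base model with PEFT; a minimal sketch, where the local adapter path is an assumption and `transformers`/`peft` are taken at the versions listed under "Framework versions":

```python
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base_id = "Qwen/Qwen2.5-1.5B-Instruct"
# Hypothetical local path; any of the checkpoint-* directories in this diff
# would work once their Git LFS objects have been fetched.
adapter_dir = "llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-35"

tokenizer = AutoTokenizer.from_pretrained(base_id)
model = AutoModelForCausalLM.from_pretrained(
    base_id, torch_dtype="auto", device_map="auto"  # device_map needs `accelerate`
)
model = PeftModel.from_pretrained(model, adapter_dir)  # overlay the LoRA weights
model.eval()
```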
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.11.1 \ No newline at end of file diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-35/adapter_config.json b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-35/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..6a0f1f3765fdd9d8d1d20411cc0649b1e819d4e9 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-35/adapter_config.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:297724093628fd96dcff09194a9bfece227f9834572607cec9130dd1dd50c91f +size 727 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-35/adapter_model.safetensors b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-35/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6adb48b6db1d7fe785ccb281c965844e0ec3b440 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-35/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee7ac3c4fdf8685ae43337ff079a6aa3346de0620e61eafae782226dcd5365a7 +size 36981072 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-35/added_tokens.json b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-35/added_tokens.json new file mode 100644 index 0000000000000000000000000000000000000000..cc13ea617def6b99e484e257e68cda43fa549161 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-35/added_tokens.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58b54bbe36fc752f79a24a271ef66a0a0830054b4dfad94bde757d851968060b +size 605 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-35/merges.txt b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-35/merges.txt new file mode 100644 index 0000000000000000000000000000000000000000..80c1a19fae38f8f4c9ab32cc9d4e145c241147e6 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-35/merges.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8831e4f1a044471340f7c0a83d7bd71306a5b867e95fd870f74d0c5308a904d5 +size 1671853 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-35/optimizer.pt b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-35/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..c60053a3dfbcc23dea445fced804e04b2cc2f827 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-35/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd1592e3c85f2f8efb691ca314a32e9b8b04606aa6c46f59c7586f669ae43c55 +size 74188650 diff --git 
a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-35/rng_state.pth b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-35/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..33cefe6919222ddfa3c3946df69b8e5c5a17a0fc --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-35/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ff264f99d31b522cc7e2a4eac9d38606d0c58a34c0adc74d71e0ca8b371dc36 +size 14244 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-35/scheduler.pt b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-35/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..73bbef1c4091057200c13a251b858c508a5f73cf --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-35/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:713eaf28d14874341960ad352a5374454ad5735a0a3ccca502696adbb018f8cd +size 1064 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-35/special_tokens_map.json b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-35/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..ee3f6af3ba18b7d721f02bdf3fbca2814b7eda35 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-35/special_tokens_map.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76862e765266b85aa9459767e33cbaf13970f327a0e88d1c65846c2ddd3a1ecd +size 613 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-35/tokenizer.json b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-35/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..7ebe2829a5954e029cfeb28ed919a1bd4096886a --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-35/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8506e7111b80c6d8635951a02eab0f4e1a8e4e5772da83846579e97b16f61bf +size 7031673 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-35/tokenizer_config.json b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-35/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e808236ca479658882170545cee38202d32ac5d7 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-35/tokenizer_config.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a7d46802920503f03ac0eef1adbb2254bf8898baac973f1333265b18dd2b890 +size 5333 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-35/trainer_state.json b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-35/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..5c8a3701d0295b2f47549dbd1d29d169d4a52acc --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-35/trainer_state.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f769430c3dad9c7a2fe6f82fd3ea40eb6647d115faabe950411230a7fc0666eb +size 2168 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-35/training_args.bin b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-35/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..ebf1dace1dcc1910d72f500623fa5b6d16a55c7e --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-35/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:bcac0eab69de7e7d1596125825d3cbcfd9d548b70dea2145474207fda49a701f +size 5368 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-35/vocab.json b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-35/vocab.json new file mode 100644 index 0000000000000000000000000000000000000000..6c49fc63bcb109de13abe49e58f85a4cdba7b679 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-35/vocab.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca10d7e9fb3ed18575dd1e277a2579c16d108e32f27439684afa0e10b1440910 +size 2776833 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-350/README.md b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-350/README.md new file mode 100644 index 0000000000000000000000000000000000000000..df1e6579f8514067746f100036eee293925136db --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-350/README.md @@ -0,0 +1,202 @@ +--- +library_name: peft +base_model: Qwen/Qwen2.5-1.5B-Instruct +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
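Continuing from the loading sketch above, inference then follows the standard Qwen chat flow via the tokenizer's chat template; the prompt and generation settings below are arbitrary illustrations:

```python
import torch

# `model` and `tokenizer` come from the loading sketch above; the prompt is arbitrary.
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Reply with a one-sentence summary of LoRA fine-tuning."},
]
input_ids = tokenizer.apply_chat_template(
    messages, add_generation_prompt=True, return_tensors="pt"
).to(model.device)

with torch.no_grad():
    output = model.generate(input_ids, max_new_tokens=128)

# Decode only the newly generated tokens, not the prompt.
print(tokenizer.decode(output[0][input_ids.shape[-1]:], skip_special_tokens=True))
```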
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.11.1 \ No newline at end of file diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-350/adapter_config.json b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-350/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..6a0f1f3765fdd9d8d1d20411cc0649b1e819d4e9 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-350/adapter_config.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:297724093628fd96dcff09194a9bfece227f9834572607cec9130dd1dd50c91f +size 727 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-350/adapter_model.safetensors b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-350/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9ee6f5e2276c193004c17d84b4f666bd86c5f054 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-350/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8cf2dff6f30153732831ae688716fdcd40224275cf9c87e1ffda80e695f17fb6 +size 36981072 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-350/added_tokens.json b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-350/added_tokens.json new file mode 100644 index 0000000000000000000000000000000000000000..cc13ea617def6b99e484e257e68cda43fa549161 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-350/added_tokens.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58b54bbe36fc752f79a24a271ef66a0a0830054b4dfad94bde757d851968060b +size 605 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-350/merges.txt b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-350/merges.txt new file mode 100644 index 0000000000000000000000000000000000000000..80c1a19fae38f8f4c9ab32cc9d4e145c241147e6 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-350/merges.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8831e4f1a044471340f7c0a83d7bd71306a5b867e95fd870f74d0c5308a904d5 +size 1671853 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-350/optimizer.pt b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-350/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..a8acf4d8d235b1e10db77c7bf5698c3e1f0d3cd1 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-350/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62177c69fe76d902a5d965667137243a8f02503b9566dfdce91b6d87a23c47cb +size 74188650 diff --git 
a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-350/rng_state.pth b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-350/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..58d17c224a6607097f486f1f48f823a1a17c6ef9 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-350/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3e5d946241df2516b06d7074d8779088eae7607173ad780df56583910a9589b +size 14244 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-350/scheduler.pt b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-350/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..f76c07a3f0e64c9417333cce47e5b470958a860d --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-350/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70732dec4f14211b78555ce1acfc091b59c12a05595924c42e5da10d3a62cd6d +size 1064 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-350/special_tokens_map.json b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-350/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..ee3f6af3ba18b7d721f02bdf3fbca2814b7eda35 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-350/special_tokens_map.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76862e765266b85aa9459767e33cbaf13970f327a0e88d1c65846c2ddd3a1ecd +size 613 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-350/tokenizer.json b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-350/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..7ebe2829a5954e029cfeb28ed919a1bd4096886a --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-350/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8506e7111b80c6d8635951a02eab0f4e1a8e4e5772da83846579e97b16f61bf +size 7031673 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-350/tokenizer_config.json b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-350/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e808236ca479658882170545cee38202d32ac5d7 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-350/tokenizer_config.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a7d46802920503f03ac0eef1adbb2254bf8898baac973f1333265b18dd2b890 +size 5333 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-350/trainer_state.json b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-350/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..5972a189e57ab38d85da981abd78945d777fd1d1 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-350/trainer_state.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09ff64bdf759f9a3efb5340975d08f8082300adf76b3868100a471998886656a +size 14976 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-350/training_args.bin b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-350/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..ebf1dace1dcc1910d72f500623fa5b6d16a55c7e --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-350/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:bcac0eab69de7e7d1596125825d3cbcfd9d548b70dea2145474207fda49a701f +size 5368 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-350/vocab.json b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-350/vocab.json new file mode 100644 index 0000000000000000000000000000000000000000..6c49fc63bcb109de13abe49e58f85a4cdba7b679 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-350/vocab.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca10d7e9fb3ed18575dd1e277a2579c16d108e32f27439684afa0e10b1440910 +size 2776833 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-70/README.md b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-70/README.md new file mode 100644 index 0000000000000000000000000000000000000000..df1e6579f8514067746f100036eee293925136db --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-70/README.md @@ -0,0 +1,202 @@ +--- +library_name: peft +base_model: Qwen/Qwen2.5-1.5B-Instruct +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
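Apart from the card template, every checkpoint directory in this diff also adds a `trainer_state.json` (as a Git LFS pointer stub, so the real file must be pulled first). Once fetched, the evaluation-loss trajectory can be read back out of the standard Hugging Face Trainer `log_history` field; a sketch, with the path chosen only as an example:

```python
import json

# Hypothetical local path; requires the LFS object to have been fetched first,
# since the diff adds only the pointer stub.
state_path = "llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-70/trainer_state.json"

with open(state_path) as f:
    state = json.load(f)

# log_history is the standard HF Trainer field holding per-step metric records.
for record in state["log_history"]:
    if "eval_loss" in record:
        print(record["step"], record["eval_loss"])
```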
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.11.1 \ No newline at end of file diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-70/adapter_config.json b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-70/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..6a0f1f3765fdd9d8d1d20411cc0649b1e819d4e9 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-70/adapter_config.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:297724093628fd96dcff09194a9bfece227f9834572607cec9130dd1dd50c91f +size 727 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-70/adapter_model.safetensors b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-70/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..80e9c90deeee41e19dfd112268f1946405e0086d --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-70/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33cb14b211d0dac508d27674967e99390c240c098e6d1d80d5a14c17fd5c623c +size 36981072 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-70/added_tokens.json b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-70/added_tokens.json new file mode 100644 index 0000000000000000000000000000000000000000..cc13ea617def6b99e484e257e68cda43fa549161 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-70/added_tokens.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58b54bbe36fc752f79a24a271ef66a0a0830054b4dfad94bde757d851968060b +size 605 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-70/merges.txt b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-70/merges.txt new file mode 100644 index 0000000000000000000000000000000000000000..80c1a19fae38f8f4c9ab32cc9d4e145c241147e6 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-70/merges.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8831e4f1a044471340f7c0a83d7bd71306a5b867e95fd870f74d0c5308a904d5 +size 1671853 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-70/optimizer.pt b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-70/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..35c0d355b55ae0c19a02ac49122ea0b855aa1ada --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-70/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:baac321b264c07df51fba1a91926324680c30c50cf16681d229729e5c13f51e5 +size 74188650 diff --git 
a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-70/rng_state.pth b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-70/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..323d589b0258d085c7a07b0c4f97b26796e865c5 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-70/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9196a1e708bf24d6abba41cce3f8558820acc3e50f9394c5955e29eb41ffea3d +size 14244 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-70/scheduler.pt b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-70/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..43271f9b860a5a3e1ca9e072a5f84047795b51de --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-70/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1a6560ee67f398abb4366caa0f28dce005bfe2be169777509fc1f459e63edc6 +size 1064 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-70/special_tokens_map.json b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-70/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..ee3f6af3ba18b7d721f02bdf3fbca2814b7eda35 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-70/special_tokens_map.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76862e765266b85aa9459767e33cbaf13970f327a0e88d1c65846c2ddd3a1ecd +size 613 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-70/tokenizer.json b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-70/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..7ebe2829a5954e029cfeb28ed919a1bd4096886a --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-70/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8506e7111b80c6d8635951a02eab0f4e1a8e4e5772da83846579e97b16f61bf +size 7031673 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-70/tokenizer_config.json b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-70/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e808236ca479658882170545cee38202d32ac5d7 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-70/tokenizer_config.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a7d46802920503f03ac0eef1adbb2254bf8898baac973f1333265b18dd2b890 +size 5333 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-70/trainer_state.json b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-70/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..b75f34cf7715afb3a6e0f5451ff7d8f818a0c729 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-70/trainer_state.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd495d121ab346a0018819ea97649ab711a4099586b34391e0bfe6e33eb5e505 +size 3587 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-70/training_args.bin b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-70/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..ebf1dace1dcc1910d72f500623fa5b6d16a55c7e --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-70/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:bcac0eab69de7e7d1596125825d3cbcfd9d548b70dea2145474207fda49a701f +size 5368 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-70/vocab.json b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-70/vocab.json new file mode 100644 index 0000000000000000000000000000000000000000..6c49fc63bcb109de13abe49e58f85a4cdba7b679 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/checkpoint-70/vocab.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca10d7e9fb3ed18575dd1e277a2579c16d108e32f27439684afa0e10b1440910 +size 2776833 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/eval_results.json b/llama-factory/saves/Qwen2.5-1.5B-Instruct/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..8abebd8be630bbb69770d5beaea551c24e233244 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/eval_results.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b3b07e3a286d0eabf548fdfe6da40a1f3888e19fc0ade4b9e324198844eaf49 +size 178 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/merges.txt b/llama-factory/saves/Qwen2.5-1.5B-Instruct/merges.txt new file mode 100644 index 0000000000000000000000000000000000000000..80c1a19fae38f8f4c9ab32cc9d4e145c241147e6 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/merges.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8831e4f1a044471340f7c0a83d7bd71306a5b867e95fd870f74d0c5308a904d5 +size 1671853 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/special_tokens_map.json b/llama-factory/saves/Qwen2.5-1.5B-Instruct/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..ee3f6af3ba18b7d721f02bdf3fbca2814b7eda35 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/special_tokens_map.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76862e765266b85aa9459767e33cbaf13970f327a0e88d1c65846c2ddd3a1ecd +size 613 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/tokenizer.json b/llama-factory/saves/Qwen2.5-1.5B-Instruct/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..7ebe2829a5954e029cfeb28ed919a1bd4096886a --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8506e7111b80c6d8635951a02eab0f4e1a8e4e5772da83846579e97b16f61bf +size 7031673 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/tokenizer_config.json b/llama-factory/saves/Qwen2.5-1.5B-Instruct/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e808236ca479658882170545cee38202d32ac5d7 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/tokenizer_config.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a7d46802920503f03ac0eef1adbb2254bf8898baac973f1333265b18dd2b890 +size 5333 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/train_results.json b/llama-factory/saves/Qwen2.5-1.5B-Instruct/train_results.json new file mode 100644 index 0000000000000000000000000000000000000000..fd29ac90759f6edbf75cf6accf1ae02fecc121b5 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/train_results.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1023038f8fa5763742e9b6e7c389eeac34700bfc75ef070d6876f644e36e2f1 +size 225 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/trainer_log.jsonl 
b/llama-factory/saves/Qwen2.5-1.5B-Instruct/trainer_log.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..dd4fcc4dbcba06598244c856dd3796571d9cb692 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/trainer_log.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56f6ec12ff4273f54dc98209f59d48585e9690de4632f174bc9bc3dcdeff45db +size 16259 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/trainer_state.json b/llama-factory/saves/Qwen2.5-1.5B-Instruct/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..c40f7489800422ebbd1e525e03384e83fb4f01d3 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/trainer_state.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ae0d5c7970055169a183b08f2933ac22765d31dab8fa04045a82e71f9caf71f +size 15242 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/training_args.bin b/llama-factory/saves/Qwen2.5-1.5B-Instruct/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..ebf1dace1dcc1910d72f500623fa5b6d16a55c7e --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bcac0eab69de7e7d1596125825d3cbcfd9d548b70dea2145474207fda49a701f +size 5368 diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/training_eval_loss.png b/llama-factory/saves/Qwen2.5-1.5B-Instruct/training_eval_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..1b8888b06464bc4290d2b5219c06931f8edb8add Binary files /dev/null and b/llama-factory/saves/Qwen2.5-1.5B-Instruct/training_eval_loss.png differ diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/training_loss.png b/llama-factory/saves/Qwen2.5-1.5B-Instruct/training_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..dc6d1f348afdb74fc8a3082c0bd6417e8117c8ce Binary files /dev/null and b/llama-factory/saves/Qwen2.5-1.5B-Instruct/training_loss.png differ diff --git a/llama-factory/saves/Qwen2.5-1.5B-Instruct/vocab.json b/llama-factory/saves/Qwen2.5-1.5B-Instruct/vocab.json new file mode 100644 index 0000000000000000000000000000000000000000..6c49fc63bcb109de13abe49e58f85a4cdba7b679 --- /dev/null +++ b/llama-factory/saves/Qwen2.5-1.5B-Instruct/vocab.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca10d7e9fb3ed18575dd1e277a2579c16d108e32f27439684afa0e10b1440910 +size 2776833 diff --git a/notebooks/07_Qwen2.5_models.ipynb b/notebooks/07_Qwen2.5_models.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..42630a93416bcba9bbe27212ffd281b8c20610c8 --- /dev/null +++ b/notebooks/07_Qwen2.5_models.ipynb @@ -0,0 +1,16726 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "0ea8b46b-839b-445b-8043-ccdf4e920ace", + "showTitle": false, + "title": "" + }, + "id": "YLH80COBzi_F" + }, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "id": "63B5exAuzq4M" + }, + "outputs": [], + "source": [ + "from pathlib import Path\n", + "\n", + "if \"workding_dir\" not in locals():\n", + " try:\n", + " from google.colab import drive\n", + " drive.mount('/content/drive')\n", + " workding_dir = \"/content/drive/MyDrive/logical-reasoning/\"\n", + " except ModuleNotFoundError:\n", + " 
workding_dir = str(Path.cwd().parent)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "executionInfo": { + "elapsed": 368, + "status": "ok", + "timestamp": 1719461634865, + "user": { + "displayName": "Donghao Huang", + "userId": "00463591218503521679" + }, + "user_tz": -480 + }, + "id": "zFulf0bg0H-9", + "outputId": "debdd535-c828-40b9-efc0-8a180e5830dd" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "workding dir: /home/inflaton/code/logical-reasoning\n" + ] + } + ], + "source": [ + "import os\n", + "import sys\n", + "\n", + "os.chdir(workding_dir)\n", + "sys.path.append(workding_dir)\n", + "print(\"workding dir:\", workding_dir)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "9f67ec60-2f24-411c-84eb-0dd664b44775", + "showTitle": false, + "title": "" + }, + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 589, + "status": "ok", + "timestamp": 1719462011879, + "user": { + "displayName": "Donghao Huang", + "userId": "00463591218503521679" + }, + "user_tz": -480 + }, + "id": "DIUiweYYzi_I", + "outputId": "e16e9247-9077-4b0c-f8ea-17059f05a1c4" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Current Directory:\n", + "/home/inflaton/code/logical-reasoning\n", + "Sat Sep 21 15:45:11 2024 \n", + "+-----------------------------------------------------------------------------------------+\n", + "| NVIDIA-SMI 560.35.02 Driver Version: 560.94 CUDA Version: 12.6 |\n", + "|-----------------------------------------+------------------------+----------------------+\n", + "| GPU Name Persistence-M | Bus-Id Disp.A | Volatile Uncorr. ECC |\n", + "| Fan Temp Perf Pwr:Usage/Cap | Memory-Usage | GPU-Util Compute M. |\n", + "| | | MIG M. 
|\n", + "|=========================================+========================+======================|\n", + "| 0 NVIDIA GeForce RTX 4090 On | 00000000:01:00.0 On | Off |\n", + "| 37% 54C P8 22W / 450W | 535MiB / 24564MiB | 4% Default |\n", + "| | | N/A |\n", + "+-----------------------------------------+------------------------+----------------------+\n", + " \n", + "+-----------------------------------------------------------------------------------------+\n", + "| Processes: |\n", + "| GPU GI CI PID Type Process name GPU Memory |\n", + "| ID ID Usage |\n", + "|=========================================================================================|\n", + "| 0 N/A N/A 25 G /Xwayland N/A |\n", + "+-----------------------------------------------------------------------------------------+\n", + "Linux Gen-AI 5.15.133.1-microsoft-standard-WSL2 #1 SMP Thu Oct 5 21:02:42 UTC 2023 x86_64 x86_64 x86_64 GNU/Linux\n", + "PRETTY_NAME=\"Ubuntu 22.04.2 LTS\"\n", + "NAME=\"Ubuntu\"\n", + "VERSION_ID=\"22.04\"\n", + "VERSION=\"22.04.2 LTS (Jammy Jellyfish)\"\n", + "VERSION_CODENAME=jammy\n", + "ID=ubuntu\n", + "ID_LIKE=debian\n", + "HOME_URL=\"https://www.ubuntu.com/\"\n", + "SUPPORT_URL=\"https://help.ubuntu.com/\"\n", + "BUG_REPORT_URL=\"https://bugs.launchpad.net/ubuntu/\"\n", + "PRIVACY_POLICY_URL=\"https://www.ubuntu.com/legal/terms-and-policies/privacy-policy\"\n", + "UBUNTU_CODENAME=jammy\n", + "Architecture: x86_64\n", + " CPU op-mode(s): 32-bit, 64-bit\n", + " Address sizes: 39 bits physical, 48 bits virtual\n", + " Byte Order: Little Endian\n", + "CPU(s): 32\n", + " On-line CPU(s) list: 0-31\n", + "Vendor ID: GenuineIntel\n", + " Model name: 13th Gen Intel(R) Core(TM) i9-13900KF\n", + " CPU family: 6\n", + " Model: 183\n", + " Thread(s) per core: 2\n", + " Core(s) per socket: 16\n", + " Socket(s): 1\n", + " Stepping: 1\n", + " BogoMIPS: 5990.39\n", + " Flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mc\n", + " a cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscal\n", + " l nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopo\n", + " logy tsc_reliable nonstop_tsc cpuid pni pclmulqdq vmx s\n", + " sse3 fma cx16 sse4_1 sse4_2 x2apic movbe popcnt tsc_dea\n", + " dline_timer aes xsave avx f16c rdrand hypervisor lahf_l\n", + " m abm 3dnowprefetch ssbd ibrs ibpb stibp ibrs_enhanced \n", + " tpr_shadow vnmi ept vpid ept_ad fsgsbase tsc_adjust bmi\n", + " 1 avx2 smep bmi2 erms invpcid rdseed adx smap clflushop\n", + " t clwb sha_ni xsaveopt xsavec xgetbv1 xsaves avx_vnni u\n", + " mip waitpkg gfni vaes vpclmulqdq rdpid movdiri movdir64\n", + " b fsrm md_clear serialize flush_l1d arch_capabilities\n", + "Virtualization features: \n", + " Virtualization: VT-x\n", + " Hypervisor vendor: Microsoft\n", + " Virtualization type: full\n", + "Caches (sum of all): \n", + " L1d: 768 KiB (16 instances)\n", + " L1i: 512 KiB (16 instances)\n", + " L2: 32 MiB (16 instances)\n", + " L3: 36 MiB (1 instance)\n", + "Vulnerabilities: \n", + " Gather data sampling: Not affected\n", + " Itlb multihit: Not affected\n", + " L1tf: Not affected\n", + " Mds: Not affected\n", + " Meltdown: Not affected\n", + " Mmio stale data: Not affected\n", + " Retbleed: Mitigation; Enhanced IBRS\n", + " Spec rstack overflow: Not affected\n", + " Spec store bypass: Mitigation; Speculative Store Bypass disabled via prctl\n", + " and seccomp\n", + " Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer\n", + " sanitization\n", + " Spectre v2: Mitigation; Enhanced IBRS, IBPB conditional, RSB fillin\n", + " g, 
PBRSB-eIBRS SW sequence\n", + " Srbds: Not affected\n", + " Tsx async abort: Not affected\n", + "MemTotal: 49330024 kB\n", + "Current Directory:\n", + "/home/inflaton/code/logical-reasoning/llama-factory\n", + "loading env vars from: /home/inflaton/code/logical-reasoning/.env\n", + "Adding /home/inflaton/code/logical-reasoning to sys.path\n", + "loading /home/inflaton/code/logical-reasoning/llm_toolkit/logical_reasoning_utils.py\n", + "Qwen Qwen2.5-3B-Instruct qwen config/mgtv_template.yaml ../datasets/mgtv\n", + "Writing to config/models/Qwen2.5-3B-Instruct.yaml\n", + "config/models/Qwen2.5-3B-Instruct.yaml:\n", + " {\n", + " \"model_name_or_path\": \"Qwen/Qwen2.5-3B-Instruct\",\n", + " \"stage\": \"sft\",\n", + " \"do_train\": true,\n", + " \"finetuning_type\": \"lora\",\n", + " \"lora_target\": \"all\",\n", + " \"dataset\": \"alpaca_mgtv_p2\",\n", + " \"template\": \"qwen\",\n", + " \"cutoff_len\": 8192,\n", + " \"max_samples\": 25000,\n", + " \"overwrite_cache\": true,\n", + " \"preprocessing_num_workers\": 16,\n", + " \"output_dir\": \"saves/Qwen2.5-3B-Instruct\",\n", + " \"logging_steps\": 5,\n", + " \"save_steps\": 35,\n", + " \"plot_loss\": true,\n", + " \"per_device_train_batch_size\": 16,\n", + " \"gradient_accumulation_steps\": 8,\n", + " \"learning_rate\": 0.0001,\n", + " \"num_train_epochs\": 2.0,\n", + " \"lr_scheduler_type\": \"cosine\",\n", + " \"warmup_ratio\": 0.1,\n", + " \"bf16\": true,\n", + " \"ddp_timeout\": 180000000,\n", + " \"val_size\": 0.1,\n", + " \"per_device_eval_batch_size\": 1,\n", + " \"eval_strategy\": \"steps\",\n", + " \"eval_steps\": 35,\n", + " \"report_to\": \"none\",\n", + " \"run_name\": \"Qwen2.5-3B-Instruct_lora_sft\"\n", + "}\n", + "09/21/2024 15:45:22 - INFO - llamafactory.hparams.parser - Process rank: 0, device: cuda:0, n_gpu: 1, distributed training: False, compute dtype: torch.bfloat16\n", + "[INFO|configuration_utils.py:733] 2024-09-21 15:45:23,254 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2.5-3B-Instruct/snapshots/82f42baa094a9600e39ccd80d34058aeeb3abbc1/config.json\n", + "[INFO|configuration_utils.py:800] 2024-09-21 15:45:23,255 >> Model config Qwen2Config {\n", + " \"_name_or_path\": \"Qwen/Qwen2.5-3B-Instruct\",\n", + " \"architectures\": [\n", + " \"Qwen2ForCausalLM\"\n", + " ],\n", + " \"attention_dropout\": 0.0,\n", + " \"bos_token_id\": 151643,\n", + " \"eos_token_id\": 151645,\n", + " \"hidden_act\": \"silu\",\n", + " \"hidden_size\": 2048,\n", + " \"initializer_range\": 0.02,\n", + " \"intermediate_size\": 11008,\n", + " \"max_position_embeddings\": 32768,\n", + " \"max_window_layers\": 70,\n", + " \"model_type\": \"qwen2\",\n", + " \"num_attention_heads\": 16,\n", + " \"num_hidden_layers\": 36,\n", + " \"num_key_value_heads\": 2,\n", + " \"rms_norm_eps\": 1e-06,\n", + " \"rope_theta\": 1000000.0,\n", + " \"sliding_window\": null,\n", + " \"tie_word_embeddings\": true,\n", + " \"torch_dtype\": \"bfloat16\",\n", + " \"transformers_version\": \"4.43.3\",\n", + " \"use_cache\": true,\n", + " \"use_sliding_window\": false,\n", + " \"vocab_size\": 151936\n", + "}\n", + "\n", + "[INFO|tokenization_utils_base.py:2289] 2024-09-21 15:45:23,514 >> loading file vocab.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2.5-3B-Instruct/snapshots/82f42baa094a9600e39ccd80d34058aeeb3abbc1/vocab.json\n", + "[INFO|tokenization_utils_base.py:2289] 2024-09-21 15:45:23,514 >> loading file merges.txt from cache at 
/home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2.5-3B-Instruct/snapshots/82f42baa094a9600e39ccd80d34058aeeb3abbc1/merges.txt\n", + "[INFO|tokenization_utils_base.py:2289] 2024-09-21 15:45:23,514 >> loading file tokenizer.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2.5-3B-Instruct/snapshots/82f42baa094a9600e39ccd80d34058aeeb3abbc1/tokenizer.json\n", + "[INFO|tokenization_utils_base.py:2289] 2024-09-21 15:45:23,514 >> loading file added_tokens.json from cache at None\n", + "[INFO|tokenization_utils_base.py:2289] 2024-09-21 15:45:23,514 >> loading file special_tokens_map.json from cache at None\n", + "[INFO|tokenization_utils_base.py:2289] 2024-09-21 15:45:23,514 >> loading file tokenizer_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2.5-3B-Instruct/snapshots/82f42baa094a9600e39ccd80d34058aeeb3abbc1/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2533] 2024-09-21 15:45:23,600 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n", + "[INFO|configuration_utils.py:733] 2024-09-21 15:45:24,723 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2.5-3B-Instruct/snapshots/82f42baa094a9600e39ccd80d34058aeeb3abbc1/config.json\n", + "[INFO|configuration_utils.py:800] 2024-09-21 15:45:24,724 >> Model config Qwen2Config {\n", + " \"_name_or_path\": \"Qwen/Qwen2.5-3B-Instruct\",\n", + " \"architectures\": [\n", + " \"Qwen2ForCausalLM\"\n", + " ],\n", + " \"attention_dropout\": 0.0,\n", + " \"bos_token_id\": 151643,\n", + " \"eos_token_id\": 151645,\n", + " \"hidden_act\": \"silu\",\n", + " \"hidden_size\": 2048,\n", + " \"initializer_range\": 0.02,\n", + " \"intermediate_size\": 11008,\n", + " \"max_position_embeddings\": 32768,\n", + " \"max_window_layers\": 70,\n", + " \"model_type\": \"qwen2\",\n", + " \"num_attention_heads\": 16,\n", + " \"num_hidden_layers\": 36,\n", + " \"num_key_value_heads\": 2,\n", + " \"rms_norm_eps\": 1e-06,\n", + " \"rope_theta\": 1000000.0,\n", + " \"sliding_window\": null,\n", + " \"tie_word_embeddings\": true,\n", + " \"torch_dtype\": \"bfloat16\",\n", + " \"transformers_version\": \"4.43.3\",\n", + " \"use_cache\": true,\n", + " \"use_sliding_window\": false,\n", + " \"vocab_size\": 151936\n", + "}\n", + "\n", + "[INFO|tokenization_utils_base.py:2289] 2024-09-21 15:45:24,997 >> loading file vocab.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2.5-3B-Instruct/snapshots/82f42baa094a9600e39ccd80d34058aeeb3abbc1/vocab.json\n", + "[INFO|tokenization_utils_base.py:2289] 2024-09-21 15:45:24,997 >> loading file merges.txt from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2.5-3B-Instruct/snapshots/82f42baa094a9600e39ccd80d34058aeeb3abbc1/merges.txt\n", + "[INFO|tokenization_utils_base.py:2289] 2024-09-21 15:45:24,997 >> loading file tokenizer.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2.5-3B-Instruct/snapshots/82f42baa094a9600e39ccd80d34058aeeb3abbc1/tokenizer.json\n", + "[INFO|tokenization_utils_base.py:2289] 2024-09-21 15:45:24,997 >> loading file added_tokens.json from cache at None\n", + "[INFO|tokenization_utils_base.py:2289] 2024-09-21 15:45:24,997 >> loading file special_tokens_map.json from cache at None\n", + "[INFO|tokenization_utils_base.py:2289] 2024-09-21 15:45:24,997 >> loading file tokenizer_config.json from cache at 
/home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2.5-3B-Instruct/snapshots/82f42baa094a9600e39ccd80d34058aeeb3abbc1/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2533] 2024-09-21 15:45:25,089 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n", + "09/21/2024 15:45:25 - INFO - llamafactory.data.template - Replace eos token: <|im_end|>\n", + "09/21/2024 15:45:25 - INFO - llamafactory.data.loader - Loading dataset alpaca_mgtv_p2.json...\n", + "Converting format of dataset (num_proc=16): 100%|█| 25000/25000 [00:00<00:00, 11\n", + "Running tokenizer on dataset (num_proc=16): 100%|█| 25000/25000 [00:01<00:00, 16\n", + "training example:\n", + "input_ids:\n", + "[151644, 8948, 198, 2610, 525, 264, 10950, 17847, 13, 151645, 198, 151644, 872, 198, 56568, 101909, 108024, 101497, 107969, 99329, 9370, 106040, 1773, 99329, 104190, 104506, 48443, 16, 13, 26853, 224, 57218, 28946, 36993, 101051, 46944, 107969, 27091, 3837, 107969, 27091, 36993, 53481, 46944, 100405, 99518, 104151, 101128, 9370, 57621, 8997, 17, 13, 89982, 68878, 17340, 99392, 107969, 99413, 3837, 107969, 99413, 20412, 107969, 27091, 111230, 8997, 18, 13, 26853, 224, 57218, 28946, 73670, 105396, 99885, 106386, 28330, 86119, 112469, 100246, 57621, 9370, 106538, 8997, 19, 13, 69162, 34204, 103991, 86119, 3837, 106040, 44063, 100345, 107591, 102104, 87752, 105220, 109487, 100653, 5122, 20412, 5373, 99520, 5373, 16530, 99335, 5373, 102104, 88991, 5373, 56007, 24339, 32100, 1773, 99200, 102104, 9370, 104317, 100142, 104506, 28311, 256, 481, 92498, 107969, 27091, 33108, 107969, 99413, 114562, 86119, 111230, 3837, 102104, 5122, 20412, 100631, 99520, 198, 256, 481, 92498, 107969, 27091, 33108, 107969, 99413, 53153, 101041, 100631, 108349, 83751, 63789, 20221, 86119, 111230, 3837, 102104, 5122, 16530, 99335, 198, 256, 481, 92498, 111842, 107666, 113479, 106386, 28330, 86119, 100631, 86119, 104151, 101128, 3837, 102104, 5122, 56007, 24339, 32100, 198, 256, 481, 92498, 111842, 107666, 99797, 108670, 34187, 107969, 99413, 106538, 3837, 102104, 5122, 102104, 88991, 198, 20, 13, 49602, 252, 99590, 15946, 53153, 42855, 99885, 102158, 27369, 3837, 105827, 65770, 99475, 109487, 101047, 110281, 18600, 1773, 77557, 3837, 108620, 99360, 2073, 99520, 854, 65770, 99475, 12857, 2073, 16530, 96332, 14880, 110439, 100001, 104190, 102104, 111842, 101080, 103936, 3407, 334, 107969, 27091, 66963, 73562, 109628, 45629, 105489, 3837, 104133, 111718, 106023, 5122, 101988, 115865, 110731, 9370, 105419, 3837, 115865, 99810, 69249, 59743, 104133, 104003, 115865, 36993, 16530, 101401, 68536, 99723, 3837, 115967, 104270, 102060, 110666, 112031, 1773, 14880, 109363, 115865, 110786, 101423, 104249, 3407, 334, 107969, 99413, 66963, 10236, 250, 253, 48921, 101221, 57218, 101961, 7948, 100894, 9370, 99288, 99818, 101063, 1773, 104269, 99288, 99818, 100774, 13343, 3837, 99798, 57218, 101961, 105664, 102373, 48921, 100271, 1773, 99650, 105616, 18493, 115865, 110731, 9370, 105419, 104388, 1773, 103968, 3837, 102606, 102115, 17340, 3837, 102373, 18493, 106340, 24562, 99774, 82224, 104424, 15946, 99372, 99244, 1773, 110597, 9370, 99288, 99818, 100012, 101416, 63109, 99242, 9370, 102373, 3837, 101988, 101938, 44063, 104003, 115865, 101329, 99314, 3837, 107974, 102373, 9370, 104575, 24562, 3837, 105699, 116418, 100005, 103000, 90663, 1773, 100147, 101070, 105443, 34187, 100097, 3837, 104989, 100833, 69249, 46944, 105190, 9370, 106023, 3407, 334, 111842, 101080, 103936, 66963, 4891, 223, 115, 
100623, 21317, 99315, 101037, 198, 151645, 198, 151644, 77091, 198, 99520, 151645]\n", + "inputs:\n", + "<|im_start|>system\n", + "You are a helpful assistant.<|im_end|>\n", + "<|im_start|>user\n", + "你是一个情景猜谜游戏的主持人。游戏规则如下:\n", + "\n", + "1. 参与者会得到一个谜面,谜面会描述一个简单又难以理解的事件。\n", + "2. 主持人知道谜底,谜底是谜面的答案。\n", + "3. 参与者可以询问任何封闭式问题来找寻事件的真相。\n", + "4. 对于每个问题,主持人将根据实际情况回答以下五个选项之一:是、不是、不重要、回答正确、问法错误。各回答的判断标准如下:\n", + " - 若谜面和谜底能找到问题的答案,回答:是或者不是\n", + " - 若谜面和谜底不能直接或者间接推断出问题的答案,回答:不重要\n", + " - 若参与者提问不是一个封闭式问题或者问题难以理解,回答:问法错误\n", + " - 若参与者提问基本还原了谜底真相,回答:回答正确\n", + "5. 回答中不能添加任何其它信息,也不能省略选项中的任何一个字。例如,不可以把“不是”省略成“不”。\n", + "\n", + "请严格按照这些规则回答参与者提出的问题。\n", + "\n", + "**谜面:** 在甄家村里,有一个古老的传说:每年南瓜丰收的季节,南瓜田里总有一个最大的南瓜会不翼而飞,村民们对此现象困惑不解。请找出南瓜失踪背后的原因。\n", + "\n", + "**谜底:** 真相原来与一位年迈的农夫有关。这位农夫年轻时,曾与一位美丽的姑娘相恋。他们约定在南瓜丰收的季节结婚。然而,命运弄人,姑娘在婚礼前的一场意外中离世。悲伤的农夫为了纪念心爱的姑娘,每年都会将最大的南瓜偷走,放到姑娘的墓前,以此寄托自己的哀思。这一行为延续了多年,成为了乡村里一个神秘的传说。\n", + "\n", + "**参与者提出的问题:** 偷的人信神吗\n", + "<|im_end|>\n", + "<|im_start|>assistant\n", + "不是<|im_end|>\n", + "label_ids:\n", + "[-100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 99520, 151645]\n", + "labels:\n", + "不是<|im_end|>\n", + 
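The `label_ids` dump above is the key detail of the SFT data pipeline: every prompt token (system turn, game rules, 谜面/谜底, and the participant's question) is masked to -100, so the loss supervises only the assistant's answer tokens (`不是` plus `<|im_end|>`). PyTorch's cross-entropy skips `ignore_index=-100` positions, which is exactly the `loss_fct(shift_logits, shift_labels)` call visible in the Qwen2 traceback further down. A minimal sketch of the idea (`mask_prompt_labels` and the toy token ids are illustrative, not LLaMA-Factory's actual code; the real forward pass also shifts logits and labels by one position before this loss):

```python
import torch
import torch.nn.functional as F

IGNORE_INDEX = -100  # the sentinel value filling label_ids above

def mask_prompt_labels(input_ids: list[int], prompt_len: int) -> list[int]:
    """Copy input_ids, replacing every prompt position with IGNORE_INDEX."""
    return [IGNORE_INDEX] * prompt_len + input_ids[prompt_len:]

# Toy 6-token sequence whose last two tokens are the supervised answer
# (99520 = "不是", 151645 = <|im_end|> in the Qwen vocabulary above).
input_ids = [151644, 872, 198, 100007, 99520, 151645]
labels = mask_prompt_labels(input_ids, prompt_len=4)
assert labels == [-100, -100, -100, -100, 99520, 151645]

# cross_entropy ignores IGNORE_INDEX targets, so prompt tokens add no loss.
logits = torch.randn(len(input_ids), 151936)  # vocab_size from the config
loss = F.cross_entropy(logits, torch.tensor(labels), ignore_index=IGNORE_INDEX)
```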
"[INFO|configuration_utils.py:733] 2024-09-21 15:45:28,548 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2.5-3B-Instruct/snapshots/82f42baa094a9600e39ccd80d34058aeeb3abbc1/config.json\n", + "[INFO|configuration_utils.py:800] 2024-09-21 15:45:28,548 >> Model config Qwen2Config {\n", + " \"_name_or_path\": \"Qwen/Qwen2.5-3B-Instruct\",\n", + " \"architectures\": [\n", + " \"Qwen2ForCausalLM\"\n", + " ],\n", + " \"attention_dropout\": 0.0,\n", + " \"bos_token_id\": 151643,\n", + " \"eos_token_id\": 151645,\n", + " \"hidden_act\": \"silu\",\n", + " \"hidden_size\": 2048,\n", + " \"initializer_range\": 0.02,\n", + " \"intermediate_size\": 11008,\n", + " \"max_position_embeddings\": 32768,\n", + " \"max_window_layers\": 70,\n", + " \"model_type\": \"qwen2\",\n", + " \"num_attention_heads\": 16,\n", + " \"num_hidden_layers\": 36,\n", + " \"num_key_value_heads\": 2,\n", + " \"rms_norm_eps\": 1e-06,\n", + " \"rope_theta\": 1000000.0,\n", + " \"sliding_window\": null,\n", + " \"tie_word_embeddings\": true,\n", + " \"torch_dtype\": \"bfloat16\",\n", + " \"transformers_version\": \"4.43.3\",\n", + " \"use_cache\": true,\n", + " \"use_sliding_window\": false,\n", + " \"vocab_size\": 151936\n", + "}\n", + "\n", + "[INFO|modeling_utils.py:3634] 2024-09-21 15:45:28,559 >> loading weights file model.safetensors from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2.5-3B-Instruct/snapshots/82f42baa094a9600e39ccd80d34058aeeb3abbc1/model.safetensors.index.json\n", + "[INFO|modeling_utils.py:1572] 2024-09-21 15:45:28,560 >> Instantiating Qwen2ForCausalLM model under default dtype torch.bfloat16.\n", + "[INFO|configuration_utils.py:1038] 2024-09-21 15:45:28,560 >> Generate config GenerationConfig {\n", + " \"bos_token_id\": 151643,\n", + " \"eos_token_id\": 151645\n", + "}\n", + "\n", + "Loading checkpoint shards: 100%|██████████████████| 2/2 [00:39<00:00, 19.96s/it]\n", + "[INFO|modeling_utils.py:4463] 2024-09-21 15:46:08,667 >> All model checkpoint weights were used when initializing Qwen2ForCausalLM.\n", + "\n", + "[INFO|modeling_utils.py:4471] 2024-09-21 15:46:08,667 >> All the weights of Qwen2ForCausalLM were initialized from the model checkpoint at Qwen/Qwen2.5-3B-Instruct.\n", + "If your task is similar to the task the model of the checkpoint was trained on, you can already use Qwen2ForCausalLM for predictions without further training.\n", + "[INFO|configuration_utils.py:993] 2024-09-21 15:46:08,922 >> loading configuration file generation_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2.5-3B-Instruct/snapshots/82f42baa094a9600e39ccd80d34058aeeb3abbc1/generation_config.json\n", + "[INFO|configuration_utils.py:1038] 2024-09-21 15:46:08,922 >> Generate config GenerationConfig {\n", + " \"bos_token_id\": 151643,\n", + " \"do_sample\": true,\n", + " \"eos_token_id\": [\n", + " 151645,\n", + " 151643\n", + " ],\n", + " \"pad_token_id\": 151643,\n", + " \"repetition_penalty\": 1.05,\n", + " \"temperature\": 0.7,\n", + " \"top_k\": 20,\n", + " \"top_p\": 0.8\n", + "}\n", + "\n", + "09/21/2024 15:46:09 - INFO - llamafactory.model.model_utils.checkpointing - Gradient checkpointing enabled.\n", + "09/21/2024 15:46:09 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.\n", + "09/21/2024 15:46:09 - INFO - llamafactory.model.adapter - Upcasting trainable params to float32.\n", + "09/21/2024 15:46:09 - INFO - llamafactory.model.adapter - Fine-tuning 
method: LoRA\n", + "09/21/2024 15:46:09 - INFO - llamafactory.model.model_utils.misc - Found linear modules: v_proj,q_proj,k_proj,up_proj,gate_proj,down_proj,o_proj\n", + "09/21/2024 15:46:09 - INFO - llamafactory.model.loader - trainable params: 14,966,784 || all params: 3,100,905,472 || trainable%: 0.4827\n", + "[INFO|trainer.py:648] 2024-09-21 15:46:09,435 >> Using auto half precision backend\n", + "[INFO|trainer.py:2134] 2024-09-21 15:46:09,741 >> ***** Running training *****\n", + "[INFO|trainer.py:2135] 2024-09-21 15:46:09,741 >> Num examples = 22,500\n", + "[INFO|trainer.py:2136] 2024-09-21 15:46:09,741 >> Num Epochs = 2\n", + "[INFO|trainer.py:2137] 2024-09-21 15:46:09,741 >> Instantaneous batch size per device = 16\n", + "[INFO|trainer.py:2140] 2024-09-21 15:46:09,741 >> Total train batch size (w. parallel, distributed & accumulation) = 128\n", + "[INFO|trainer.py:2141] 2024-09-21 15:46:09,741 >> Gradient Accumulation steps = 8\n", + "[INFO|trainer.py:2142] 2024-09-21 15:46:09,741 >> Total optimization steps = 350\n", + "[INFO|trainer.py:2143] 2024-09-21 15:46:09,743 >> Number of trainable parameters = 14,966,784\n", + " 0%| | 0/350 [00:00\n", + " sys.exit(main())\n", + " ^^^^^^\n", + " File \"/home/inflaton/code/LLaMA-Factory/src/llamafactory/cli.py\", line 111, in main\n", + " run_exp()\n", + " File \"/home/inflaton/code/LLaMA-Factory/src/llamafactory/train/tuner.py\", line 50, in run_exp\n", + " run_sft(model_args, data_args, training_args, finetuning_args, generating_args, callbacks)\n", + " File \"/home/inflaton/code/LLaMA-Factory/src/llamafactory/train/sft/workflow.py\", line 96, in run_sft\n", + " train_result = trainer.train(resume_from_checkpoint=training_args.resume_from_checkpoint)\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " File \"/home/inflaton/miniconda3/envs/llm-finetuning/lib/python3.11/site-packages/transformers/trainer.py\", line 1938, in train\n", + " return inner_training_loop(\n", + " ^^^^^^^^^^^^^^^^^^^^\n", + " File \"/home/inflaton/miniconda3/envs/llm-finetuning/lib/python3.11/site-packages/transformers/trainer.py\", line 2279, in _inner_training_loop\n", + " tr_loss_step = self.training_step(model, inputs)\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " File \"/home/inflaton/miniconda3/envs/llm-finetuning/lib/python3.11/site-packages/transformers/trainer.py\", line 3318, in training_step\n", + " loss = self.compute_loss(model, inputs)\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " File \"/home/inflaton/miniconda3/envs/llm-finetuning/lib/python3.11/site-packages/transformers/trainer.py\", line 3363, in compute_loss\n", + " outputs = model(**inputs)\n", + " ^^^^^^^^^^^^^^^\n", + " File \"/home/inflaton/miniconda3/envs/llm-finetuning/lib/python3.11/site-packages/torch/nn/modules/module.py\", line 1532, in _wrapped_call_impl\n", + " return self._call_impl(*args, **kwargs)\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " File \"/home/inflaton/miniconda3/envs/llm-finetuning/lib/python3.11/site-packages/torch/nn/modules/module.py\", line 1541, in _call_impl\n", + " return forward_call(*args, **kwargs)\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " File \"/home/inflaton/miniconda3/envs/llm-finetuning/lib/python3.11/site-packages/accelerate/utils/operations.py\", line 819, in forward\n", + " return model_forward(*args, **kwargs)\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " File \"/home/inflaton/miniconda3/envs/llm-finetuning/lib/python3.11/site-packages/accelerate/utils/operations.py\", line 807, in __call__\n", + 
" return convert_to_fp32(self.model_forward(*args, **kwargs))\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " File \"/home/inflaton/miniconda3/envs/llm-finetuning/lib/python3.11/site-packages/torch/amp/autocast_mode.py\", line 16, in decorate_autocast\n", + " return func(*args, **kwargs)\n", + " ^^^^^^^^^^^^^^^^^^^^^\n", + " File \"/home/inflaton/miniconda3/envs/llm-finetuning/lib/python3.11/site-packages/peft/peft_model.py\", line 1430, in forward\n", + " return self.base_model(\n", + " ^^^^^^^^^^^^^^^^\n", + " File \"/home/inflaton/miniconda3/envs/llm-finetuning/lib/python3.11/site-packages/torch/nn/modules/module.py\", line 1532, in _wrapped_call_impl\n", + " return self._call_impl(*args, **kwargs)\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " File \"/home/inflaton/miniconda3/envs/llm-finetuning/lib/python3.11/site-packages/torch/nn/modules/module.py\", line 1541, in _call_impl\n", + " return forward_call(*args, **kwargs)\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " File \"/home/inflaton/miniconda3/envs/llm-finetuning/lib/python3.11/site-packages/peft/tuners/tuners_utils.py\", line 179, in forward\n", + " return self.model.forward(*args, **kwargs)\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " File \"/home/inflaton/miniconda3/envs/llm-finetuning/lib/python3.11/site-packages/transformers/models/qwen2/modeling_qwen2.py\", line 1082, in forward\n", + " loss = loss_fct(shift_logits, shift_labels)\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " File \"/home/inflaton/miniconda3/envs/llm-finetuning/lib/python3.11/site-packages/torch/nn/modules/module.py\", line 1532, in _wrapped_call_impl\n", + " return self._call_impl(*args, **kwargs)\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " File \"/home/inflaton/miniconda3/envs/llm-finetuning/lib/python3.11/site-packages/torch/nn/modules/module.py\", line 1541, in _call_impl\n", + " return forward_call(*args, **kwargs)\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " File \"/home/inflaton/miniconda3/envs/llm-finetuning/lib/python3.11/site-packages/torch/nn/modules/loss.py\", line 1185, in forward\n", + " return F.cross_entropy(input, target, weight=self.weight,\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " File \"/home/inflaton/miniconda3/envs/llm-finetuning/lib/python3.11/site-packages/torch/nn/functional.py\", line 3086, in cross_entropy\n", + " return torch._C._nn.cross_entropy_loss(input, target, weight, _Reduction.get_enum(reduction), ignore_index, label_smoothing)\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + "RuntimeError: CUDA driver error: out of memory\n", + " 0%| | 0/350 [00:05> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2.5-1.5B-Instruct/snapshots/5fee7c4ed634dc66c6e318c8ac2897b8b9154536/config.json\n", + "[INFO|configuration_utils.py:800] 2024-09-21 15:46:21,868 >> Model config Qwen2Config {\n", + " \"_name_or_path\": \"Qwen/Qwen2.5-1.5B-Instruct\",\n", + " \"architectures\": [\n", + " \"Qwen2ForCausalLM\"\n", + " ],\n", + " \"attention_dropout\": 0.0,\n", + " \"bos_token_id\": 151643,\n", + " \"eos_token_id\": 151645,\n", + " \"hidden_act\": \"silu\",\n", + " \"hidden_size\": 1536,\n", + " \"initializer_range\": 0.02,\n", + " \"intermediate_size\": 8960,\n", + " \"max_position_embeddings\": 32768,\n", + " \"max_window_layers\": 21,\n", + " \"model_type\": \"qwen2\",\n", + " \"num_attention_heads\": 12,\n", + " \"num_hidden_layers\": 28,\n", + " 
\"num_key_value_heads\": 2,\n", + " \"rms_norm_eps\": 1e-06,\n", + " \"rope_theta\": 1000000.0,\n", + " \"sliding_window\": null,\n", + " \"tie_word_embeddings\": true,\n", + " \"torch_dtype\": \"bfloat16\",\n", + " \"transformers_version\": \"4.43.3\",\n", + " \"use_cache\": true,\n", + " \"use_sliding_window\": false,\n", + " \"vocab_size\": 151936\n", + "}\n", + "\n", + "[INFO|tokenization_utils_base.py:2289] 2024-09-21 15:46:22,126 >> loading file vocab.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2.5-1.5B-Instruct/snapshots/5fee7c4ed634dc66c6e318c8ac2897b8b9154536/vocab.json\n", + "[INFO|tokenization_utils_base.py:2289] 2024-09-21 15:46:22,127 >> loading file merges.txt from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2.5-1.5B-Instruct/snapshots/5fee7c4ed634dc66c6e318c8ac2897b8b9154536/merges.txt\n", + "[INFO|tokenization_utils_base.py:2289] 2024-09-21 15:46:22,127 >> loading file tokenizer.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2.5-1.5B-Instruct/snapshots/5fee7c4ed634dc66c6e318c8ac2897b8b9154536/tokenizer.json\n", + "[INFO|tokenization_utils_base.py:2289] 2024-09-21 15:46:22,127 >> loading file added_tokens.json from cache at None\n", + "[INFO|tokenization_utils_base.py:2289] 2024-09-21 15:46:22,127 >> loading file special_tokens_map.json from cache at None\n", + "[INFO|tokenization_utils_base.py:2289] 2024-09-21 15:46:22,127 >> loading file tokenizer_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2.5-1.5B-Instruct/snapshots/5fee7c4ed634dc66c6e318c8ac2897b8b9154536/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2533] 2024-09-21 15:46:22,222 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n", + "[INFO|configuration_utils.py:733] 2024-09-21 15:46:23,467 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2.5-1.5B-Instruct/snapshots/5fee7c4ed634dc66c6e318c8ac2897b8b9154536/config.json\n", + "[INFO|configuration_utils.py:800] 2024-09-21 15:46:23,468 >> Model config Qwen2Config {\n", + " \"_name_or_path\": \"Qwen/Qwen2.5-1.5B-Instruct\",\n", + " \"architectures\": [\n", + " \"Qwen2ForCausalLM\"\n", + " ],\n", + " \"attention_dropout\": 0.0,\n", + " \"bos_token_id\": 151643,\n", + " \"eos_token_id\": 151645,\n", + " \"hidden_act\": \"silu\",\n", + " \"hidden_size\": 1536,\n", + " \"initializer_range\": 0.02,\n", + " \"intermediate_size\": 8960,\n", + " \"max_position_embeddings\": 32768,\n", + " \"max_window_layers\": 21,\n", + " \"model_type\": \"qwen2\",\n", + " \"num_attention_heads\": 12,\n", + " \"num_hidden_layers\": 28,\n", + " \"num_key_value_heads\": 2,\n", + " \"rms_norm_eps\": 1e-06,\n", + " \"rope_theta\": 1000000.0,\n", + " \"sliding_window\": null,\n", + " \"tie_word_embeddings\": true,\n", + " \"torch_dtype\": \"bfloat16\",\n", + " \"transformers_version\": \"4.43.3\",\n", + " \"use_cache\": true,\n", + " \"use_sliding_window\": false,\n", + " \"vocab_size\": 151936\n", + "}\n", + "\n", + "[INFO|tokenization_utils_base.py:2289] 2024-09-21 15:46:23,726 >> loading file vocab.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2.5-1.5B-Instruct/snapshots/5fee7c4ed634dc66c6e318c8ac2897b8b9154536/vocab.json\n", + "[INFO|tokenization_utils_base.py:2289] 2024-09-21 15:46:23,726 >> loading file merges.txt from cache at 
/home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2.5-1.5B-Instruct/snapshots/5fee7c4ed634dc66c6e318c8ac2897b8b9154536/merges.txt\n", + "[INFO|tokenization_utils_base.py:2289] 2024-09-21 15:46:23,726 >> loading file tokenizer.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2.5-1.5B-Instruct/snapshots/5fee7c4ed634dc66c6e318c8ac2897b8b9154536/tokenizer.json\n", + "[INFO|tokenization_utils_base.py:2289] 2024-09-21 15:46:23,726 >> loading file added_tokens.json from cache at None\n", + "[INFO|tokenization_utils_base.py:2289] 2024-09-21 15:46:23,726 >> loading file special_tokens_map.json from cache at None\n", + "[INFO|tokenization_utils_base.py:2289] 2024-09-21 15:46:23,726 >> loading file tokenizer_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2.5-1.5B-Instruct/snapshots/5fee7c4ed634dc66c6e318c8ac2897b8b9154536/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2533] 2024-09-21 15:46:23,811 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n", + "09/21/2024 15:46:23 - INFO - llamafactory.data.template - Replace eos token: <|im_end|>\n", + "09/21/2024 15:46:23 - INFO - llamafactory.data.loader - Loading dataset alpaca_mgtv_p2.json...\n", + "Converting format of dataset (num_proc=16): 100%|█| 25000/25000 [00:00<00:00, 11\n", + "Running tokenizer on dataset (num_proc=16): 100%|█| 25000/25000 [00:01<00:00, 16\n", + "training example:\n", + "input_ids:\n", + "[151644, 8948, 198, 2610, 525, 264, 10950, 17847, 13, 151645, 198, 151644, 872, 198, 56568, 101909, 108024, 101497, 107969, 99329, 9370, 106040, 1773, 99329, 104190, 104506, 48443, 16, 13, 26853, 224, 57218, 28946, 36993, 101051, 46944, 107969, 27091, 3837, 107969, 27091, 36993, 53481, 46944, 100405, 99518, 104151, 101128, 9370, 57621, 8997, 17, 13, 89982, 68878, 17340, 99392, 107969, 99413, 3837, 107969, 99413, 20412, 107969, 27091, 111230, 8997, 18, 13, 26853, 224, 57218, 28946, 73670, 105396, 99885, 106386, 28330, 86119, 112469, 100246, 57621, 9370, 106538, 8997, 19, 13, 69162, 34204, 103991, 86119, 3837, 106040, 44063, 100345, 107591, 102104, 87752, 105220, 109487, 100653, 5122, 20412, 5373, 99520, 5373, 16530, 99335, 5373, 102104, 88991, 5373, 56007, 24339, 32100, 1773, 99200, 102104, 9370, 104317, 100142, 104506, 28311, 256, 481, 92498, 107969, 27091, 33108, 107969, 99413, 114562, 86119, 111230, 3837, 102104, 5122, 20412, 100631, 99520, 198, 256, 481, 92498, 107969, 27091, 33108, 107969, 99413, 53153, 101041, 100631, 108349, 83751, 63789, 20221, 86119, 111230, 3837, 102104, 5122, 16530, 99335, 198, 256, 481, 92498, 111842, 107666, 113479, 106386, 28330, 86119, 100631, 86119, 104151, 101128, 3837, 102104, 5122, 56007, 24339, 32100, 198, 256, 481, 92498, 111842, 107666, 99797, 108670, 34187, 107969, 99413, 106538, 3837, 102104, 5122, 102104, 88991, 198, 20, 13, 49602, 252, 99590, 15946, 53153, 42855, 99885, 102158, 27369, 3837, 105827, 65770, 99475, 109487, 101047, 110281, 18600, 1773, 77557, 3837, 108620, 99360, 2073, 99520, 854, 65770, 99475, 12857, 2073, 16530, 96332, 14880, 110439, 100001, 104190, 102104, 111842, 101080, 103936, 3407, 334, 107969, 27091, 66963, 73562, 109628, 45629, 105489, 3837, 104133, 111718, 106023, 5122, 101988, 115865, 110731, 9370, 105419, 3837, 115865, 99810, 69249, 59743, 104133, 104003, 115865, 36993, 16530, 101401, 68536, 99723, 3837, 115967, 104270, 102060, 110666, 112031, 1773, 14880, 109363, 115865, 110786, 101423, 104249, 3407, 334, 107969, 99413, 66963, 
10236, 250, 253, 48921, 101221, 57218, 101961, 7948, 100894, 9370, 99288, 99818, 101063, 1773, 104269, 99288, 99818, 100774, 13343, 3837, 99798, 57218, 101961, 105664, 102373, 48921, 100271, 1773, 99650, 105616, 18493, 115865, 110731, 9370, 105419, 104388, 1773, 103968, 3837, 102606, 102115, 17340, 3837, 102373, 18493, 106340, 24562, 99774, 82224, 104424, 15946, 99372, 99244, 1773, 110597, 9370, 99288, 99818, 100012, 101416, 63109, 99242, 9370, 102373, 3837, 101988, 101938, 44063, 104003, 115865, 101329, 99314, 3837, 107974, 102373, 9370, 104575, 24562, 3837, 105699, 116418, 100005, 103000, 90663, 1773, 100147, 101070, 105443, 34187, 100097, 3837, 104989, 100833, 69249, 46944, 105190, 9370, 106023, 3407, 334, 111842, 101080, 103936, 66963, 4891, 223, 115, 100623, 21317, 99315, 101037, 198, 151645, 198, 151644, 77091, 198, 99520, 151645]\n", + "inputs:\n", + "<|im_start|>system\n", + "You are a helpful assistant.<|im_end|>\n", + "<|im_start|>user\n", + "你是一个情景猜谜游戏的主持人。游戏规则如下:\n", + "\n", + "1. 参与者会得到一个谜面,谜面会描述一个简单又难以理解的事件。\n", + "2. 主持人知道谜底,谜底是谜面的答案。\n", + "3. 参与者可以询问任何封闭式问题来找寻事件的真相。\n", + "4. 对于每个问题,主持人将根据实际情况回答以下五个选项之一:是、不是、不重要、回答正确、问法错误。各回答的判断标准如下:\n", + " - 若谜面和谜底能找到问题的答案,回答:是或者不是\n", + " - 若谜面和谜底不能直接或者间接推断出问题的答案,回答:不重要\n", + " - 若参与者提问不是一个封闭式问题或者问题难以理解,回答:问法错误\n", + " - 若参与者提问基本还原了谜底真相,回答:回答正确\n", + "5. 回答中不能添加任何其它信息,也不能省略选项中的任何一个字。例如,不可以把“不是”省略成“不”。\n", + "\n", + "请严格按照这些规则回答参与者提出的问题。\n", + "\n", + "**谜面:** 在甄家村里,有一个古老的传说:每年南瓜丰收的季节,南瓜田里总有一个最大的南瓜会不翼而飞,村民们对此现象困惑不解。请找出南瓜失踪背后的原因。\n", + "\n", + "**谜底:** 真相原来与一位年迈的农夫有关。这位农夫年轻时,曾与一位美丽的姑娘相恋。他们约定在南瓜丰收的季节结婚。然而,命运弄人,姑娘在婚礼前的一场意外中离世。悲伤的农夫为了纪念心爱的姑娘,每年都会将最大的南瓜偷走,放到姑娘的墓前,以此寄托自己的哀思。这一行为延续了多年,成为了乡村里一个神秘的传说。\n", + "\n", + "**参与者提出的问题:** 偷的人信神吗\n", + "<|im_end|>\n", + "<|im_start|>assistant\n", + "不是<|im_end|>\n", + "label_ids:\n", + "[-100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 
-100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 99520, 151645]\n", + "labels:\n", + "不是<|im_end|>\n", + "[INFO|configuration_utils.py:733] 2024-09-21 15:46:27,182 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2.5-1.5B-Instruct/snapshots/5fee7c4ed634dc66c6e318c8ac2897b8b9154536/config.json\n", + "[INFO|configuration_utils.py:800] 2024-09-21 15:46:27,182 >> Model config Qwen2Config {\n", + " \"_name_or_path\": \"Qwen/Qwen2.5-1.5B-Instruct\",\n", + " \"architectures\": [\n", + " \"Qwen2ForCausalLM\"\n", + " ],\n", + " \"attention_dropout\": 0.0,\n", + " \"bos_token_id\": 151643,\n", + " \"eos_token_id\": 151645,\n", + " \"hidden_act\": \"silu\",\n", + " \"hidden_size\": 1536,\n", + " \"initializer_range\": 0.02,\n", + " \"intermediate_size\": 8960,\n", + " \"max_position_embeddings\": 32768,\n", + " \"max_window_layers\": 21,\n", + " \"model_type\": \"qwen2\",\n", + " \"num_attention_heads\": 12,\n", + " \"num_hidden_layers\": 28,\n", + " \"num_key_value_heads\": 2,\n", + " \"rms_norm_eps\": 1e-06,\n", + " \"rope_theta\": 1000000.0,\n", + " \"sliding_window\": null,\n", + " \"tie_word_embeddings\": true,\n", + " \"torch_dtype\": \"bfloat16\",\n", + " \"transformers_version\": \"4.43.3\",\n", + " \"use_cache\": true,\n", + " \"use_sliding_window\": false,\n", + " \"vocab_size\": 151936\n", + "}\n", + "\n", + "[INFO|modeling_utils.py:3634] 2024-09-21 15:46:27,194 >> loading weights file model.safetensors from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2.5-1.5B-Instruct/snapshots/5fee7c4ed634dc66c6e318c8ac2897b8b9154536/model.safetensors\n", + "[INFO|modeling_utils.py:1572] 2024-09-21 15:46:27,198 >> Instantiating Qwen2ForCausalLM model under default dtype torch.bfloat16.\n", + "[INFO|configuration_utils.py:1038] 2024-09-21 15:46:27,199 >> Generate config GenerationConfig {\n", + " \"bos_token_id\": 151643,\n", + " \"eos_token_id\": 151645\n", + "}\n", + "\n", + "[INFO|modeling_utils.py:4463] 2024-09-21 15:46:57,790 >> All model checkpoint weights were used when initializing Qwen2ForCausalLM.\n", + "\n", + "[INFO|modeling_utils.py:4471] 2024-09-21 15:46:57,790 >> All the weights of Qwen2ForCausalLM were initialized from the model checkpoint at Qwen/Qwen2.5-1.5B-Instruct.\n", + "If your task is similar to the task the model of the checkpoint was trained on, you can already use Qwen2ForCausalLM for predictions without further training.\n", + "[INFO|configuration_utils.py:993] 2024-09-21 15:46:58,065 >> loading configuration file generation_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2.5-1.5B-Instruct/snapshots/5fee7c4ed634dc66c6e318c8ac2897b8b9154536/generation_config.json\n", + "[INFO|configuration_utils.py:1038] 2024-09-21 15:46:58,066 >> Generate config GenerationConfig {\n", + " \"bos_token_id\": 151643,\n", + " \"do_sample\": 
true,\n", + " \"eos_token_id\": [\n", + " 151645,\n", + " 151643\n", + " ],\n", + " \"pad_token_id\": 151643,\n", + " \"repetition_penalty\": 1.1,\n", + " \"temperature\": 0.7,\n", + " \"top_k\": 20,\n", + " \"top_p\": 0.8\n", + "}\n", + "\n", + "09/21/2024 15:46:58 - INFO - llamafactory.model.model_utils.checkpointing - Gradient checkpointing enabled.\n", + "09/21/2024 15:46:58 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.\n", + "09/21/2024 15:46:58 - INFO - llamafactory.model.adapter - Upcasting trainable params to float32.\n", + "09/21/2024 15:46:58 - INFO - llamafactory.model.adapter - Fine-tuning method: LoRA\n", + "09/21/2024 15:46:58 - INFO - llamafactory.model.model_utils.misc - Found linear modules: up_proj,k_proj,o_proj,down_proj,gate_proj,v_proj,q_proj\n", + "09/21/2024 15:46:58 - INFO - llamafactory.model.loader - trainable params: 9,232,384 || all params: 1,552,946,688 || trainable%: 0.5945\n", + "[INFO|trainer.py:648] 2024-09-21 15:46:58,533 >> Using auto half precision backend\n", + "[INFO|trainer.py:2134] 2024-09-21 15:46:58,837 >> ***** Running training *****\n", + "[INFO|trainer.py:2135] 2024-09-21 15:46:58,837 >> Num examples = 22,500\n", + "[INFO|trainer.py:2136] 2024-09-21 15:46:58,837 >> Num Epochs = 2\n", + "[INFO|trainer.py:2137] 2024-09-21 15:46:58,837 >> Instantaneous batch size per device = 16\n", + "[INFO|trainer.py:2140] 2024-09-21 15:46:58,837 >> Total train batch size (w. parallel, distributed & accumulation) = 128\n", + "[INFO|trainer.py:2141] 2024-09-21 15:46:58,837 >> Gradient Accumulation steps = 8\n", + "[INFO|trainer.py:2142] 2024-09-21 15:46:58,837 >> Total optimization steps = 350\n", + "[INFO|trainer.py:2143] 2024-09-21 15:46:58,839 >> Number of trainable parameters = 9,232,384\n", + "{'loss': 0.8917, 'grad_norm': 6.9668869972229, 'learning_rate': 1.4285714285714285e-05, 'epoch': 0.03}\n", + "{'loss': 0.7893, 'grad_norm': 4.941070079803467, 'learning_rate': 2.857142857142857e-05, 'epoch': 0.06}\n", + "{'loss': 0.5842, 'grad_norm': 2.900670051574707, 'learning_rate': 4.2857142857142856e-05, 'epoch': 0.09}\n", + "{'loss': 0.5452, 'grad_norm': 1.3792134523391724, 'learning_rate': 5.714285714285714e-05, 'epoch': 0.11}\n", + "{'loss': 0.5008, 'grad_norm': 2.0141210556030273, 'learning_rate': 7.142857142857143e-05, 'epoch': 0.14}\n", + "{'loss': 0.4601, 'grad_norm': 1.0095895528793335, 'learning_rate': 8.571428571428571e-05, 'epoch': 0.17}\n", + "{'loss': 0.4191, 'grad_norm': 1.0098716020584106, 'learning_rate': 0.0001, 'epoch': 0.2}\n", + " 10%|████▏ | 35/350 [05:32<49:55, 9.51s/it][INFO|trainer.py:3819] 2024-09-21 15:52:31,446 >> \n", + "***** Running Evaluation *****\n", + "[INFO|trainer.py:3821] 2024-09-21 15:52:31,446 >> Num examples = 2500\n", + "[INFO|trainer.py:3824] 2024-09-21 15:52:31,446 >> Batch size = 1\n", + "\n", + " 0%| | 0/2500 [00:00> Saving model checkpoint to saves/Qwen2.5-1.5B-Instruct/checkpoint-35\n", + "[INFO|configuration_utils.py:733] 2024-09-21 15:54:02,730 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2.5-1.5B-Instruct/snapshots/5fee7c4ed634dc66c6e318c8ac2897b8b9154536/config.json\n", + "[INFO|configuration_utils.py:800] 2024-09-21 15:54:02,731 >> Model config Qwen2Config {\n", + " \"architectures\": [\n", + " \"Qwen2ForCausalLM\"\n", + " ],\n", + " \"attention_dropout\": 0.0,\n", + " \"bos_token_id\": 151643,\n", + " \"eos_token_id\": 151645,\n", + " \"hidden_act\": \"silu\",\n", + " \"hidden_size\": 
1536,\n", + " \"initializer_range\": 0.02,\n", + " \"intermediate_size\": 8960,\n", + " \"max_position_embeddings\": 32768,\n", + " \"max_window_layers\": 21,\n", + " \"model_type\": \"qwen2\",\n", + " \"num_attention_heads\": 12,\n", + " \"num_hidden_layers\": 28,\n", + " \"num_key_value_heads\": 2,\n", + " \"rms_norm_eps\": 1e-06,\n", + " \"rope_theta\": 1000000.0,\n", + " \"sliding_window\": null,\n", + " \"tie_word_embeddings\": true,\n", + " \"torch_dtype\": \"bfloat16\",\n", + " \"transformers_version\": \"4.43.3\",\n", + " \"use_cache\": true,\n", + " \"use_sliding_window\": false,\n", + " \"vocab_size\": 151936\n", + "}\n", + "\n", + "[INFO|tokenization_utils_base.py:2702] 2024-09-21 15:54:02,777 >> tokenizer config file saved in saves/Qwen2.5-1.5B-Instruct/checkpoint-35/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2711] 2024-09-21 15:54:02,777 >> Special tokens file saved in saves/Qwen2.5-1.5B-Instruct/checkpoint-35/special_tokens_map.json\n", + "{'loss': 0.3736, 'grad_norm': 0.5351007580757141, 'learning_rate': 9.993784606094612e-05, 'epoch': 0.23}\n", + "{'loss': 0.3715, 'grad_norm': 0.634586751461029, 'learning_rate': 9.975153876827008e-05, 'epoch': 0.26}\n", + "{'loss': 0.372, 'grad_norm': 0.5977622270584106, 'learning_rate': 9.944154131125642e-05, 'epoch': 0.28}\n", + "{'loss': 0.3397, 'grad_norm': 0.709690272808075, 'learning_rate': 9.900862439242719e-05, 'epoch': 0.31}\n", + "{'loss': 0.3459, 'grad_norm': 0.4649967849254608, 'learning_rate': 9.84538643114539e-05, 'epoch': 0.34}\n", + "{'loss': 0.3089, 'grad_norm': 0.6929703950881958, 'learning_rate': 9.777864028930705e-05, 'epoch': 0.37}\n", + "{'loss': 0.3251, 'grad_norm': 0.762086033821106, 'learning_rate': 9.698463103929542e-05, 'epoch': 0.4}\n", + " 20%|████████▍ | 70/350 [12:40<44:20, 9.50s/it][INFO|trainer.py:3819] 2024-09-21 15:59:39,607 >> \n", + "***** Running Evaluation *****\n", + "[INFO|trainer.py:3821] 2024-09-21 15:59:39,608 >> Num examples = 2500\n", + "[INFO|trainer.py:3824] 2024-09-21 15:59:39,608 >> Batch size = 1\n", + "\n", + " 0%| | 0/2500 [00:00> Saving model checkpoint to saves/Qwen2.5-1.5B-Instruct/checkpoint-70\n", + "[INFO|configuration_utils.py:733] 2024-09-21 16:01:11,066 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2.5-1.5B-Instruct/snapshots/5fee7c4ed634dc66c6e318c8ac2897b8b9154536/config.json\n", + "[INFO|configuration_utils.py:800] 2024-09-21 16:01:11,067 >> Model config Qwen2Config {\n", + " \"architectures\": [\n", + " \"Qwen2ForCausalLM\"\n", + " ],\n", + " \"attention_dropout\": 0.0,\n", + " \"bos_token_id\": 151643,\n", + " \"eos_token_id\": 151645,\n", + " \"hidden_act\": \"silu\",\n", + " \"hidden_size\": 1536,\n", + " \"initializer_range\": 0.02,\n", + " \"intermediate_size\": 8960,\n", + " \"max_position_embeddings\": 32768,\n", + " \"max_window_layers\": 21,\n", + " \"model_type\": \"qwen2\",\n", + " \"num_attention_heads\": 12,\n", + " \"num_hidden_layers\": 28,\n", + " \"num_key_value_heads\": 2,\n", + " \"rms_norm_eps\": 1e-06,\n", + " \"rope_theta\": 1000000.0,\n", + " \"sliding_window\": null,\n", + " \"tie_word_embeddings\": true,\n", + " \"torch_dtype\": \"bfloat16\",\n", + " \"transformers_version\": \"4.43.3\",\n", + " \"use_cache\": true,\n", + " \"use_sliding_window\": false,\n", + " \"vocab_size\": 151936\n", + "}\n", + "\n", + "[INFO|tokenization_utils_base.py:2702] 2024-09-21 16:01:11,111 >> tokenizer config file saved in 
saves/Qwen2.5-1.5B-Instruct/checkpoint-70/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2711] 2024-09-21 16:01:11,111 >> Special tokens file saved in saves/Qwen2.5-1.5B-Instruct/checkpoint-70/special_tokens_map.json\n", + "{'loss': 0.3082, 'grad_norm': 0.8383176922798157, 'learning_rate': 9.607381059352038e-05, 'epoch': 0.43}\n", + "{'loss': 0.2929, 'grad_norm': 0.8879653215408325, 'learning_rate': 9.504844339512095e-05, 'epoch': 0.45}\n", + "{'loss': 0.3087, 'grad_norm': 1.3542834520339966, 'learning_rate': 9.391107866851143e-05, 'epoch': 0.48}\n", + "{'loss': 0.2889, 'grad_norm': 0.4832295775413513, 'learning_rate': 9.266454408160779e-05, 'epoch': 0.51}\n", + "{'loss': 0.2977, 'grad_norm': 0.7334930896759033, 'learning_rate': 9.131193871579975e-05, 'epoch': 0.54}\n", + "{'loss': 0.284, 'grad_norm': 0.9593209624290466, 'learning_rate': 8.985662536114613e-05, 'epoch': 0.57}\n", + "{'loss': 0.3081, 'grad_norm': 0.6446382403373718, 'learning_rate': 8.83022221559489e-05, 'epoch': 0.6}\n", + " 30%|████████████▎ | 105/350 [19:45<38:35, 9.45s/it][INFO|trainer.py:3819] 2024-09-21 16:06:43,938 >> \n", + "***** Running Evaluation *****\n", + "[INFO|trainer.py:3821] 2024-09-21 16:06:43,938 >> Num examples = 2500\n", + "[INFO|trainer.py:3824] 2024-09-21 16:06:43,938 >> Batch size = 1\n", + "\n", + " 0%| | 0/2500 [00:00> Saving model checkpoint to saves/Qwen2.5-1.5B-Instruct/checkpoint-105\n", + "[INFO|configuration_utils.py:733] 2024-09-21 16:08:46,048 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2.5-1.5B-Instruct/snapshots/5fee7c4ed634dc66c6e318c8ac2897b8b9154536/config.json\n", + "[INFO|configuration_utils.py:800] 2024-09-21 16:08:46,049 >> Model config Qwen2Config {\n", + " \"architectures\": [\n", + " \"Qwen2ForCausalLM\"\n", + " ],\n", + " \"attention_dropout\": 0.0,\n", + " \"bos_token_id\": 151643,\n", + " \"eos_token_id\": 151645,\n", + " \"hidden_act\": \"silu\",\n", + " \"hidden_size\": 1536,\n", + " \"initializer_range\": 0.02,\n", + " \"intermediate_size\": 8960,\n", + " \"max_position_embeddings\": 32768,\n", + " \"max_window_layers\": 21,\n", + " \"model_type\": \"qwen2\",\n", + " \"num_attention_heads\": 12,\n", + " \"num_hidden_layers\": 28,\n", + " \"num_key_value_heads\": 2,\n", + " \"rms_norm_eps\": 1e-06,\n", + " \"rope_theta\": 1000000.0,\n", + " \"sliding_window\": null,\n", + " \"tie_word_embeddings\": true,\n", + " \"torch_dtype\": \"bfloat16\",\n", + " \"transformers_version\": \"4.43.3\",\n", + " \"use_cache\": true,\n", + " \"use_sliding_window\": false,\n", + " \"vocab_size\": 151936\n", + "}\n", + "\n", + "[INFO|tokenization_utils_base.py:2702] 2024-09-21 16:08:46,094 >> tokenizer config file saved in saves/Qwen2.5-1.5B-Instruct/checkpoint-105/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2711] 2024-09-21 16:08:46,094 >> Special tokens file saved in saves/Qwen2.5-1.5B-Instruct/checkpoint-105/special_tokens_map.json\n", + "{'loss': 0.2875, 'grad_norm': 0.5638000965118408, 'learning_rate': 8.665259359149132e-05, 'epoch': 0.63}\n", + "{'loss': 0.2773, 'grad_norm': 0.8856341242790222, 'learning_rate': 8.491184090430364e-05, 'epoch': 0.65}\n", + "{'loss': 0.2728, 'grad_norm': 0.933649480342865, 'learning_rate': 8.308429187984297e-05, 'epoch': 0.68}\n", + "{'loss': 0.2676, 'grad_norm': 0.6383955478668213, 'learning_rate': 8.117449009293668e-05, 'epoch': 0.71}\n", + "{'loss': 0.2793, 'grad_norm': 0.6104869246482849, 'learning_rate': 7.91871836117395e-05, 'epoch': 0.74}\n", + 
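A quick sanity check on the numbers in this trace: the effective batch size is 16 per device × 8 accumulation steps = 128, so 22,500 training examples give 175 optimizer steps per epoch (22,500/128, rounded down) and 350 over 2 epochs, matching "Total optimization steps = 350"; eval/save every 35 steps is therefore every ~0.199 epoch, as the checkpoint cadence shows. The logged learning rates also follow the configured schedule exactly: warmup_ratio 0.1 gives 35 linear-warmup steps (the LR peaks at 1.0e-4 right at step 35), followed by cosine decay to zero. A small sketch reproducing two logged values (constant and function names are mine; the formula mirrors the standard warmup-plus-cosine schedule, e.g. transformers' `get_cosine_schedule_with_warmup`):

```python
import math

LR_MAX, TOTAL_STEPS, WARMUP_STEPS = 1.0e-4, 350, 35  # from the config above

def lr_at(step: int) -> float:
    if step < WARMUP_STEPS:
        return LR_MAX * step / WARMUP_STEPS  # linear warmup
    progress = (step - WARMUP_STEPS) / (TOTAL_STEPS - WARMUP_STEPS)
    return 0.5 * LR_MAX * (1.0 + math.cos(math.pi * progress))  # cosine decay

assert lr_at(35) == LR_MAX                             # peak at end of warmup
assert abs(lr_at(40) - 9.993784606094612e-05) < 1e-12  # logged at epoch 0.23
assert abs(lr_at(175) - 5.868240888334653e-05) < 1e-12 # logged at epoch 1.0
```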
"{'loss': 0.3009, 'grad_norm': 1.186869502067566, 'learning_rate': 7.712731319328798e-05, 'epoch': 0.77}\n", + "{'loss': 0.2825, 'grad_norm': 0.4962313175201416, 'learning_rate': 7.500000000000001e-05, 'epoch': 0.8}\n", + " 40%|████████████████▍ | 140/350 [27:21<33:34, 9.59s/it][INFO|trainer.py:3819] 2024-09-21 16:14:20,188 >> \n", + "***** Running Evaluation *****\n", + "[INFO|trainer.py:3821] 2024-09-21 16:14:20,188 >> Num examples = 2500\n", + "[INFO|trainer.py:3824] 2024-09-21 16:14:20,188 >> Batch size = 1\n", + "\n", + " 0%| | 0/2500 [00:00> Saving model checkpoint to saves/Qwen2.5-1.5B-Instruct/checkpoint-140\n", + "[INFO|configuration_utils.py:733] 2024-09-21 16:15:54,027 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2.5-1.5B-Instruct/snapshots/5fee7c4ed634dc66c6e318c8ac2897b8b9154536/config.json\n", + "[INFO|configuration_utils.py:800] 2024-09-21 16:15:54,027 >> Model config Qwen2Config {\n", + " \"architectures\": [\n", + " \"Qwen2ForCausalLM\"\n", + " ],\n", + " \"attention_dropout\": 0.0,\n", + " \"bos_token_id\": 151643,\n", + " \"eos_token_id\": 151645,\n", + " \"hidden_act\": \"silu\",\n", + " \"hidden_size\": 1536,\n", + " \"initializer_range\": 0.02,\n", + " \"intermediate_size\": 8960,\n", + " \"max_position_embeddings\": 32768,\n", + " \"max_window_layers\": 21,\n", + " \"model_type\": \"qwen2\",\n", + " \"num_attention_heads\": 12,\n", + " \"num_hidden_layers\": 28,\n", + " \"num_key_value_heads\": 2,\n", + " \"rms_norm_eps\": 1e-06,\n", + " \"rope_theta\": 1000000.0,\n", + " \"sliding_window\": null,\n", + " \"tie_word_embeddings\": true,\n", + " \"torch_dtype\": \"bfloat16\",\n", + " \"transformers_version\": \"4.43.3\",\n", + " \"use_cache\": true,\n", + " \"use_sliding_window\": false,\n", + " \"vocab_size\": 151936\n", + "}\n", + "\n", + "[INFO|tokenization_utils_base.py:2702] 2024-09-21 16:15:54,069 >> tokenizer config file saved in saves/Qwen2.5-1.5B-Instruct/checkpoint-140/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2711] 2024-09-21 16:15:54,069 >> Special tokens file saved in saves/Qwen2.5-1.5B-Instruct/checkpoint-140/special_tokens_map.json\n", + "{'loss': 0.2769, 'grad_norm': 0.8555291295051575, 'learning_rate': 7.281053286765815e-05, 'epoch': 0.82}\n", + "{'loss': 0.2896, 'grad_norm': 0.9149414300918579, 'learning_rate': 7.056435515653059e-05, 'epoch': 0.85}\n", + "{'loss': 0.2784, 'grad_norm': 0.47306105494499207, 'learning_rate': 6.826705121831976e-05, 'epoch': 0.88}\n", + "{'loss': 0.2722, 'grad_norm': 0.5558005571365356, 'learning_rate': 6.592433251258423e-05, 'epoch': 0.91}\n", + "{'loss': 0.2573, 'grad_norm': 1.440822958946228, 'learning_rate': 6.354202340715026e-05, 'epoch': 0.94}\n", + "{'loss': 0.2766, 'grad_norm': 0.8847922086715698, 'learning_rate': 6.112604669781572e-05, 'epoch': 0.97}\n", + "{'loss': 0.2693, 'grad_norm': 0.6979252099990845, 'learning_rate': 5.868240888334653e-05, 'epoch': 1.0}\n", + " 50%|████████████████████▌ | 175/350 [34:28<27:47, 9.53s/it][INFO|trainer.py:3819] 2024-09-21 16:21:27,237 >> \n", + "***** Running Evaluation *****\n", + "[INFO|trainer.py:3821] 2024-09-21 16:21:27,237 >> Num examples = 2500\n", + "[INFO|trainer.py:3824] 2024-09-21 16:21:27,237 >> Batch size = 1\n", + "\n", + " 0%| | 0/2500 [00:00> Saving model checkpoint to saves/Qwen2.5-1.5B-Instruct/checkpoint-175\n", + "[INFO|configuration_utils.py:733] 2024-09-21 16:23:01,512 >> loading configuration file config.json from cache at 
/home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2.5-1.5B-Instruct/snapshots/5fee7c4ed634dc66c6e318c8ac2897b8b9154536/config.json\n", + "[INFO|configuration_utils.py:800] 2024-09-21 16:23:01,513 >> Model config Qwen2Config {\n", + " \"architectures\": [\n", + " \"Qwen2ForCausalLM\"\n", + " ],\n", + " \"attention_dropout\": 0.0,\n", + " \"bos_token_id\": 151643,\n", + " \"eos_token_id\": 151645,\n", + " \"hidden_act\": \"silu\",\n", + " \"hidden_size\": 1536,\n", + " \"initializer_range\": 0.02,\n", + " \"intermediate_size\": 8960,\n", + " \"max_position_embeddings\": 32768,\n", + " \"max_window_layers\": 21,\n", + " \"model_type\": \"qwen2\",\n", + " \"num_attention_heads\": 12,\n", + " \"num_hidden_layers\": 28,\n", + " \"num_key_value_heads\": 2,\n", + " \"rms_norm_eps\": 1e-06,\n", + " \"rope_theta\": 1000000.0,\n", + " \"sliding_window\": null,\n", + " \"tie_word_embeddings\": true,\n", + " \"torch_dtype\": \"bfloat16\",\n", + " \"transformers_version\": \"4.43.3\",\n", + " \"use_cache\": true,\n", + " \"use_sliding_window\": false,\n", + " \"vocab_size\": 151936\n", + "}\n", + "\n", + "[INFO|tokenization_utils_base.py:2702] 2024-09-21 16:23:01,550 >> tokenizer config file saved in saves/Qwen2.5-1.5B-Instruct/checkpoint-175/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2711] 2024-09-21 16:23:01,550 >> Special tokens file saved in saves/Qwen2.5-1.5B-Instruct/checkpoint-175/special_tokens_map.json\n", + "{'loss': 0.2619, 'grad_norm': 0.7065874934196472, 'learning_rate': 5.621718523237427e-05, 'epoch': 1.02}\n", + "{'loss': 0.2496, 'grad_norm': 0.9854199886322021, 'learning_rate': 5.373650467932122e-05, 'epoch': 1.05}\n", + "{'loss': 0.268, 'grad_norm': 1.2961649894714355, 'learning_rate': 5.124653458690365e-05, 'epoch': 1.08}\n", + "{'loss': 0.2473, 'grad_norm': 0.6371685862541199, 'learning_rate': 4.875346541309637e-05, 'epoch': 1.11}\n", + "{'loss': 0.2649, 'grad_norm': 0.5193257331848145, 'learning_rate': 4.626349532067879e-05, 'epoch': 1.14}\n", + "{'loss': 0.2671, 'grad_norm': 0.5210095643997192, 'learning_rate': 4.378281476762576e-05, 'epoch': 1.17}\n", + "{'loss': 0.2466, 'grad_norm': 0.6401721239089966, 'learning_rate': 4.131759111665349e-05, 'epoch': 1.19}\n", + " 60%|████████████████████████▌ | 210/350 [41:35<22:11, 9.51s/it][INFO|trainer.py:3819] 2024-09-21 16:28:33,947 >> \n", + "***** Running Evaluation *****\n", + "[INFO|trainer.py:3821] 2024-09-21 16:28:33,947 >> Num examples = 2500\n", + "[INFO|trainer.py:3824] 2024-09-21 16:28:33,947 >> Batch size = 1\n", + "\n", + " 0%| | 0/2500 [00:00> Saving model checkpoint to saves/Qwen2.5-1.5B-Instruct/checkpoint-210\n", + "[INFO|configuration_utils.py:733] 2024-09-21 16:30:09,340 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2.5-1.5B-Instruct/snapshots/5fee7c4ed634dc66c6e318c8ac2897b8b9154536/config.json\n", + "[INFO|configuration_utils.py:800] 2024-09-21 16:30:09,341 >> Model config Qwen2Config {\n", + " \"architectures\": [\n", + " \"Qwen2ForCausalLM\"\n", + " ],\n", + " \"attention_dropout\": 0.0,\n", + " \"bos_token_id\": 151643,\n", + " \"eos_token_id\": 151645,\n", + " \"hidden_act\": \"silu\",\n", + " \"hidden_size\": 1536,\n", + " \"initializer_range\": 0.02,\n", + " \"intermediate_size\": 8960,\n", + " \"max_position_embeddings\": 32768,\n", + " \"max_window_layers\": 21,\n", + " \"model_type\": \"qwen2\",\n", + " \"num_attention_heads\": 12,\n", + " \"num_hidden_layers\": 28,\n", + " \"num_key_value_heads\": 2,\n", + " 
\"rms_norm_eps\": 1e-06,\n", + " \"rope_theta\": 1000000.0,\n", + " \"sliding_window\": null,\n", + " \"tie_word_embeddings\": true,\n", + " \"torch_dtype\": \"bfloat16\",\n", + " \"transformers_version\": \"4.43.3\",\n", + " \"use_cache\": true,\n", + " \"use_sliding_window\": false,\n", + " \"vocab_size\": 151936\n", + "}\n", + "\n", + "[INFO|tokenization_utils_base.py:2702] 2024-09-21 16:30:09,379 >> tokenizer config file saved in saves/Qwen2.5-1.5B-Instruct/checkpoint-210/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2711] 2024-09-21 16:30:09,379 >> Special tokens file saved in saves/Qwen2.5-1.5B-Instruct/checkpoint-210/special_tokens_map.json\n", + "{'loss': 0.2345, 'grad_norm': 0.5983948707580566, 'learning_rate': 3.887395330218429e-05, 'epoch': 1.22}\n", + "{'loss': 0.2294, 'grad_norm': 0.8043653964996338, 'learning_rate': 3.6457976592849754e-05, 'epoch': 1.25}\n", + "{'loss': 0.2518, 'grad_norm': 0.9972067475318909, 'learning_rate': 3.4075667487415785e-05, 'epoch': 1.28}\n", + "{'loss': 0.2492, 'grad_norm': 0.8310278654098511, 'learning_rate': 3.173294878168025e-05, 'epoch': 1.31}\n", + "{'loss': 0.2547, 'grad_norm': 0.6404473781585693, 'learning_rate': 2.9435644843469436e-05, 'epoch': 1.34}\n", + "{'loss': 0.2495, 'grad_norm': 0.7588335871696472, 'learning_rate': 2.718946713234185e-05, 'epoch': 1.36}\n", + "{'loss': 0.2733, 'grad_norm': 0.6872820854187012, 'learning_rate': 2.500000000000001e-05, 'epoch': 1.39}\n", + " 70%|████████████████████████████▋ | 245/350 [48:44<16:51, 9.64s/it][INFO|trainer.py:3819] 2024-09-21 16:35:43,271 >> \n", + "***** Running Evaluation *****\n", + "[INFO|trainer.py:3821] 2024-09-21 16:35:43,271 >> Num examples = 2500\n", + "[INFO|trainer.py:3824] 2024-09-21 16:35:43,271 >> Batch size = 1\n", + "\n", + " 0%| | 0/2500 [00:00> Saving model checkpoint to saves/Qwen2.5-1.5B-Instruct/checkpoint-245\n", + "[INFO|configuration_utils.py:733] 2024-09-21 16:37:18,128 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2.5-1.5B-Instruct/snapshots/5fee7c4ed634dc66c6e318c8ac2897b8b9154536/config.json\n", + "[INFO|configuration_utils.py:800] 2024-09-21 16:37:18,129 >> Model config Qwen2Config {\n", + " \"architectures\": [\n", + " \"Qwen2ForCausalLM\"\n", + " ],\n", + " \"attention_dropout\": 0.0,\n", + " \"bos_token_id\": 151643,\n", + " \"eos_token_id\": 151645,\n", + " \"hidden_act\": \"silu\",\n", + " \"hidden_size\": 1536,\n", + " \"initializer_range\": 0.02,\n", + " \"intermediate_size\": 8960,\n", + " \"max_position_embeddings\": 32768,\n", + " \"max_window_layers\": 21,\n", + " \"model_type\": \"qwen2\",\n", + " \"num_attention_heads\": 12,\n", + " \"num_hidden_layers\": 28,\n", + " \"num_key_value_heads\": 2,\n", + " \"rms_norm_eps\": 1e-06,\n", + " \"rope_theta\": 1000000.0,\n", + " \"sliding_window\": null,\n", + " \"tie_word_embeddings\": true,\n", + " \"torch_dtype\": \"bfloat16\",\n", + " \"transformers_version\": \"4.43.3\",\n", + " \"use_cache\": true,\n", + " \"use_sliding_window\": false,\n", + " \"vocab_size\": 151936\n", + "}\n", + "\n", + "[INFO|tokenization_utils_base.py:2702] 2024-09-21 16:37:18,161 >> tokenizer config file saved in saves/Qwen2.5-1.5B-Instruct/checkpoint-245/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2711] 2024-09-21 16:37:18,161 >> Special tokens file saved in saves/Qwen2.5-1.5B-Instruct/checkpoint-245/special_tokens_map.json\n", + "{'loss': 0.2567, 'grad_norm': 0.8496139645576477, 'learning_rate': 2.2872686806712035e-05, 'epoch': 
1.42}\n", + "{'loss': 0.2311, 'grad_norm': 0.6145327091217041, 'learning_rate': 2.0812816388260518e-05, 'epoch': 1.45}\n", + "{'loss': 0.2171, 'grad_norm': 0.6917315721511841, 'learning_rate': 1.8825509907063327e-05, 'epoch': 1.48}\n", + "{'loss': 0.2285, 'grad_norm': 0.8362339735031128, 'learning_rate': 1.691570812015704e-05, 'epoch': 1.51}\n", + "{'loss': 0.2643, 'grad_norm': 0.8186646699905396, 'learning_rate': 1.5088159095696363e-05, 'epoch': 1.54}\n", + "{'loss': 0.2331, 'grad_norm': 0.9536941051483154, 'learning_rate': 1.3347406408508695e-05, 'epoch': 1.56}\n", + "{'loss': 0.2396, 'grad_norm': 0.5406892895698547, 'learning_rate': 1.1697777844051105e-05, 'epoch': 1.59}\n", + " 80%|████████████████████████████████▊ | 280/350 [55:51<11:04, 9.49s/it][INFO|trainer.py:3819] 2024-09-21 16:42:50,749 >> \n", + "***** Running Evaluation *****\n", + "[INFO|trainer.py:3821] 2024-09-21 16:42:50,749 >> Num examples = 2500\n", + "[INFO|trainer.py:3824] 2024-09-21 16:42:50,749 >> Batch size = 1\n", + "\n", + " 0%| | 0/2500 [00:00> Saving model checkpoint to saves/Qwen2.5-1.5B-Instruct/checkpoint-280\n", + "[INFO|configuration_utils.py:733] 2024-09-21 16:44:26,381 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2.5-1.5B-Instruct/snapshots/5fee7c4ed634dc66c6e318c8ac2897b8b9154536/config.json\n", + "[INFO|configuration_utils.py:800] 2024-09-21 16:44:26,381 >> Model config Qwen2Config {\n", + " \"architectures\": [\n", + " \"Qwen2ForCausalLM\"\n", + " ],\n", + " \"attention_dropout\": 0.0,\n", + " \"bos_token_id\": 151643,\n", + " \"eos_token_id\": 151645,\n", + " \"hidden_act\": \"silu\",\n", + " \"hidden_size\": 1536,\n", + " \"initializer_range\": 0.02,\n", + " \"intermediate_size\": 8960,\n", + " \"max_position_embeddings\": 32768,\n", + " \"max_window_layers\": 21,\n", + " \"model_type\": \"qwen2\",\n", + " \"num_attention_heads\": 12,\n", + " \"num_hidden_layers\": 28,\n", + " \"num_key_value_heads\": 2,\n", + " \"rms_norm_eps\": 1e-06,\n", + " \"rope_theta\": 1000000.0,\n", + " \"sliding_window\": null,\n", + " \"tie_word_embeddings\": true,\n", + " \"torch_dtype\": \"bfloat16\",\n", + " \"transformers_version\": \"4.43.3\",\n", + " \"use_cache\": true,\n", + " \"use_sliding_window\": false,\n", + " \"vocab_size\": 151936\n", + "}\n", + "\n", + "[INFO|tokenization_utils_base.py:2702] 2024-09-21 16:44:26,417 >> tokenizer config file saved in saves/Qwen2.5-1.5B-Instruct/checkpoint-280/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2711] 2024-09-21 16:44:26,417 >> Special tokens file saved in saves/Qwen2.5-1.5B-Instruct/checkpoint-280/special_tokens_map.json\n", + "{'loss': 0.2464, 'grad_norm': 0.9152795076370239, 'learning_rate': 1.0143374638853891e-05, 'epoch': 1.62}\n", + "{'loss': 0.2451, 'grad_norm': 0.6611983776092529, 'learning_rate': 8.688061284200266e-06, 'epoch': 1.65}\n", + "{'loss': 0.2268, 'grad_norm': 0.6333246827125549, 'learning_rate': 7.33545591839222e-06, 'epoch': 1.68}\n", + "{'loss': 0.2407, 'grad_norm': 0.6191487312316895, 'learning_rate': 6.088921331488568e-06, 'epoch': 1.71}\n", + "{'loss': 0.2383, 'grad_norm': 0.8804998397827148, 'learning_rate': 4.951556604879048e-06, 'epoch': 1.73}\n", + "{'loss': 0.2203, 'grad_norm': 0.7381444573402405, 'learning_rate': 3.9261894064796135e-06, 'epoch': 1.76}\n", + "{'loss': 0.2373, 'grad_norm': 0.8267008066177368, 'learning_rate': 3.0153689607045845e-06, 'epoch': 1.79}\n", + " 90%|███████████████████████████████████ | 315/350 [1:03:00<05:31, 
9.47s/it][INFO|trainer.py:3819] 2024-09-21 16:49:59,378 >> \n", + "***** Running Evaluation *****\n", + "[INFO|trainer.py:3821] 2024-09-21 16:49:59,378 >> Num examples = 2500\n", + "[INFO|trainer.py:3824] 2024-09-21 16:49:59,378 >> Batch size = 1\n", + "\n", + " 0%| | 0/2500 [00:00> Saving model checkpoint to saves/Qwen2.5-1.5B-Instruct/checkpoint-315\n", + "[INFO|configuration_utils.py:733] 2024-09-21 16:51:33,921 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2.5-1.5B-Instruct/snapshots/5fee7c4ed634dc66c6e318c8ac2897b8b9154536/config.json\n", + "[INFO|configuration_utils.py:800] 2024-09-21 16:51:33,921 >> Model config Qwen2Config {\n", + " \"architectures\": [\n", + " \"Qwen2ForCausalLM\"\n", + " ],\n", + " \"attention_dropout\": 0.0,\n", + " \"bos_token_id\": 151643,\n", + " \"eos_token_id\": 151645,\n", + " \"hidden_act\": \"silu\",\n", + " \"hidden_size\": 1536,\n", + " \"initializer_range\": 0.02,\n", + " \"intermediate_size\": 8960,\n", + " \"max_position_embeddings\": 32768,\n", + " \"max_window_layers\": 21,\n", + " \"model_type\": \"qwen2\",\n", + " \"num_attention_heads\": 12,\n", + " \"num_hidden_layers\": 28,\n", + " \"num_key_value_heads\": 2,\n", + " \"rms_norm_eps\": 1e-06,\n", + " \"rope_theta\": 1000000.0,\n", + " \"sliding_window\": null,\n", + " \"tie_word_embeddings\": true,\n", + " \"torch_dtype\": \"bfloat16\",\n", + " \"transformers_version\": \"4.43.3\",\n", + " \"use_cache\": true,\n", + " \"use_sliding_window\": false,\n", + " \"vocab_size\": 151936\n", + "}\n", + "\n", + "[INFO|tokenization_utils_base.py:2702] 2024-09-21 16:51:33,959 >> tokenizer config file saved in saves/Qwen2.5-1.5B-Instruct/checkpoint-315/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2711] 2024-09-21 16:51:33,959 >> Special tokens file saved in saves/Qwen2.5-1.5B-Instruct/checkpoint-315/special_tokens_map.json\n", + "{'loss': 0.2281, 'grad_norm': 0.8259851932525635, 'learning_rate': 2.221359710692961e-06, 'epoch': 1.82}\n", + "{'loss': 0.2418, 'grad_norm': 0.7553776502609253, 'learning_rate': 1.5461356885461075e-06, 'epoch': 1.85}\n", + "{'loss': 0.2474, 'grad_norm': 1.2472189664840698, 'learning_rate': 9.913756075728087e-07, 'epoch': 1.88}\n", + "{'loss': 0.2323, 'grad_norm': 0.5642313957214355, 'learning_rate': 5.584586887435739e-07, 'epoch': 1.9}\n", + "{'loss': 0.239, 'grad_norm': 0.6548100709915161, 'learning_rate': 2.4846123172992954e-07, 'epoch': 1.93}\n", + "{'loss': 0.2458, 'grad_norm': 0.706020176410675, 'learning_rate': 6.215393905388278e-08, 'epoch': 1.96}\n", + "{'loss': 0.2413, 'grad_norm': 0.7654218077659607, 'learning_rate': 0.0, 'epoch': 1.99}\n", + "100%|███████████████████████████████████████| 350/350 [1:10:07<00:00, 9.51s/it][INFO|trainer.py:3819] 2024-09-21 16:57:06,607 >> \n", + "***** Running Evaluation *****\n", + "[INFO|trainer.py:3821] 2024-09-21 16:57:06,607 >> Num examples = 2500\n", + "[INFO|trainer.py:3824] 2024-09-21 16:57:06,607 >> Batch size = 1\n", + "\n", + " 0%| | 0/2500 [00:00> Saving model checkpoint to saves/Qwen2.5-1.5B-Instruct/checkpoint-350\n", + "[INFO|configuration_utils.py:733] 2024-09-21 16:58:42,391 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2.5-1.5B-Instruct/snapshots/5fee7c4ed634dc66c6e318c8ac2897b8b9154536/config.json\n", + "[INFO|configuration_utils.py:800] 2024-09-21 16:58:42,391 >> Model config Qwen2Config {\n", + " \"architectures\": [\n", + " \"Qwen2ForCausalLM\"\n", + " ],\n", + " 
\"attention_dropout\": 0.0,\n", + " \"bos_token_id\": 151643,\n", + " \"eos_token_id\": 151645,\n", + " \"hidden_act\": \"silu\",\n", + " \"hidden_size\": 1536,\n", + " \"initializer_range\": 0.02,\n", + " \"intermediate_size\": 8960,\n", + " \"max_position_embeddings\": 32768,\n", + " \"max_window_layers\": 21,\n", + " \"model_type\": \"qwen2\",\n", + " \"num_attention_heads\": 12,\n", + " \"num_hidden_layers\": 28,\n", + " \"num_key_value_heads\": 2,\n", + " \"rms_norm_eps\": 1e-06,\n", + " \"rope_theta\": 1000000.0,\n", + " \"sliding_window\": null,\n", + " \"tie_word_embeddings\": true,\n", + " \"torch_dtype\": \"bfloat16\",\n", + " \"transformers_version\": \"4.43.3\",\n", + " \"use_cache\": true,\n", + " \"use_sliding_window\": false,\n", + " \"vocab_size\": 151936\n", + "}\n", + "\n", + "[INFO|tokenization_utils_base.py:2702] 2024-09-21 16:58:42,429 >> tokenizer config file saved in saves/Qwen2.5-1.5B-Instruct/checkpoint-350/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2711] 2024-09-21 16:58:42,429 >> Special tokens file saved in saves/Qwen2.5-1.5B-Instruct/checkpoint-350/special_tokens_map.json\n", + "[INFO|trainer.py:2394] 2024-09-21 16:58:42,586 >> \n", + "\n", + "Training completed. Do not forget to share your model on huggingface.co/models =)\n", + "\n", + "\n", + "{'train_runtime': 4303.7472, 'train_samples_per_second': 10.456, 'train_steps_per_second': 0.081, 'train_loss': 0.30218201875686646, 'epoch': 1.99}\n", + "100%|███████████████████████████████████████| 350/350 [1:11:43<00:00, 12.30s/it]\n", + "[INFO|trainer.py:3503] 2024-09-21 16:58:42,587 >> Saving model checkpoint to saves/Qwen2.5-1.5B-Instruct\n", + "[INFO|configuration_utils.py:733] 2024-09-21 16:58:43,138 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2.5-1.5B-Instruct/snapshots/5fee7c4ed634dc66c6e318c8ac2897b8b9154536/config.json\n", + "[INFO|configuration_utils.py:800] 2024-09-21 16:58:43,139 >> Model config Qwen2Config {\n", + " \"architectures\": [\n", + " \"Qwen2ForCausalLM\"\n", + " ],\n", + " \"attention_dropout\": 0.0,\n", + " \"bos_token_id\": 151643,\n", + " \"eos_token_id\": 151645,\n", + " \"hidden_act\": \"silu\",\n", + " \"hidden_size\": 1536,\n", + " \"initializer_range\": 0.02,\n", + " \"intermediate_size\": 8960,\n", + " \"max_position_embeddings\": 32768,\n", + " \"max_window_layers\": 21,\n", + " \"model_type\": \"qwen2\",\n", + " \"num_attention_heads\": 12,\n", + " \"num_hidden_layers\": 28,\n", + " \"num_key_value_heads\": 2,\n", + " \"rms_norm_eps\": 1e-06,\n", + " \"rope_theta\": 1000000.0,\n", + " \"sliding_window\": null,\n", + " \"tie_word_embeddings\": true,\n", + " \"torch_dtype\": \"bfloat16\",\n", + " \"transformers_version\": \"4.43.3\",\n", + " \"use_cache\": true,\n", + " \"use_sliding_window\": false,\n", + " \"vocab_size\": 151936\n", + "}\n", + "\n", + "[INFO|tokenization_utils_base.py:2702] 2024-09-21 16:58:43,171 >> tokenizer config file saved in saves/Qwen2.5-1.5B-Instruct/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2711] 2024-09-21 16:58:43,171 >> Special tokens file saved in saves/Qwen2.5-1.5B-Instruct/special_tokens_map.json\n", + "***** train metrics *****\n", + " epoch = 1.99\n", + " total_flos = 161035698GF\n", + " train_loss = 0.3022\n", + " train_runtime = 1:11:43.74\n", + " train_samples_per_second = 10.456\n", + " train_steps_per_second = 0.081\n", + "Figure saved at: saves/Qwen2.5-1.5B-Instruct/training_loss.png\n", + "Figure saved at: 
saves/Qwen2.5-1.5B-Instruct/training_eval_loss.png\n", + "09/21/2024 16:58:43 - WARNING - llamafactory.extras.ploting - No metric eval_accuracy to plot.\n", + "[INFO|trainer.py:3819] 2024-09-21 16:58:43,425 >> \n", + "***** Running Evaluation *****\n", + "[INFO|trainer.py:3821] 2024-09-21 16:58:43,425 >> Num examples = 2500\n", + "[INFO|trainer.py:3824] 2024-09-21 16:58:43,425 >> Batch size = 1\n", + "100%|███████████████████████████████████████| 2500/2500 [01:36<00:00, 26.04it/s]\n", + "***** eval metrics *****\n", + " epoch = 1.99\n", + " eval_loss = 0.2388\n", + " eval_runtime = 0:01:36.10\n", + " eval_samples_per_second = 26.012\n", + " eval_steps_per_second = 26.012\n", + "[INFO|modelcard.py:449] 2024-09-21 17:00:19,534 >> Dropping the following result as it does not have all the necessary fields:\n", + "{'task': {'name': 'Causal Language Modeling', 'type': 'text-generation'}}\n", + "Current Directory:\n", + "/home/inflaton/code/logical-reasoning/llama-factory\n", + "loading env vars from: /home/inflaton/code/logical-reasoning/.env\n", + "Adding /home/inflaton/code/logical-reasoning to sys.path\n", + "loading /home/inflaton/code/logical-reasoning/llm_toolkit/logical_reasoning_utils.py\n", + "Qwen Qwen2.5-0.5B-Instruct qwen config/mgtv_template.yaml ../datasets/mgtv\n", + "Writing to config/models/Qwen2.5-0.5B-Instruct.yaml\n", + "config/models/Qwen2.5-0.5B-Instruct.yaml:\n", + " {\n", + " \"model_name_or_path\": \"Qwen/Qwen2.5-0.5B-Instruct\",\n", + " \"stage\": \"sft\",\n", + " \"do_train\": true,\n", + " \"finetuning_type\": \"lora\",\n", + " \"lora_target\": \"all\",\n", + " \"dataset\": \"alpaca_mgtv_p2\",\n", + " \"template\": \"qwen\",\n", + " \"cutoff_len\": 8192,\n", + " \"max_samples\": 25000,\n", + " \"overwrite_cache\": true,\n", + " \"preprocessing_num_workers\": 16,\n", + " \"output_dir\": \"saves/Qwen2.5-0.5B-Instruct\",\n", + " \"logging_steps\": 5,\n", + " \"save_steps\": 35,\n", + " \"plot_loss\": true,\n", + " \"per_device_train_batch_size\": 16,\n", + " \"gradient_accumulation_steps\": 8,\n", + " \"learning_rate\": 0.0001,\n", + " \"num_train_epochs\": 2.0,\n", + " \"lr_scheduler_type\": \"cosine\",\n", + " \"warmup_ratio\": 0.1,\n", + " \"bf16\": true,\n", + " \"ddp_timeout\": 180000000,\n", + " \"val_size\": 0.1,\n", + " \"per_device_eval_batch_size\": 1,\n", + " \"eval_strategy\": \"steps\",\n", + " \"eval_steps\": 35,\n", + " \"report_to\": \"none\",\n", + " \"run_name\": \"Qwen2.5-0.5B-Instruct_lora_sft\"\n", + "}\n", + "09/21/2024 17:00:25 - INFO - llamafactory.hparams.parser - Process rank: 0, device: cuda:0, n_gpu: 1, distributed training: False, compute dtype: torch.bfloat16\n", + "[INFO|configuration_utils.py:733] 2024-09-21 17:00:26,916 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2.5-0.5B-Instruct/snapshots/a8b602d9dafd3a75d382e62757d83d89fca3be54/config.json\n", + "[INFO|configuration_utils.py:800] 2024-09-21 17:00:26,917 >> Model config Qwen2Config {\n", + " \"_name_or_path\": \"Qwen/Qwen2.5-0.5B-Instruct\",\n", + " \"architectures\": [\n", + " \"Qwen2ForCausalLM\"\n", + " ],\n", + " \"attention_dropout\": 0.0,\n", + " \"bos_token_id\": 151643,\n", + " \"eos_token_id\": 151645,\n", + " \"hidden_act\": \"silu\",\n", + " \"hidden_size\": 896,\n", + " \"initializer_range\": 0.02,\n", + " \"intermediate_size\": 4864,\n", + " \"max_position_embeddings\": 32768,\n", + " \"max_window_layers\": 21,\n", + " \"model_type\": \"qwen2\",\n", + " \"num_attention_heads\": 14,\n", + " 
\"num_hidden_layers\": 24,\n", + " \"num_key_value_heads\": 2,\n", + " \"rms_norm_eps\": 1e-06,\n", + " \"rope_theta\": 1000000.0,\n", + " \"sliding_window\": null,\n", + " \"tie_word_embeddings\": true,\n", + " \"torch_dtype\": \"bfloat16\",\n", + " \"transformers_version\": \"4.43.3\",\n", + " \"use_cache\": true,\n", + " \"use_sliding_window\": false,\n", + " \"vocab_size\": 151936\n", + "}\n", + "\n", + "[INFO|tokenization_utils_base.py:2289] 2024-09-21 17:00:27,186 >> loading file vocab.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2.5-0.5B-Instruct/snapshots/a8b602d9dafd3a75d382e62757d83d89fca3be54/vocab.json\n", + "[INFO|tokenization_utils_base.py:2289] 2024-09-21 17:00:27,186 >> loading file merges.txt from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2.5-0.5B-Instruct/snapshots/a8b602d9dafd3a75d382e62757d83d89fca3be54/merges.txt\n", + "[INFO|tokenization_utils_base.py:2289] 2024-09-21 17:00:27,186 >> loading file tokenizer.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2.5-0.5B-Instruct/snapshots/a8b602d9dafd3a75d382e62757d83d89fca3be54/tokenizer.json\n", + "[INFO|tokenization_utils_base.py:2289] 2024-09-21 17:00:27,186 >> loading file added_tokens.json from cache at None\n", + "[INFO|tokenization_utils_base.py:2289] 2024-09-21 17:00:27,186 >> loading file special_tokens_map.json from cache at None\n", + "[INFO|tokenization_utils_base.py:2289] 2024-09-21 17:00:27,186 >> loading file tokenizer_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2.5-0.5B-Instruct/snapshots/a8b602d9dafd3a75d382e62757d83d89fca3be54/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2533] 2024-09-21 17:00:27,298 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n", + "[INFO|configuration_utils.py:733] 2024-09-21 17:00:28,386 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2.5-0.5B-Instruct/snapshots/a8b602d9dafd3a75d382e62757d83d89fca3be54/config.json\n", + "[INFO|configuration_utils.py:800] 2024-09-21 17:00:28,386 >> Model config Qwen2Config {\n", + " \"_name_or_path\": \"Qwen/Qwen2.5-0.5B-Instruct\",\n", + " \"architectures\": [\n", + " \"Qwen2ForCausalLM\"\n", + " ],\n", + " \"attention_dropout\": 0.0,\n", + " \"bos_token_id\": 151643,\n", + " \"eos_token_id\": 151645,\n", + " \"hidden_act\": \"silu\",\n", + " \"hidden_size\": 896,\n", + " \"initializer_range\": 0.02,\n", + " \"intermediate_size\": 4864,\n", + " \"max_position_embeddings\": 32768,\n", + " \"max_window_layers\": 21,\n", + " \"model_type\": \"qwen2\",\n", + " \"num_attention_heads\": 14,\n", + " \"num_hidden_layers\": 24,\n", + " \"num_key_value_heads\": 2,\n", + " \"rms_norm_eps\": 1e-06,\n", + " \"rope_theta\": 1000000.0,\n", + " \"sliding_window\": null,\n", + " \"tie_word_embeddings\": true,\n", + " \"torch_dtype\": \"bfloat16\",\n", + " \"transformers_version\": \"4.43.3\",\n", + " \"use_cache\": true,\n", + " \"use_sliding_window\": false,\n", + " \"vocab_size\": 151936\n", + "}\n", + "\n", + "[INFO|tokenization_utils_base.py:2289] 2024-09-21 17:00:28,652 >> loading file vocab.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2.5-0.5B-Instruct/snapshots/a8b602d9dafd3a75d382e62757d83d89fca3be54/vocab.json\n", + "[INFO|tokenization_utils_base.py:2289] 2024-09-21 17:00:28,652 >> loading file merges.txt from cache at 
/home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2.5-0.5B-Instruct/snapshots/a8b602d9dafd3a75d382e62757d83d89fca3be54/merges.txt\n", + "[INFO|tokenization_utils_base.py:2289] 2024-09-21 17:00:28,652 >> loading file tokenizer.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2.5-0.5B-Instruct/snapshots/a8b602d9dafd3a75d382e62757d83d89fca3be54/tokenizer.json\n", + "[INFO|tokenization_utils_base.py:2289] 2024-09-21 17:00:28,652 >> loading file added_tokens.json from cache at None\n", + "[INFO|tokenization_utils_base.py:2289] 2024-09-21 17:00:28,652 >> loading file special_tokens_map.json from cache at None\n", + "[INFO|tokenization_utils_base.py:2289] 2024-09-21 17:00:28,652 >> loading file tokenizer_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2.5-0.5B-Instruct/snapshots/a8b602d9dafd3a75d382e62757d83d89fca3be54/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2533] 2024-09-21 17:00:28,744 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n", + "09/21/2024 17:00:28 - INFO - llamafactory.data.template - Replace eos token: <|im_end|>\n", + "09/21/2024 17:00:28 - INFO - llamafactory.data.loader - Loading dataset alpaca_mgtv_p2.json...\n", + "Converting format of dataset (num_proc=16): 100%|█| 25000/25000 [00:00<00:00, 95\n", + "Running tokenizer on dataset (num_proc=16): 100%|█| 25000/25000 [00:01<00:00, 15\n", + "training example:\n", + "input_ids:\n", + "[151644, 8948, 198, 2610, 525, 264, 10950, 17847, 13, 151645, 198, 151644, 872, 198, 56568, 101909, 108024, 101497, 107969, 99329, 9370, 106040, 1773, 99329, 104190, 104506, 48443, 16, 13, 26853, 224, 57218, 28946, 36993, 101051, 46944, 107969, 27091, 3837, 107969, 27091, 36993, 53481, 46944, 100405, 99518, 104151, 101128, 9370, 57621, 8997, 17, 13, 89982, 68878, 17340, 99392, 107969, 99413, 3837, 107969, 99413, 20412, 107969, 27091, 111230, 8997, 18, 13, 26853, 224, 57218, 28946, 73670, 105396, 99885, 106386, 28330, 86119, 112469, 100246, 57621, 9370, 106538, 8997, 19, 13, 69162, 34204, 103991, 86119, 3837, 106040, 44063, 100345, 107591, 102104, 87752, 105220, 109487, 100653, 5122, 20412, 5373, 99520, 5373, 16530, 99335, 5373, 102104, 88991, 5373, 56007, 24339, 32100, 1773, 99200, 102104, 9370, 104317, 100142, 104506, 28311, 256, 481, 92498, 107969, 27091, 33108, 107969, 99413, 114562, 86119, 111230, 3837, 102104, 5122, 20412, 100631, 99520, 198, 256, 481, 92498, 107969, 27091, 33108, 107969, 99413, 53153, 101041, 100631, 108349, 83751, 63789, 20221, 86119, 111230, 3837, 102104, 5122, 16530, 99335, 198, 256, 481, 92498, 111842, 107666, 113479, 106386, 28330, 86119, 100631, 86119, 104151, 101128, 3837, 102104, 5122, 56007, 24339, 32100, 198, 256, 481, 92498, 111842, 107666, 99797, 108670, 34187, 107969, 99413, 106538, 3837, 102104, 5122, 102104, 88991, 198, 20, 13, 49602, 252, 99590, 15946, 53153, 42855, 99885, 102158, 27369, 3837, 105827, 65770, 99475, 109487, 101047, 110281, 18600, 1773, 77557, 3837, 108620, 99360, 2073, 99520, 854, 65770, 99475, 12857, 2073, 16530, 96332, 14880, 110439, 100001, 104190, 102104, 111842, 101080, 103936, 3407, 334, 107969, 27091, 66963, 73562, 109628, 45629, 105489, 3837, 104133, 111718, 106023, 5122, 101988, 115865, 110731, 9370, 105419, 3837, 115865, 99810, 69249, 59743, 104133, 104003, 115865, 36993, 16530, 101401, 68536, 99723, 3837, 115967, 104270, 102060, 110666, 112031, 1773, 14880, 109363, 115865, 110786, 101423, 104249, 3407, 334, 107969, 99413, 66963, 
10236, 250, 253, 48921, 101221, 57218, 101961, 7948, 100894, 9370, 99288, 99818, 101063, 1773, 104269, 99288, 99818, 100774, 13343, 3837, 99798, 57218, 101961, 105664, 102373, 48921, 100271, 1773, 99650, 105616, 18493, 115865, 110731, 9370, 105419, 104388, 1773, 103968, 3837, 102606, 102115, 17340, 3837, 102373, 18493, 106340, 24562, 99774, 82224, 104424, 15946, 99372, 99244, 1773, 110597, 9370, 99288, 99818, 100012, 101416, 63109, 99242, 9370, 102373, 3837, 101988, 101938, 44063, 104003, 115865, 101329, 99314, 3837, 107974, 102373, 9370, 104575, 24562, 3837, 105699, 116418, 100005, 103000, 90663, 1773, 100147, 101070, 105443, 34187, 100097, 3837, 104989, 100833, 69249, 46944, 105190, 9370, 106023, 3407, 334, 111842, 101080, 103936, 66963, 4891, 223, 115, 100623, 21317, 99315, 101037, 198, 151645, 198, 151644, 77091, 198, 99520, 151645]\n", + "inputs:\n", + "<|im_start|>system\n", + "You are a helpful assistant.<|im_end|>\n", + "<|im_start|>user\n", + "你是一个情景猜谜游戏的主持人。游戏规则如下:\n", + "\n", + "1. 参与者会得到一个谜面,谜面会描述一个简单又难以理解的事件。\n", + "2. 主持人知道谜底,谜底是谜面的答案。\n", + "3. 参与者可以询问任何封闭式问题来找寻事件的真相。\n", + "4. 对于每个问题,主持人将根据实际情况回答以下五个选项之一:是、不是、不重要、回答正确、问法错误。各回答的判断标准如下:\n", + " - 若谜面和谜底能找到问题的答案,回答:是或者不是\n", + " - 若谜面和谜底不能直接或者间接推断出问题的答案,回答:不重要\n", + " - 若参与者提问不是一个封闭式问题或者问题难以理解,回答:问法错误\n", + " - 若参与者提问基本还原了谜底真相,回答:回答正确\n", + "5. 回答中不能添加任何其它信息,也不能省略选项中的任何一个字。例如,不可以把“不是”省略成“不”。\n", + "\n", + "请严格按照这些规则回答参与者提出的问题。\n", + "\n", + "**谜面:** 在甄家村里,有一个古老的传说:每年南瓜丰收的季节,南瓜田里总有一个最大的南瓜会不翼而飞,村民们对此现象困惑不解。请找出南瓜失踪背后的原因。\n", + "\n", + "**谜底:** 真相原来与一位年迈的农夫有关。这位农夫年轻时,曾与一位美丽的姑娘相恋。他们约定在南瓜丰收的季节结婚。然而,命运弄人,姑娘在婚礼前的一场意外中离世。悲伤的农夫为了纪念心爱的姑娘,每年都会将最大的南瓜偷走,放到姑娘的墓前,以此寄托自己的哀思。这一行为延续了多年,成为了乡村里一个神秘的传说。\n", + "\n", + "**参与者提出的问题:** 偷的人信神吗\n", + "<|im_end|>\n", + "<|im_start|>assistant\n", + "不是<|im_end|>\n", + "label_ids:\n", + "[-100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 
-100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 99520, 151645]\n", + "labels:\n", + "不是<|im_end|>\n", + "[INFO|configuration_utils.py:733] 2024-09-21 17:00:32,420 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2.5-0.5B-Instruct/snapshots/a8b602d9dafd3a75d382e62757d83d89fca3be54/config.json\n", + "[INFO|configuration_utils.py:800] 2024-09-21 17:00:32,420 >> Model config Qwen2Config {\n", + " \"_name_or_path\": \"Qwen/Qwen2.5-0.5B-Instruct\",\n", + " \"architectures\": [\n", + " \"Qwen2ForCausalLM\"\n", + " ],\n", + " \"attention_dropout\": 0.0,\n", + " \"bos_token_id\": 151643,\n", + " \"eos_token_id\": 151645,\n", + " \"hidden_act\": \"silu\",\n", + " \"hidden_size\": 896,\n", + " \"initializer_range\": 0.02,\n", + " \"intermediate_size\": 4864,\n", + " \"max_position_embeddings\": 32768,\n", + " \"max_window_layers\": 21,\n", + " \"model_type\": \"qwen2\",\n", + " \"num_attention_heads\": 14,\n", + " \"num_hidden_layers\": 24,\n", + " \"num_key_value_heads\": 2,\n", + " \"rms_norm_eps\": 1e-06,\n", + " \"rope_theta\": 1000000.0,\n", + " \"sliding_window\": null,\n", + " \"tie_word_embeddings\": true,\n", + " \"torch_dtype\": \"bfloat16\",\n", + " \"transformers_version\": \"4.43.3\",\n", + " \"use_cache\": true,\n", + " \"use_sliding_window\": false,\n", + " \"vocab_size\": 151936\n", + "}\n", + "\n", + "[INFO|modeling_utils.py:3634] 2024-09-21 17:00:32,432 >> loading weights file model.safetensors from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2.5-0.5B-Instruct/snapshots/a8b602d9dafd3a75d382e62757d83d89fca3be54/model.safetensors\n", + "[INFO|modeling_utils.py:1572] 2024-09-21 17:00:32,436 >> Instantiating Qwen2ForCausalLM model under default dtype torch.bfloat16.\n", + "[INFO|configuration_utils.py:1038] 2024-09-21 17:00:32,437 >> Generate config GenerationConfig {\n", + " \"bos_token_id\": 151643,\n", + " \"eos_token_id\": 151645\n", + "}\n", + "\n", + "[INFO|modeling_utils.py:4463] 2024-09-21 17:00:58,518 >> All model checkpoint weights were used when initializing Qwen2ForCausalLM.\n", + "\n", + "[INFO|modeling_utils.py:4471] 2024-09-21 17:00:58,518 >> All the weights of Qwen2ForCausalLM were initialized from the model checkpoint at Qwen/Qwen2.5-0.5B-Instruct.\n", + "If your task is similar to the task the model of the checkpoint was trained on, you can already use Qwen2ForCausalLM for predictions without further training.\n", + "[INFO|configuration_utils.py:993] 2024-09-21 17:00:58,782 >> loading configuration file generation_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2.5-0.5B-Instruct/snapshots/a8b602d9dafd3a75d382e62757d83d89fca3be54/generation_config.json\n", + "[INFO|configuration_utils.py:1038] 2024-09-21 17:00:58,782 >> Generate config GenerationConfig {\n", + " \"bos_token_id\": 151643,\n", + " \"do_sample\": 
true,\n", + " \"eos_token_id\": [\n", + " 151645,\n", + " 151643\n", + " ],\n", + " \"pad_token_id\": 151643,\n", + " \"repetition_penalty\": 1.1,\n", + " \"temperature\": 0.7,\n", + " \"top_k\": 20,\n", + " \"top_p\": 0.8\n", + "}\n", + "\n", + "09/21/2024 17:00:58 - INFO - llamafactory.model.model_utils.checkpointing - Gradient checkpointing enabled.\n", + "09/21/2024 17:00:58 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.\n", + "09/21/2024 17:00:58 - INFO - llamafactory.model.adapter - Upcasting trainable params to float32.\n", + "09/21/2024 17:00:58 - INFO - llamafactory.model.adapter - Fine-tuning method: LoRA\n", + "09/21/2024 17:00:58 - INFO - llamafactory.model.model_utils.misc - Found linear modules: gate_proj,up_proj,k_proj,o_proj,q_proj,v_proj,down_proj\n", + "09/21/2024 17:00:59 - INFO - llamafactory.model.loader - trainable params: 4,399,104 || all params: 498,431,872 || trainable%: 0.8826\n", + "[INFO|trainer.py:648] 2024-09-21 17:00:59,124 >> Using auto half precision backend\n", + "[INFO|trainer.py:2134] 2024-09-21 17:00:59,440 >> ***** Running training *****\n", + "[INFO|trainer.py:2135] 2024-09-21 17:00:59,440 >> Num examples = 22,500\n", + "[INFO|trainer.py:2136] 2024-09-21 17:00:59,440 >> Num Epochs = 2\n", + "[INFO|trainer.py:2137] 2024-09-21 17:00:59,440 >> Instantaneous batch size per device = 16\n", + "[INFO|trainer.py:2140] 2024-09-21 17:00:59,440 >> Total train batch size (w. parallel, distributed & accumulation) = 128\n", + "[INFO|trainer.py:2141] 2024-09-21 17:00:59,440 >> Gradient Accumulation steps = 8\n", + "[INFO|trainer.py:2142] 2024-09-21 17:00:59,440 >> Total optimization steps = 350\n", + "[INFO|trainer.py:2143] 2024-09-21 17:00:59,441 >> Number of trainable parameters = 4,399,104\n", + "{'loss': 1.1184, 'grad_norm': 15.737517356872559, 'learning_rate': 1.4285714285714285e-05, 'epoch': 0.03}\n", + "{'loss': 0.9305, 'grad_norm': 7.921570777893066, 'learning_rate': 2.857142857142857e-05, 'epoch': 0.06}\n", + "{'loss': 0.6076, 'grad_norm': 3.3468737602233887, 'learning_rate': 4.2857142857142856e-05, 'epoch': 0.09}\n", + "{'loss': 0.5437, 'grad_norm': 2.368464708328247, 'learning_rate': 5.714285714285714e-05, 'epoch': 0.11}\n", + "{'loss': 0.5106, 'grad_norm': 3.843911647796631, 'learning_rate': 7.142857142857143e-05, 'epoch': 0.14}\n", + "{'loss': 0.4692, 'grad_norm': 1.8563235998153687, 'learning_rate': 8.571428571428571e-05, 'epoch': 0.17}\n", + "{'loss': 0.4381, 'grad_norm': 1.487918496131897, 'learning_rate': 0.0001, 'epoch': 0.2}\n", + " 10%|████▏ | 35/350 [02:29<22:10, 4.22s/it][INFO|trainer.py:3819] 2024-09-21 17:03:28,974 >> \n", + "***** Running Evaluation *****\n", + "[INFO|trainer.py:3821] 2024-09-21 17:03:28,974 >> Num examples = 2500\n", + "[INFO|trainer.py:3824] 2024-09-21 17:03:28,974 >> Batch size = 1\n", + "\n", + " 0%| | 0/2500 [00:00> Saving model checkpoint to saves/Qwen2.5-0.5B-Instruct/checkpoint-35\n", + "[INFO|configuration_utils.py:733] 2024-09-21 17:04:52,623 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2.5-0.5B-Instruct/snapshots/a8b602d9dafd3a75d382e62757d83d89fca3be54/config.json\n", + "[INFO|configuration_utils.py:800] 2024-09-21 17:04:52,623 >> Model config Qwen2Config {\n", + " \"architectures\": [\n", + " \"Qwen2ForCausalLM\"\n", + " ],\n", + " \"attention_dropout\": 0.0,\n", + " \"bos_token_id\": 151643,\n", + " \"eos_token_id\": 151645,\n", + " \"hidden_act\": \"silu\",\n", + " \"hidden_size\": 
896,\n", + " \"initializer_range\": 0.02,\n", + " \"intermediate_size\": 4864,\n", + " \"max_position_embeddings\": 32768,\n", + " \"max_window_layers\": 21,\n", + " \"model_type\": \"qwen2\",\n", + " \"num_attention_heads\": 14,\n", + " \"num_hidden_layers\": 24,\n", + " \"num_key_value_heads\": 2,\n", + " \"rms_norm_eps\": 1e-06,\n", + " \"rope_theta\": 1000000.0,\n", + " \"sliding_window\": null,\n", + " \"tie_word_embeddings\": true,\n", + " \"torch_dtype\": \"bfloat16\",\n", + " \"transformers_version\": \"4.43.3\",\n", + " \"use_cache\": true,\n", + " \"use_sliding_window\": false,\n", + " \"vocab_size\": 151936\n", + "}\n", + "\n", + "[INFO|tokenization_utils_base.py:2702] 2024-09-21 17:04:52,647 >> tokenizer config file saved in saves/Qwen2.5-0.5B-Instruct/checkpoint-35/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2711] 2024-09-21 17:04:52,647 >> Special tokens file saved in saves/Qwen2.5-0.5B-Instruct/checkpoint-35/special_tokens_map.json\n", + "{'loss': 0.4104, 'grad_norm': 2.7992444038391113, 'learning_rate': 9.993784606094612e-05, 'epoch': 0.23}\n", + "{'loss': 0.3919, 'grad_norm': 1.2948148250579834, 'learning_rate': 9.975153876827008e-05, 'epoch': 0.26}\n", + "{'loss': 0.3958, 'grad_norm': 2.0029242038726807, 'learning_rate': 9.944154131125642e-05, 'epoch': 0.28}\n", + "{'loss': 0.3703, 'grad_norm': 1.9004690647125244, 'learning_rate': 9.900862439242719e-05, 'epoch': 0.31}\n", + "{'loss': 0.3837, 'grad_norm': 2.839643716812134, 'learning_rate': 9.84538643114539e-05, 'epoch': 0.34}\n", + "{'loss': 0.3435, 'grad_norm': 1.7279853820800781, 'learning_rate': 9.777864028930705e-05, 'epoch': 0.37}\n", + "{'loss': 0.3556, 'grad_norm': 1.0835622549057007, 'learning_rate': 9.698463103929542e-05, 'epoch': 0.4}\n", + " 20%|████████▍ | 70/350 [06:21<19:49, 4.25s/it][INFO|trainer.py:3819] 2024-09-21 17:07:21,360 >> \n", + "***** Running Evaluation *****\n", + "[INFO|trainer.py:3821] 2024-09-21 17:07:21,360 >> Num examples = 2500\n", + "[INFO|trainer.py:3824] 2024-09-21 17:07:21,360 >> Batch size = 1\n", + "\n", + " 0%| | 0/2500 [00:00> Saving model checkpoint to saves/Qwen2.5-0.5B-Instruct/checkpoint-70\n", + "[INFO|configuration_utils.py:733] 2024-09-21 17:08:43,882 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2.5-0.5B-Instruct/snapshots/a8b602d9dafd3a75d382e62757d83d89fca3be54/config.json\n", + "[INFO|configuration_utils.py:800] 2024-09-21 17:08:43,882 >> Model config Qwen2Config {\n", + " \"architectures\": [\n", + " \"Qwen2ForCausalLM\"\n", + " ],\n", + " \"attention_dropout\": 0.0,\n", + " \"bos_token_id\": 151643,\n", + " \"eos_token_id\": 151645,\n", + " \"hidden_act\": \"silu\",\n", + " \"hidden_size\": 896,\n", + " \"initializer_range\": 0.02,\n", + " \"intermediate_size\": 4864,\n", + " \"max_position_embeddings\": 32768,\n", + " \"max_window_layers\": 21,\n", + " \"model_type\": \"qwen2\",\n", + " \"num_attention_heads\": 14,\n", + " \"num_hidden_layers\": 24,\n", + " \"num_key_value_heads\": 2,\n", + " \"rms_norm_eps\": 1e-06,\n", + " \"rope_theta\": 1000000.0,\n", + " \"sliding_window\": null,\n", + " \"tie_word_embeddings\": true,\n", + " \"torch_dtype\": \"bfloat16\",\n", + " \"transformers_version\": \"4.43.3\",\n", + " \"use_cache\": true,\n", + " \"use_sliding_window\": false,\n", + " \"vocab_size\": 151936\n", + "}\n", + "\n", + "[INFO|tokenization_utils_base.py:2702] 2024-09-21 17:08:43,903 >> tokenizer config file saved in 
saves/Qwen2.5-0.5B-Instruct/checkpoint-70/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2711] 2024-09-21 17:08:43,903 >> Special tokens file saved in saves/Qwen2.5-0.5B-Instruct/checkpoint-70/special_tokens_map.json\n", + "{'loss': 0.3216, 'grad_norm': 1.249293565750122, 'learning_rate': 9.607381059352038e-05, 'epoch': 0.43}\n", + "{'loss': 0.3061, 'grad_norm': 1.9808311462402344, 'learning_rate': 9.504844339512095e-05, 'epoch': 0.45}\n", + "{'loss': 0.3325, 'grad_norm': 2.327874183654785, 'learning_rate': 9.391107866851143e-05, 'epoch': 0.48}\n", + "{'loss': 0.332, 'grad_norm': 2.0999391078948975, 'learning_rate': 9.266454408160779e-05, 'epoch': 0.51}\n", + "{'loss': 0.3349, 'grad_norm': 2.11915922164917, 'learning_rate': 9.131193871579975e-05, 'epoch': 0.54}\n", + "{'loss': 0.3162, 'grad_norm': 1.733162760734558, 'learning_rate': 8.985662536114613e-05, 'epoch': 0.57}\n", + "{'loss': 0.3228, 'grad_norm': 1.1676844358444214, 'learning_rate': 8.83022221559489e-05, 'epoch': 0.6}\n", + " 30%|████████████▎ | 105/350 [10:13<17:14, 4.22s/it][INFO|trainer.py:3819] 2024-09-21 17:11:12,460 >> \n", + "***** Running Evaluation *****\n", + "[INFO|trainer.py:3821] 2024-09-21 17:11:12,460 >> Num examples = 2500\n", + "[INFO|trainer.py:3824] 2024-09-21 17:11:12,460 >> Batch size = 1\n", + "\n", + " 0%| | 0/2500 [00:00> Saving model checkpoint to saves/Qwen2.5-0.5B-Instruct/checkpoint-105\n", + "[INFO|configuration_utils.py:733] 2024-09-21 17:12:34,739 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2.5-0.5B-Instruct/snapshots/a8b602d9dafd3a75d382e62757d83d89fca3be54/config.json\n", + "[INFO|configuration_utils.py:800] 2024-09-21 17:12:34,739 >> Model config Qwen2Config {\n", + " \"architectures\": [\n", + " \"Qwen2ForCausalLM\"\n", + " ],\n", + " \"attention_dropout\": 0.0,\n", + " \"bos_token_id\": 151643,\n", + " \"eos_token_id\": 151645,\n", + " \"hidden_act\": \"silu\",\n", + " \"hidden_size\": 896,\n", + " \"initializer_range\": 0.02,\n", + " \"intermediate_size\": 4864,\n", + " \"max_position_embeddings\": 32768,\n", + " \"max_window_layers\": 21,\n", + " \"model_type\": \"qwen2\",\n", + " \"num_attention_heads\": 14,\n", + " \"num_hidden_layers\": 24,\n", + " \"num_key_value_heads\": 2,\n", + " \"rms_norm_eps\": 1e-06,\n", + " \"rope_theta\": 1000000.0,\n", + " \"sliding_window\": null,\n", + " \"tie_word_embeddings\": true,\n", + " \"torch_dtype\": \"bfloat16\",\n", + " \"transformers_version\": \"4.43.3\",\n", + " \"use_cache\": true,\n", + " \"use_sliding_window\": false,\n", + " \"vocab_size\": 151936\n", + "}\n", + "\n", + "[INFO|tokenization_utils_base.py:2702] 2024-09-21 17:12:34,760 >> tokenizer config file saved in saves/Qwen2.5-0.5B-Instruct/checkpoint-105/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2711] 2024-09-21 17:12:34,760 >> Special tokens file saved in saves/Qwen2.5-0.5B-Instruct/checkpoint-105/special_tokens_map.json\n", + "{'loss': 0.3165, 'grad_norm': 1.040158748626709, 'learning_rate': 8.665259359149132e-05, 'epoch': 0.63}\n", + "{'loss': 0.3019, 'grad_norm': 1.2740882635116577, 'learning_rate': 8.491184090430364e-05, 'epoch': 0.65}\n", + "{'loss': 0.3119, 'grad_norm': 1.2684509754180908, 'learning_rate': 8.308429187984297e-05, 'epoch': 0.68}\n", + "{'loss': 0.2917, 'grad_norm': 1.1115374565124512, 'learning_rate': 8.117449009293668e-05, 'epoch': 0.71}\n", + "{'loss': 0.2885, 'grad_norm': 1.1410064697265625, 'learning_rate': 7.91871836117395e-05, 'epoch': 0.74}\n", + "{'loss': 
0.3216, 'grad_norm': 2.9339027404785156, 'learning_rate': 7.712731319328798e-05, 'epoch': 0.77}\n", + "{'loss': 0.3026, 'grad_norm': 1.4184118509292603, 'learning_rate': 7.500000000000001e-05, 'epoch': 0.8}\n", + " 40%|████████████████▍ | 140/350 [14:04<14:55, 4.26s/it][INFO|trainer.py:3819] 2024-09-21 17:15:04,041 >> \n", + "***** Running Evaluation *****\n", + "[INFO|trainer.py:3821] 2024-09-21 17:15:04,041 >> Num examples = 2500\n", + "[INFO|trainer.py:3824] 2024-09-21 17:15:04,041 >> Batch size = 1\n", + "\n", + " 0%| | 0/2500 [00:00> Saving model checkpoint to saves/Qwen2.5-0.5B-Instruct/checkpoint-140\n", + "[INFO|configuration_utils.py:733] 2024-09-21 17:16:27,259 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2.5-0.5B-Instruct/snapshots/a8b602d9dafd3a75d382e62757d83d89fca3be54/config.json\n", + "[INFO|configuration_utils.py:800] 2024-09-21 17:16:27,260 >> Model config Qwen2Config {\n", + " \"architectures\": [\n", + " \"Qwen2ForCausalLM\"\n", + " ],\n", + " \"attention_dropout\": 0.0,\n", + " \"bos_token_id\": 151643,\n", + " \"eos_token_id\": 151645,\n", + " \"hidden_act\": \"silu\",\n", + " \"hidden_size\": 896,\n", + " \"initializer_range\": 0.02,\n", + " \"intermediate_size\": 4864,\n", + " \"max_position_embeddings\": 32768,\n", + " \"max_window_layers\": 21,\n", + " \"model_type\": \"qwen2\",\n", + " \"num_attention_heads\": 14,\n", + " \"num_hidden_layers\": 24,\n", + " \"num_key_value_heads\": 2,\n", + " \"rms_norm_eps\": 1e-06,\n", + " \"rope_theta\": 1000000.0,\n", + " \"sliding_window\": null,\n", + " \"tie_word_embeddings\": true,\n", + " \"torch_dtype\": \"bfloat16\",\n", + " \"transformers_version\": \"4.43.3\",\n", + " \"use_cache\": true,\n", + " \"use_sliding_window\": false,\n", + " \"vocab_size\": 151936\n", + "}\n", + "\n", + "[INFO|tokenization_utils_base.py:2702] 2024-09-21 17:16:27,281 >> tokenizer config file saved in saves/Qwen2.5-0.5B-Instruct/checkpoint-140/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2711] 2024-09-21 17:16:27,281 >> Special tokens file saved in saves/Qwen2.5-0.5B-Instruct/checkpoint-140/special_tokens_map.json\n", + "{'loss': 0.2798, 'grad_norm': 1.3493735790252686, 'learning_rate': 7.281053286765815e-05, 'epoch': 0.82}\n", + "{'loss': 0.3084, 'grad_norm': 1.1956149339675903, 'learning_rate': 7.056435515653059e-05, 'epoch': 0.85}\n", + "{'loss': 0.2996, 'grad_norm': 1.1371078491210938, 'learning_rate': 6.826705121831976e-05, 'epoch': 0.88}\n", + "{'loss': 0.2936, 'grad_norm': 1.1137551069259644, 'learning_rate': 6.592433251258423e-05, 'epoch': 0.91}\n", + "{'loss': 0.2718, 'grad_norm': 2.9275758266448975, 'learning_rate': 6.354202340715026e-05, 'epoch': 0.94}\n", + "{'loss': 0.3136, 'grad_norm': 2.6529040336608887, 'learning_rate': 6.112604669781572e-05, 'epoch': 0.97}\n", + "{'loss': 0.2802, 'grad_norm': 1.0029186010360718, 'learning_rate': 5.868240888334653e-05, 'epoch': 1.0}\n", + " 50%|████████████████████▌ | 175/350 [17:56<12:23, 4.25s/it][INFO|trainer.py:3819] 2024-09-21 17:18:55,953 >> \n", + "***** Running Evaluation *****\n", + "[INFO|trainer.py:3821] 2024-09-21 17:18:55,953 >> Num examples = 2500\n", + "[INFO|trainer.py:3824] 2024-09-21 17:18:55,953 >> Batch size = 1\n", + "\n", + " 0%| | 0/2500 [00:00> Saving model checkpoint to saves/Qwen2.5-0.5B-Instruct/checkpoint-175\n", + "[INFO|configuration_utils.py:733] 2024-09-21 17:20:17,688 >> loading configuration file config.json from cache at 
/home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2.5-0.5B-Instruct/snapshots/a8b602d9dafd3a75d382e62757d83d89fca3be54/config.json\n", + "[INFO|configuration_utils.py:800] 2024-09-21 17:20:17,689 >> Model config Qwen2Config {\n", + " \"architectures\": [\n", + " \"Qwen2ForCausalLM\"\n", + " ],\n", + " \"attention_dropout\": 0.0,\n", + " \"bos_token_id\": 151643,\n", + " \"eos_token_id\": 151645,\n", + " \"hidden_act\": \"silu\",\n", + " \"hidden_size\": 896,\n", + " \"initializer_range\": 0.02,\n", + " \"intermediate_size\": 4864,\n", + " \"max_position_embeddings\": 32768,\n", + " \"max_window_layers\": 21,\n", + " \"model_type\": \"qwen2\",\n", + " \"num_attention_heads\": 14,\n", + " \"num_hidden_layers\": 24,\n", + " \"num_key_value_heads\": 2,\n", + " \"rms_norm_eps\": 1e-06,\n", + " \"rope_theta\": 1000000.0,\n", + " \"sliding_window\": null,\n", + " \"tie_word_embeddings\": true,\n", + " \"torch_dtype\": \"bfloat16\",\n", + " \"transformers_version\": \"4.43.3\",\n", + " \"use_cache\": true,\n", + " \"use_sliding_window\": false,\n", + " \"vocab_size\": 151936\n", + "}\n", + "\n", + "[INFO|tokenization_utils_base.py:2702] 2024-09-21 17:20:17,710 >> tokenizer config file saved in saves/Qwen2.5-0.5B-Instruct/checkpoint-175/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2711] 2024-09-21 17:20:17,710 >> Special tokens file saved in saves/Qwen2.5-0.5B-Instruct/checkpoint-175/special_tokens_map.json\n", + "{'loss': 0.2674, 'grad_norm': 1.5481024980545044, 'learning_rate': 5.621718523237427e-05, 'epoch': 1.02}\n", + "{'loss': 0.2629, 'grad_norm': 1.1264318227767944, 'learning_rate': 5.373650467932122e-05, 'epoch': 1.05}\n", + "{'loss': 0.2732, 'grad_norm': 1.3646587133407593, 'learning_rate': 5.124653458690365e-05, 'epoch': 1.08}\n", + "{'loss': 0.271, 'grad_norm': 0.825769305229187, 'learning_rate': 4.875346541309637e-05, 'epoch': 1.11}\n", + "{'loss': 0.2714, 'grad_norm': 0.9982427954673767, 'learning_rate': 4.626349532067879e-05, 'epoch': 1.14}\n", + "{'loss': 0.2962, 'grad_norm': 2.239053964614868, 'learning_rate': 4.378281476762576e-05, 'epoch': 1.17}\n", + "{'loss': 0.2645, 'grad_norm': 0.8168760538101196, 'learning_rate': 4.131759111665349e-05, 'epoch': 1.19}\n", + " 60%|████████████████████████▌ | 210/350 [21:46<09:53, 4.24s/it][INFO|trainer.py:3819] 2024-09-21 17:22:46,303 >> \n", + "***** Running Evaluation *****\n", + "[INFO|trainer.py:3821] 2024-09-21 17:22:46,303 >> Num examples = 2500\n", + "[INFO|trainer.py:3824] 2024-09-21 17:22:46,303 >> Batch size = 1\n", + "\n", + " 0%| | 0/2500 [00:00> Saving model checkpoint to saves/Qwen2.5-0.5B-Instruct/checkpoint-210\n", + "[INFO|configuration_utils.py:733] 2024-09-21 17:24:07,801 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2.5-0.5B-Instruct/snapshots/a8b602d9dafd3a75d382e62757d83d89fca3be54/config.json\n", + "[INFO|configuration_utils.py:800] 2024-09-21 17:24:07,801 >> Model config Qwen2Config {\n", + " \"architectures\": [\n", + " \"Qwen2ForCausalLM\"\n", + " ],\n", + " \"attention_dropout\": 0.0,\n", + " \"bos_token_id\": 151643,\n", + " \"eos_token_id\": 151645,\n", + " \"hidden_act\": \"silu\",\n", + " \"hidden_size\": 896,\n", + " \"initializer_range\": 0.02,\n", + " \"intermediate_size\": 4864,\n", + " \"max_position_embeddings\": 32768,\n", + " \"max_window_layers\": 21,\n", + " \"model_type\": \"qwen2\",\n", + " \"num_attention_heads\": 14,\n", + " \"num_hidden_layers\": 24,\n", + " \"num_key_value_heads\": 2,\n", + " \"rms_norm_eps\": 
1e-06,\n", + " \"rope_theta\": 1000000.0,\n", + " \"sliding_window\": null,\n", + " \"tie_word_embeddings\": true,\n", + " \"torch_dtype\": \"bfloat16\",\n", + " \"transformers_version\": \"4.43.3\",\n", + " \"use_cache\": true,\n", + " \"use_sliding_window\": false,\n", + " \"vocab_size\": 151936\n", + "}\n", + "\n", + "[INFO|tokenization_utils_base.py:2702] 2024-09-21 17:24:07,827 >> tokenizer config file saved in saves/Qwen2.5-0.5B-Instruct/checkpoint-210/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2711] 2024-09-21 17:24:07,827 >> Special tokens file saved in saves/Qwen2.5-0.5B-Instruct/checkpoint-210/special_tokens_map.json\n", + "{'loss': 0.2621, 'grad_norm': 1.397253155708313, 'learning_rate': 3.887395330218429e-05, 'epoch': 1.22}\n", + "{'loss': 0.2458, 'grad_norm': 1.1573820114135742, 'learning_rate': 3.6457976592849754e-05, 'epoch': 1.25}\n", + "{'loss': 0.2651, 'grad_norm': 2.0793874263763428, 'learning_rate': 3.4075667487415785e-05, 'epoch': 1.28}\n", + "{'loss': 0.2654, 'grad_norm': 1.2757207155227661, 'learning_rate': 3.173294878168025e-05, 'epoch': 1.31}\n", + "{'loss': 0.263, 'grad_norm': 1.1064047813415527, 'learning_rate': 2.9435644843469436e-05, 'epoch': 1.34}\n", + "{'loss': 0.2776, 'grad_norm': 1.7825045585632324, 'learning_rate': 2.718946713234185e-05, 'epoch': 1.36}\n", + "{'loss': 0.3012, 'grad_norm': 1.2948428392410278, 'learning_rate': 2.500000000000001e-05, 'epoch': 1.39}\n", + " 70%|████████████████████████████▋ | 245/350 [25:37<07:31, 4.30s/it][INFO|trainer.py:3819] 2024-09-21 17:26:36,677 >> \n", + "***** Running Evaluation *****\n", + "[INFO|trainer.py:3821] 2024-09-21 17:26:36,678 >> Num examples = 2500\n", + "[INFO|trainer.py:3824] 2024-09-21 17:26:36,678 >> Batch size = 1\n", + "\n", + " 0%| | 0/2500 [00:00> Saving model checkpoint to saves/Qwen2.5-0.5B-Instruct/checkpoint-245\n", + "[INFO|configuration_utils.py:733] 2024-09-21 17:27:57,841 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2.5-0.5B-Instruct/snapshots/a8b602d9dafd3a75d382e62757d83d89fca3be54/config.json\n", + "[INFO|configuration_utils.py:800] 2024-09-21 17:27:57,841 >> Model config Qwen2Config {\n", + " \"architectures\": [\n", + " \"Qwen2ForCausalLM\"\n", + " ],\n", + " \"attention_dropout\": 0.0,\n", + " \"bos_token_id\": 151643,\n", + " \"eos_token_id\": 151645,\n", + " \"hidden_act\": \"silu\",\n", + " \"hidden_size\": 896,\n", + " \"initializer_range\": 0.02,\n", + " \"intermediate_size\": 4864,\n", + " \"max_position_embeddings\": 32768,\n", + " \"max_window_layers\": 21,\n", + " \"model_type\": \"qwen2\",\n", + " \"num_attention_heads\": 14,\n", + " \"num_hidden_layers\": 24,\n", + " \"num_key_value_heads\": 2,\n", + " \"rms_norm_eps\": 1e-06,\n", + " \"rope_theta\": 1000000.0,\n", + " \"sliding_window\": null,\n", + " \"tie_word_embeddings\": true,\n", + " \"torch_dtype\": \"bfloat16\",\n", + " \"transformers_version\": \"4.43.3\",\n", + " \"use_cache\": true,\n", + " \"use_sliding_window\": false,\n", + " \"vocab_size\": 151936\n", + "}\n", + "\n", + "[INFO|tokenization_utils_base.py:2702] 2024-09-21 17:27:57,863 >> tokenizer config file saved in saves/Qwen2.5-0.5B-Instruct/checkpoint-245/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2711] 2024-09-21 17:27:57,863 >> Special tokens file saved in saves/Qwen2.5-0.5B-Instruct/checkpoint-245/special_tokens_map.json\n", + "{'loss': 0.2677, 'grad_norm': 1.3902900218963623, 'learning_rate': 2.2872686806712035e-05, 'epoch': 1.42}\n", + "{'loss': 
0.2483, 'grad_norm': 1.1757906675338745, 'learning_rate': 2.0812816388260518e-05, 'epoch': 1.45}\n", + "{'loss': 0.2406, 'grad_norm': 1.2844176292419434, 'learning_rate': 1.8825509907063327e-05, 'epoch': 1.48}\n", + "{'loss': 0.2531, 'grad_norm': 1.3673418760299683, 'learning_rate': 1.691570812015704e-05, 'epoch': 1.51}\n", + "{'loss': 0.2702, 'grad_norm': 1.176766276359558, 'learning_rate': 1.5088159095696363e-05, 'epoch': 1.54}\n", + "{'loss': 0.255, 'grad_norm': 1.8224906921386719, 'learning_rate': 1.3347406408508695e-05, 'epoch': 1.56}\n", + "{'loss': 0.2628, 'grad_norm': 1.1993753910064697, 'learning_rate': 1.1697777844051105e-05, 'epoch': 1.59}\n", + " 80%|████████████████████████████████▊ | 280/350 [29:27<04:56, 4.24s/it][INFO|trainer.py:3819] 2024-09-21 17:30:26,762 >> \n", + "***** Running Evaluation *****\n", + "[INFO|trainer.py:3821] 2024-09-21 17:30:26,762 >> Num examples = 2500\n", + "[INFO|trainer.py:3824] 2024-09-21 17:30:26,762 >> Batch size = 1\n", + "\n", + " 0%| | 0/2500 [00:00> Saving model checkpoint to saves/Qwen2.5-0.5B-Instruct/checkpoint-280\n", + "[INFO|configuration_utils.py:733] 2024-09-21 17:31:47,861 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2.5-0.5B-Instruct/snapshots/a8b602d9dafd3a75d382e62757d83d89fca3be54/config.json\n", + "[INFO|configuration_utils.py:800] 2024-09-21 17:31:47,861 >> Model config Qwen2Config {\n", + " \"architectures\": [\n", + " \"Qwen2ForCausalLM\"\n", + " ],\n", + " \"attention_dropout\": 0.0,\n", + " \"bos_token_id\": 151643,\n", + " \"eos_token_id\": 151645,\n", + " \"hidden_act\": \"silu\",\n", + " \"hidden_size\": 896,\n", + " \"initializer_range\": 0.02,\n", + " \"intermediate_size\": 4864,\n", + " \"max_position_embeddings\": 32768,\n", + " \"max_window_layers\": 21,\n", + " \"model_type\": \"qwen2\",\n", + " \"num_attention_heads\": 14,\n", + " \"num_hidden_layers\": 24,\n", + " \"num_key_value_heads\": 2,\n", + " \"rms_norm_eps\": 1e-06,\n", + " \"rope_theta\": 1000000.0,\n", + " \"sliding_window\": null,\n", + " \"tie_word_embeddings\": true,\n", + " \"torch_dtype\": \"bfloat16\",\n", + " \"transformers_version\": \"4.43.3\",\n", + " \"use_cache\": true,\n", + " \"use_sliding_window\": false,\n", + " \"vocab_size\": 151936\n", + "}\n", + "\n", + "[INFO|tokenization_utils_base.py:2702] 2024-09-21 17:31:47,885 >> tokenizer config file saved in saves/Qwen2.5-0.5B-Instruct/checkpoint-280/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2711] 2024-09-21 17:31:47,885 >> Special tokens file saved in saves/Qwen2.5-0.5B-Instruct/checkpoint-280/special_tokens_map.json\n", + "{'loss': 0.2624, 'grad_norm': 1.6778804063796997, 'learning_rate': 1.0143374638853891e-05, 'epoch': 1.62}\n", + "{'loss': 0.2604, 'grad_norm': 1.305879831314087, 'learning_rate': 8.688061284200266e-06, 'epoch': 1.65}\n", + "{'loss': 0.234, 'grad_norm': 1.0664318799972534, 'learning_rate': 7.33545591839222e-06, 'epoch': 1.68}\n", + "{'loss': 0.2584, 'grad_norm': 1.422717571258545, 'learning_rate': 6.088921331488568e-06, 'epoch': 1.71}\n", + "{'loss': 0.2521, 'grad_norm': 1.3670940399169922, 'learning_rate': 4.951556604879048e-06, 'epoch': 1.73}\n", + "{'loss': 0.2412, 'grad_norm': 1.36295747756958, 'learning_rate': 3.9261894064796135e-06, 'epoch': 1.76}\n", + "{'loss': 0.2493, 'grad_norm': 1.4943523406982422, 'learning_rate': 3.0153689607045845e-06, 'epoch': 1.79}\n", + " 90%|████████████████████████████████████▉ | 315/350 [33:17<02:29, 4.26s/it][INFO|trainer.py:3819] 2024-09-21 
17:34:16,745 >> \n", + "***** Running Evaluation *****\n", + "[INFO|trainer.py:3821] 2024-09-21 17:34:16,745 >> Num examples = 2500\n", + "[INFO|trainer.py:3824] 2024-09-21 17:34:16,745 >> Batch size = 1\n", + "\n", + " 0%| | 0/2500 [00:00> Saving model checkpoint to saves/Qwen2.5-0.5B-Instruct/checkpoint-315\n", + "[INFO|configuration_utils.py:733] 2024-09-21 17:35:37,535 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2.5-0.5B-Instruct/snapshots/a8b602d9dafd3a75d382e62757d83d89fca3be54/config.json\n", + "[INFO|configuration_utils.py:800] 2024-09-21 17:35:37,536 >> Model config Qwen2Config {\n", + " \"architectures\": [\n", + " \"Qwen2ForCausalLM\"\n", + " ],\n", + " \"attention_dropout\": 0.0,\n", + " \"bos_token_id\": 151643,\n", + " \"eos_token_id\": 151645,\n", + " \"hidden_act\": \"silu\",\n", + " \"hidden_size\": 896,\n", + " \"initializer_range\": 0.02,\n", + " \"intermediate_size\": 4864,\n", + " \"max_position_embeddings\": 32768,\n", + " \"max_window_layers\": 21,\n", + " \"model_type\": \"qwen2\",\n", + " \"num_attention_heads\": 14,\n", + " \"num_hidden_layers\": 24,\n", + " \"num_key_value_heads\": 2,\n", + " \"rms_norm_eps\": 1e-06,\n", + " \"rope_theta\": 1000000.0,\n", + " \"sliding_window\": null,\n", + " \"tie_word_embeddings\": true,\n", + " \"torch_dtype\": \"bfloat16\",\n", + " \"transformers_version\": \"4.43.3\",\n", + " \"use_cache\": true,\n", + " \"use_sliding_window\": false,\n", + " \"vocab_size\": 151936\n", + "}\n", + "\n", + "[INFO|tokenization_utils_base.py:2702] 2024-09-21 17:35:37,562 >> tokenizer config file saved in saves/Qwen2.5-0.5B-Instruct/checkpoint-315/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2711] 2024-09-21 17:35:37,562 >> Special tokens file saved in saves/Qwen2.5-0.5B-Instruct/checkpoint-315/special_tokens_map.json\n", + "{'loss': 0.2384, 'grad_norm': 1.4463350772857666, 'learning_rate': 2.221359710692961e-06, 'epoch': 1.82}\n", + "{'loss': 0.2504, 'grad_norm': 1.4378561973571777, 'learning_rate': 1.5461356885461075e-06, 'epoch': 1.85}\n", + "{'loss': 0.2644, 'grad_norm': 2.302417278289795, 'learning_rate': 9.913756075728087e-07, 'epoch': 1.88}\n", + "{'loss': 0.2634, 'grad_norm': 1.0203107595443726, 'learning_rate': 5.584586887435739e-07, 'epoch': 1.9}\n", + "{'loss': 0.2678, 'grad_norm': 1.4266246557235718, 'learning_rate': 2.4846123172992954e-07, 'epoch': 1.93}\n", + "{'loss': 0.2724, 'grad_norm': 1.2000150680541992, 'learning_rate': 6.215393905388278e-08, 'epoch': 1.96}\n", + "{'loss': 0.2642, 'grad_norm': 1.3346699476242065, 'learning_rate': 0.0, 'epoch': 1.99}\n", + "100%|█████████████████████████████████████████| 350/350 [37:04<00:00, 4.17s/it][INFO|trainer.py:3819] 2024-09-21 17:38:04,120 >> \n", + "***** Running Evaluation *****\n", + "[INFO|trainer.py:3821] 2024-09-21 17:38:04,120 >> Num examples = 2500\n", + "[INFO|trainer.py:3824] 2024-09-21 17:38:04,120 >> Batch size = 1\n", + "\n", + " 0%| | 0/2500 [00:00> Saving model checkpoint to saves/Qwen2.5-0.5B-Instruct/checkpoint-350\n", + "[INFO|configuration_utils.py:733] 2024-09-21 17:39:24,176 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2.5-0.5B-Instruct/snapshots/a8b602d9dafd3a75d382e62757d83d89fca3be54/config.json\n", + "[INFO|configuration_utils.py:800] 2024-09-21 17:39:24,176 >> Model config Qwen2Config {\n", + " \"architectures\": [\n", + " \"Qwen2ForCausalLM\"\n", + " ],\n", + " \"attention_dropout\": 0.0,\n", + " 
\"bos_token_id\": 151643,\n", + " \"eos_token_id\": 151645,\n", + " \"hidden_act\": \"silu\",\n", + " \"hidden_size\": 896,\n", + " \"initializer_range\": 0.02,\n", + " \"intermediate_size\": 4864,\n", + " \"max_position_embeddings\": 32768,\n", + " \"max_window_layers\": 21,\n", + " \"model_type\": \"qwen2\",\n", + " \"num_attention_heads\": 14,\n", + " \"num_hidden_layers\": 24,\n", + " \"num_key_value_heads\": 2,\n", + " \"rms_norm_eps\": 1e-06,\n", + " \"rope_theta\": 1000000.0,\n", + " \"sliding_window\": null,\n", + " \"tie_word_embeddings\": true,\n", + " \"torch_dtype\": \"bfloat16\",\n", + " \"transformers_version\": \"4.43.3\",\n", + " \"use_cache\": true,\n", + " \"use_sliding_window\": false,\n", + " \"vocab_size\": 151936\n", + "}\n", + "\n", + "[INFO|tokenization_utils_base.py:2702] 2024-09-21 17:39:24,198 >> tokenizer config file saved in saves/Qwen2.5-0.5B-Instruct/checkpoint-350/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2711] 2024-09-21 17:39:24,198 >> Special tokens file saved in saves/Qwen2.5-0.5B-Instruct/checkpoint-350/special_tokens_map.json\n", + "[INFO|trainer.py:2394] 2024-09-21 17:39:24,314 >> \n", + "\n", + "Training completed. Do not forget to share your model on huggingface.co/models =)\n", + "\n", + "\n", + "{'train_runtime': 2304.8725, 'train_samples_per_second': 19.524, 'train_steps_per_second': 0.152, 'train_loss': 0.32685707432883127, 'epoch': 1.99}\n", + "100%|█████████████████████████████████████████| 350/350 [38:24<00:00, 6.59s/it]\n", + "[INFO|trainer.py:3503] 2024-09-21 17:39:24,314 >> Saving model checkpoint to saves/Qwen2.5-0.5B-Instruct\n", + "[INFO|configuration_utils.py:733] 2024-09-21 17:39:24,851 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2.5-0.5B-Instruct/snapshots/a8b602d9dafd3a75d382e62757d83d89fca3be54/config.json\n", + "[INFO|configuration_utils.py:800] 2024-09-21 17:39:24,851 >> Model config Qwen2Config {\n", + " \"architectures\": [\n", + " \"Qwen2ForCausalLM\"\n", + " ],\n", + " \"attention_dropout\": 0.0,\n", + " \"bos_token_id\": 151643,\n", + " \"eos_token_id\": 151645,\n", + " \"hidden_act\": \"silu\",\n", + " \"hidden_size\": 896,\n", + " \"initializer_range\": 0.02,\n", + " \"intermediate_size\": 4864,\n", + " \"max_position_embeddings\": 32768,\n", + " \"max_window_layers\": 21,\n", + " \"model_type\": \"qwen2\",\n", + " \"num_attention_heads\": 14,\n", + " \"num_hidden_layers\": 24,\n", + " \"num_key_value_heads\": 2,\n", + " \"rms_norm_eps\": 1e-06,\n", + " \"rope_theta\": 1000000.0,\n", + " \"sliding_window\": null,\n", + " \"tie_word_embeddings\": true,\n", + " \"torch_dtype\": \"bfloat16\",\n", + " \"transformers_version\": \"4.43.3\",\n", + " \"use_cache\": true,\n", + " \"use_sliding_window\": false,\n", + " \"vocab_size\": 151936\n", + "}\n", + "\n", + "[INFO|tokenization_utils_base.py:2702] 2024-09-21 17:39:24,873 >> tokenizer config file saved in saves/Qwen2.5-0.5B-Instruct/tokenizer_config.json\n", + "[INFO|tokenization_utils_base.py:2711] 2024-09-21 17:39:24,873 >> Special tokens file saved in saves/Qwen2.5-0.5B-Instruct/special_tokens_map.json\n", + "***** train metrics *****\n", + " epoch = 1.99\n", + " total_flos = 44213382GF\n", + " train_loss = 0.3269\n", + " train_runtime = 0:38:24.87\n", + " train_samples_per_second = 19.524\n", + " train_steps_per_second = 0.152\n", + "Figure saved at: saves/Qwen2.5-0.5B-Instruct/training_loss.png\n", + "Figure saved at: saves/Qwen2.5-0.5B-Instruct/training_eval_loss.png\n", + "09/21/2024 
17:39:25 - WARNING - llamafactory.extras.ploting - No metric eval_accuracy to plot.\n", + "[INFO|trainer.py:3819] 2024-09-21 17:39:25,047 >> \n", + "***** Running Evaluation *****\n", + "[INFO|trainer.py:3821] 2024-09-21 17:39:25,048 >> Num examples = 2500\n", + "[INFO|trainer.py:3824] 2024-09-21 17:39:25,048 >> Batch size = 1\n", + "100%|███████████████████████████████████████| 2500/2500 [01:18<00:00, 31.98it/s]\n", + "***** eval metrics *****\n", + " epoch = 1.99\n", + " eval_loss = 0.2634\n", + " eval_runtime = 0:01:18.20\n", + " eval_samples_per_second = 31.968\n", + " eval_steps_per_second = 31.968\n", + "[INFO|modelcard.py:449] 2024-09-21 17:40:43,252 >> Dropping the following result as it does not have all the necessary fields:\n", + "{'task': {'name': 'Causal Language Modeling', 'type': 'text-generation'}}\n", + "CPU times: user 44.7 s, sys: 20.2 s, total: 1min 4s\n", + "Wall time: 1h 55min 33s\n" + ] + } + ], + "source": [ + "%%time\n", + "\n", + "!./scripts/tune-mgtv-bf16.sh" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "application/vnd.databricks.v1+notebook": { + "dashboards": [], + "environmentMetadata": null, + "language": "python", + "notebookMetadata": { + "pythonIndentUnit": 4 + }, + "notebookName": "07_MAC_+_Qwen2-7B-Instructi_Unsloth_train", + "widgets": {} + }, + "colab": { + "gpuType": "T4", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/scripts/eval-epochs.sh b/scripts/eval-epochs.sh new file mode 100755 index 0000000000000000000000000000000000000000..74f47db1fa0b649ec66350ab0c23c69ebe033359 --- /dev/null +++ b/scripts/eval-epochs.sh @@ -0,0 +1,22 @@ +#!/bin/sh + +BASEDIR=$(dirname "$0") +cd $BASEDIR/.. 
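+
+# Usage: eval-epochs.sh <ORG_NAME> <MODEL>
+# e.g.:  scripts/eval-epochs.sh Qwen Qwen2.5-0.5B-Instruct
+# Runs llm_toolkit/eval_logical_reasoning_all_epochs.py against the adapter
+# checkpoints saved under llama-factory/saves/<MODEL>.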
+echo Current Directory:
+pwd
+
+export LOGICAL_REASONING_DATA_PATH=datasets/mgtv
+export RESIZE_TOKEN_EMBEDDINGS=true
+export USING_LLAMA_FACTORY=true
+export USING_P1_PROMPT_TEMPLATE=false
+export LOAD_IN_4BIT=false
+
+export ORG_NAME=$1
+export MODEL=$2
+export MODEL_NAME=$ORG_NAME/$MODEL
+
+export LOGICAL_REASONING_RESULTS_PATH=data/${MODEL}_results.csv
+export ADAPTER_PATH_BASE=llama-factory/saves/$MODEL
+
+echo "Eval $MODEL_NAME with $ADAPTER_PATH_BASE"
+python llm_toolkit/eval_logical_reasoning_all_epochs.py
\ No newline at end of file
diff --git a/scripts/eval-mgtv-qwen2.5.sh b/scripts/eval-mgtv-qwen2.5.sh
index d9c4c5551e00910c35ee75e3ee2c3b49bbe1d022..d602490bdb4d25ca4fe11d3dc1e046c0e53b52ce 100755
--- a/scripts/eval-mgtv-qwen2.5.sh
+++ b/scripts/eval-mgtv-qwen2.5.sh
@@ -13,17 +13,12 @@ cat /etc/os-release
 lscpu
 grep MemTotal /proc/meminfo
 
-export LOGICAL_REASONING_DATA_PATH=datasets/mgtv
-export RESIZE_TOKEN_EMBEDDINGS=true
-export USING_LLAMA_FACTORY=true
-export USING_P1_PROMPT_TEMPLATE=false
-#export LOAD_IN_4BIT=true
+$BASEDIR/scripts/eval-shots.sh Qwen Qwen2.5-3B-Instruct
 
-export MODEL_PREFIX=Qwen2.5-3B-Instruct
-export MODEL_NAME=qwen/$MODEL_PREFIX
+$BASEDIR/scripts/eval-epochs.sh Qwen Qwen2.5-1.5B-Instruct
 
-export LOGICAL_REASONING_RESULTS_PATH=data/${MODEL_PREFIX}_results.csv
-export ADAPTER_PATH_BASE=llama-factory/saves/$MODEL_PREFIX
+$BASEDIR/scripts/eval-shots.sh Qwen Qwen2.5-1.5B-Instruct
 
-echo "Eval $MODEL_NAME with $ADAPTER_PATH_BASE"
-python llm_toolkit/eval_logical_reasoning_all_epochs.py
+$BASEDIR/scripts/eval-epochs.sh Qwen Qwen2.5-0.5B-Instruct
+
+$BASEDIR/scripts/eval-shots.sh Qwen Qwen2.5-0.5B-Instruct