|
[WARNING|2025-03-26 10:33:41] logging.py:162 >> `ddp_find_unused_parameters` needs to be set as False for LoRA in DDP training. |
|
|
|
[INFO|2025-03-26 10:33:41] parser.py:355 >> Process rank: 0, device: cuda:0, n_gpu: 1, distributed training: True, compute dtype: torch.bfloat16 |
|
|
|
[INFO|2025-03-26 10:33:42] parser.py:355 >> Process rank: 1, device: cuda:1, n_gpu: 1, distributed training: True, compute dtype: torch.bfloat16 |
|
|
|
[INFO|2025-03-26 10:33:42] configuration_utils.py:679 >> loading configuration file config.json from cache at /home/ubuntu/.cache/huggingface/hub/models--Qwen--Qwen2.5-Coder-7B/snapshots/0396a76181e127dfc13e5c5ec48a8cee09938b02/config.json |
|
|
|
[INFO|2025-03-26 10:33:42] configuration_utils.py:746 >> Model config Qwen2Config { |
|
"_name_or_path": "Qwen/Qwen2.5-Coder-7B", |
|
"architectures": [ |
|
"Qwen2ForCausalLM" |
|
], |
|
"attention_dropout": 0.0, |
|
"bos_token_id": 151643, |
|
"eos_token_id": 151643, |
|
"hidden_act": "silu", |
|
"hidden_size": 3584, |
|
"initializer_range": 0.02, |
|
"intermediate_size": 18944, |
|
"max_position_embeddings": 32768, |
|
"max_window_layers": 28, |
|
"model_type": "qwen2", |
|
"num_attention_heads": 28, |
|
"num_hidden_layers": 28, |
|
"num_key_value_heads": 4, |
|
"rms_norm_eps": 1e-06, |
|
"rope_scaling": null, |
|
"rope_theta": 1000000.0, |
|
"sliding_window": null, |
|
"tie_word_embeddings": false, |
|
"torch_dtype": "bfloat16", |
|
"transformers_version": "4.46.1", |
|
"use_cache": true, |
|
"use_sliding_window": false, |
|
"vocab_size": 152064 |
|
} |
|
|
|
|
|
[INFO|2025-03-26 10:33:43] tokenization_utils_base.py:2211 >> loading file vocab.json from cache at /home/ubuntu/.cache/huggingface/hub/models--Qwen--Qwen2.5-Coder-7B/snapshots/0396a76181e127dfc13e5c5ec48a8cee09938b02/vocab.json |
|
|
|
[INFO|2025-03-26 10:33:43] tokenization_utils_base.py:2211 >> loading file merges.txt from cache at /home/ubuntu/.cache/huggingface/hub/models--Qwen--Qwen2.5-Coder-7B/snapshots/0396a76181e127dfc13e5c5ec48a8cee09938b02/merges.txt |
|
|
|
[INFO|2025-03-26 10:33:43] tokenization_utils_base.py:2211 >> loading file tokenizer.json from cache at /home/ubuntu/.cache/huggingface/hub/models--Qwen--Qwen2.5-Coder-7B/snapshots/0396a76181e127dfc13e5c5ec48a8cee09938b02/tokenizer.json |
|
|
|
[INFO|2025-03-26 10:33:43] tokenization_utils_base.py:2211 >> loading file added_tokens.json from cache at None |
|
|
|
[INFO|2025-03-26 10:33:43] tokenization_utils_base.py:2211 >> loading file special_tokens_map.json from cache at None |
|
|
|
[INFO|2025-03-26 10:33:43] tokenization_utils_base.py:2211 >> loading file tokenizer_config.json from cache at /home/ubuntu/.cache/huggingface/hub/models--Qwen--Qwen2.5-Coder-7B/snapshots/0396a76181e127dfc13e5c5ec48a8cee09938b02/tokenizer_config.json |
|
|
|
[INFO|2025-03-26 10:33:43] tokenization_utils_base.py:2475 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained. |
|
|
|
[INFO|2025-03-26 10:33:44] configuration_utils.py:679 >> loading configuration file config.json from cache at /home/ubuntu/.cache/huggingface/hub/models--Qwen--Qwen2.5-Coder-7B/snapshots/0396a76181e127dfc13e5c5ec48a8cee09938b02/config.json |
|
|
|
[INFO|2025-03-26 10:33:44] configuration_utils.py:746 >> Model config Qwen2Config { |
|
"_name_or_path": "Qwen/Qwen2.5-Coder-7B", |
|
"architectures": [ |
|
"Qwen2ForCausalLM" |
|
], |
|
"attention_dropout": 0.0, |
|
"bos_token_id": 151643, |
|
"eos_token_id": 151643, |
|
"hidden_act": "silu", |
|
"hidden_size": 3584, |
|
"initializer_range": 0.02, |
|
"intermediate_size": 18944, |
|
"max_position_embeddings": 32768, |
|
"max_window_layers": 28, |
|
"model_type": "qwen2", |
|
"num_attention_heads": 28, |
|
"num_hidden_layers": 28, |
|
"num_key_value_heads": 4, |
|
"rms_norm_eps": 1e-06, |
|
"rope_scaling": null, |
|
"rope_theta": 1000000.0, |
|
"sliding_window": null, |
|
"tie_word_embeddings": false, |
|
"torch_dtype": "bfloat16", |
|
"transformers_version": "4.46.1", |
|
"use_cache": true, |
|
"use_sliding_window": false, |
|
"vocab_size": 152064 |
|
} |
|
|
|
|
|
[INFO|2025-03-26 10:33:44] tokenization_utils_base.py:2211 >> loading file vocab.json from cache at /home/ubuntu/.cache/huggingface/hub/models--Qwen--Qwen2.5-Coder-7B/snapshots/0396a76181e127dfc13e5c5ec48a8cee09938b02/vocab.json |
|
|
|
[INFO|2025-03-26 10:33:44] tokenization_utils_base.py:2211 >> loading file merges.txt from cache at /home/ubuntu/.cache/huggingface/hub/models--Qwen--Qwen2.5-Coder-7B/snapshots/0396a76181e127dfc13e5c5ec48a8cee09938b02/merges.txt |
|
|
|
[INFO|2025-03-26 10:33:44] tokenization_utils_base.py:2211 >> loading file tokenizer.json from cache at /home/ubuntu/.cache/huggingface/hub/models--Qwen--Qwen2.5-Coder-7B/snapshots/0396a76181e127dfc13e5c5ec48a8cee09938b02/tokenizer.json |
|
|
|
[INFO|2025-03-26 10:33:44] tokenization_utils_base.py:2211 >> loading file added_tokens.json from cache at None |
|
|
|
[INFO|2025-03-26 10:33:44] tokenization_utils_base.py:2211 >> loading file special_tokens_map.json from cache at None |
|
|
|
[INFO|2025-03-26 10:33:44] tokenization_utils_base.py:2211 >> loading file tokenizer_config.json from cache at /home/ubuntu/.cache/huggingface/hub/models--Qwen--Qwen2.5-Coder-7B/snapshots/0396a76181e127dfc13e5c5ec48a8cee09938b02/tokenizer_config.json |
|
|
|
[INFO|2025-03-26 10:33:45] tokenization_utils_base.py:2475 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained. |
|
|
|
[INFO|2025-03-26 10:33:45] logging.py:157 >> Loading dataset new-datasets/solidity_v3.json... |
|
|
|
[INFO|2025-03-26 10:34:08] configuration_utils.py:679 >> loading configuration file config.json from cache at /home/ubuntu/.cache/huggingface/hub/models--Qwen--Qwen2.5-Coder-7B/snapshots/0396a76181e127dfc13e5c5ec48a8cee09938b02/config.json |
|
|
|
[INFO|2025-03-26 10:34:08] configuration_utils.py:746 >> Model config Qwen2Config { |
|
"_name_or_path": "Qwen/Qwen2.5-Coder-7B", |
|
"architectures": [ |
|
"Qwen2ForCausalLM" |
|
], |
|
"attention_dropout": 0.0, |
|
"bos_token_id": 151643, |
|
"eos_token_id": 151643, |
|
"hidden_act": "silu", |
|
"hidden_size": 3584, |
|
"initializer_range": 0.02, |
|
"intermediate_size": 18944, |
|
"max_position_embeddings": 32768, |
|
"max_window_layers": 28, |
|
"model_type": "qwen2", |
|
"num_attention_heads": 28, |
|
"num_hidden_layers": 28, |
|
"num_key_value_heads": 4, |
|
"rms_norm_eps": 1e-06, |
|
"rope_scaling": null, |
|
"rope_theta": 1000000.0, |
|
"sliding_window": null, |
|
"tie_word_embeddings": false, |
|
"torch_dtype": "bfloat16", |
|
"transformers_version": "4.46.1", |
|
"use_cache": true, |
|
"use_sliding_window": false, |
|
"vocab_size": 152064 |
|
} |
|
|
|
|
|
[INFO|2025-03-26 10:34:08] modeling_utils.py:3937 >> loading weights file model.safetensors from cache at /home/ubuntu/.cache/huggingface/hub/models--Qwen--Qwen2.5-Coder-7B/snapshots/0396a76181e127dfc13e5c5ec48a8cee09938b02/model.safetensors.index.json |
|
|
|
[INFO|2025-03-26 10:34:08] modeling_utils.py:1670 >> Instantiating Qwen2ForCausalLM model under default dtype torch.bfloat16. |
|
|
|
[INFO|2025-03-26 10:34:08] configuration_utils.py:1096 >> Generate config GenerationConfig { |
|
"bos_token_id": 151643, |
|
"eos_token_id": 151643 |
|
} |
|
|
|
|
|
[INFO|2025-03-26 10:34:14] modeling_utils.py:4800 >> All model checkpoint weights were used when initializing Qwen2ForCausalLM. |
|
|
|
|
|
[INFO|2025-03-26 10:34:14] modeling_utils.py:4808 >> All the weights of Qwen2ForCausalLM were initialized from the model checkpoint at Qwen/Qwen2.5-Coder-7B. |
|
If your task is similar to the task the model of the checkpoint was trained on, you can already use Qwen2ForCausalLM for predictions without further training. |
|
|
|
[INFO|2025-03-26 10:34:14] configuration_utils.py:1051 >> loading configuration file generation_config.json from cache at /home/ubuntu/.cache/huggingface/hub/models--Qwen--Qwen2.5-Coder-7B/snapshots/0396a76181e127dfc13e5c5ec48a8cee09938b02/generation_config.json |
|
|
|
[INFO|2025-03-26 10:34:14] configuration_utils.py:1096 >> Generate config GenerationConfig { |
|
"bos_token_id": 151643, |
|
"eos_token_id": 151643, |
|
"max_new_tokens": 2048 |
|
} |
|
|
|
|
|
[INFO|2025-03-26 10:34:14] logging.py:157 >> Gradient checkpointing enabled. |
|
|
|
[INFO|2025-03-26 10:34:14] logging.py:157 >> Using torch SDPA for faster training and inference. |
|
|
|
[INFO|2025-03-26 10:34:14] logging.py:157 >> Upcasting trainable params to float32. |
|
|
|
[INFO|2025-03-26 10:34:14] logging.py:157 >> Fine-tuning method: LoRA |
|
|
|
[INFO|2025-03-26 10:34:14] logging.py:157 >> Found linear modules: q_proj,k_proj,up_proj,down_proj,gate_proj,o_proj,v_proj |
|
|
|
[INFO|2025-03-26 10:34:15] logging.py:157 >> trainable params: 40,370,176 || all params: 7,655,986,688 || trainable%: 0.5273 |
|
|
|
[INFO|2025-03-26 10:34:15] trainer.py:698 >> Using auto half precision backend |
|
|
|
[INFO|2025-03-26 10:34:15] trainer.py:2313 >> ***** Running training ***** |
|
|
|
[INFO|2025-03-26 10:34:15] trainer.py:2314 >> Num examples = 38,495 |
|
|
|
[INFO|2025-03-26 10:34:15] trainer.py:2315 >> Num Epochs = 1 |
|
|
|
[INFO|2025-03-26 10:34:15] trainer.py:2316 >> Instantaneous batch size per device = 8 |
|
|
|
[INFO|2025-03-26 10:34:15] trainer.py:2319 >> Total train batch size (w. parallel, distributed & accumulation) = 192 |
|
|
|
[INFO|2025-03-26 10:34:15] trainer.py:2320 >> Gradient Accumulation steps = 12 |
|
|
|
[INFO|2025-03-26 10:34:15] trainer.py:2321 >> Total optimization steps = 200 |
|
|
|
[INFO|2025-03-26 10:34:15] trainer.py:2322 >> Number of trainable parameters = 40,370,176 |
|
|
|
[INFO|2025-03-26 10:37:04] logging.py:157 >> {'loss': 0.4044, 'learning_rate': 4.9923e-05, 'epoch': 0.02} |
|
|
|
[INFO|2025-03-26 10:39:53] logging.py:157 >> {'loss': 0.3910, 'learning_rate': 4.9692e-05, 'epoch': 0.05} |
|
|
|
[INFO|2025-03-26 10:42:42] logging.py:157 >> {'loss': 0.3583, 'learning_rate': 4.9309e-05, 'epoch': 0.07} |
|
|
|
[INFO|2025-03-26 10:45:30] logging.py:157 >> {'loss': 0.3771, 'learning_rate': 4.8776e-05, 'epoch': 0.10} |
|
|
|
[INFO|2025-03-26 10:48:19] logging.py:157 >> {'loss': 0.3408, 'learning_rate': 4.8097e-05, 'epoch': 0.12} |
|
|
|
[INFO|2025-03-26 10:51:08] logging.py:157 >> {'loss': 0.3665, 'learning_rate': 4.7275e-05, 'epoch': 0.15} |
|
|
|
[INFO|2025-03-26 10:53:56] logging.py:157 >> {'loss': 0.3508, 'learning_rate': 4.6316e-05, 'epoch': 0.17} |
|
|
|
[INFO|2025-03-26 10:56:44] logging.py:157 >> {'loss': 0.3558, 'learning_rate': 4.5225e-05, 'epoch': 0.20} |
|
|
|
[INFO|2025-03-26 10:59:33] logging.py:157 >> {'loss': 0.3579, 'learning_rate': 4.4010e-05, 'epoch': 0.22} |
|
|
|
[INFO|2025-03-26 11:02:21] logging.py:157 >> {'loss': 0.3333, 'learning_rate': 4.2678e-05, 'epoch': 0.25} |
|
|
|
[INFO|2025-03-26 11:05:10] logging.py:157 >> {'loss': 0.3263, 'learning_rate': 4.1236e-05, 'epoch': 0.27} |
|
|
|
[INFO|2025-03-26 11:07:58] logging.py:157 >> {'loss': 0.3476, 'learning_rate': 3.9695e-05, 'epoch': 0.30} |
|
|
|
[INFO|2025-03-26 11:10:47] logging.py:157 >> {'loss': 0.3294, 'learning_rate': 3.8062e-05, 'epoch': 0.32} |
|
|
|
[INFO|2025-03-26 11:13:35] logging.py:157 >> {'loss': 0.3560, 'learning_rate': 3.6350e-05, 'epoch': 0.35} |
|
|
|
[INFO|2025-03-26 11:16:24] logging.py:157 >> {'loss': 0.3304, 'learning_rate': 3.4567e-05, 'epoch': 0.37} |
|
|
|
[INFO|2025-03-26 11:19:13] logging.py:157 >> {'loss': 0.3436, 'learning_rate': 3.2725e-05, 'epoch': 0.40} |
|
|
|
[INFO|2025-03-26 11:22:01] logging.py:157 >> {'loss': 0.3388, 'learning_rate': 3.0836e-05, 'epoch': 0.42} |
|
|
|
[INFO|2025-03-26 11:24:49] logging.py:157 >> {'loss': 0.3367, 'learning_rate': 2.8911e-05, 'epoch': 0.45} |
|
|
|
[INFO|2025-03-26 11:27:38] logging.py:157 >> {'loss': 0.3561, 'learning_rate': 2.6961e-05, 'epoch': 0.47} |
|
|
|
[INFO|2025-03-26 11:30:26] logging.py:157 >> {'loss': 0.3182, 'learning_rate': 2.5000e-05, 'epoch': 0.50} |
|
|
|
[INFO|2025-03-26 11:30:26] trainer.py:3801 >> Saving model checkpoint to saves/Qwen2.5-Coder-7B/lora/solidity_qwen_model/checkpoint-100 |
|
|
|
[INFO|2025-03-26 11:30:27] configuration_utils.py:679 >> loading configuration file config.json from cache at /home/ubuntu/.cache/huggingface/hub/models--Qwen--Qwen2.5-Coder-7B/snapshots/0396a76181e127dfc13e5c5ec48a8cee09938b02/config.json |
|
|
|
[INFO|2025-03-26 11:30:27] configuration_utils.py:746 >> Model config Qwen2Config { |
|
"architectures": [ |
|
"Qwen2ForCausalLM" |
|
], |
|
"attention_dropout": 0.0, |
|
"bos_token_id": 151643, |
|
"eos_token_id": 151643, |
|
"hidden_act": "silu", |
|
"hidden_size": 3584, |
|
"initializer_range": 0.02, |
|
"intermediate_size": 18944, |
|
"max_position_embeddings": 32768, |
|
"max_window_layers": 28, |
|
"model_type": "qwen2", |
|
"num_attention_heads": 28, |
|
"num_hidden_layers": 28, |
|
"num_key_value_heads": 4, |
|
"rms_norm_eps": 1e-06, |
|
"rope_scaling": null, |
|
"rope_theta": 1000000.0, |
|
"sliding_window": null, |
|
"tie_word_embeddings": false, |
|
"torch_dtype": "bfloat16", |
|
"transformers_version": "4.46.1", |
|
"use_cache": true, |
|
"use_sliding_window": false, |
|
"vocab_size": 152064 |
|
} |
|
|
|
|
|
[INFO|2025-03-26 11:30:27] tokenization_utils_base.py:2646 >> tokenizer config file saved in saves/Qwen2.5-Coder-7B/lora/solidity_qwen_model/checkpoint-100/tokenizer_config.json |
|
|
|
[INFO|2025-03-26 11:30:27] tokenization_utils_base.py:2655 >> Special tokens file saved in saves/Qwen2.5-Coder-7B/lora/solidity_qwen_model/checkpoint-100/special_tokens_map.json |
|
|
|
[INFO|2025-03-26 11:33:17] logging.py:157 >> {'loss': 0.3331, 'learning_rate': 2.3039e-05, 'epoch': 0.52} |
|
|
|
[INFO|2025-03-26 11:36:05] logging.py:157 >> {'loss': 0.3383, 'learning_rate': 2.1089e-05, 'epoch': 0.55} |
|
|
|
[INFO|2025-03-26 11:38:54] logging.py:157 >> {'loss': 0.3366, 'learning_rate': 1.9164e-05, 'epoch': 0.57} |
|
|
|
[INFO|2025-03-26 11:41:43] logging.py:157 >> {'loss': 0.3324, 'learning_rate': 1.7275e-05, 'epoch': 0.60} |
|
|
|
[INFO|2025-03-26 11:44:31] logging.py:157 >> {'loss': 0.3030, 'learning_rate': 1.5433e-05, 'epoch': 0.62} |
|
|
|
[INFO|2025-03-26 11:47:20] logging.py:157 >> {'loss': 0.3500, 'learning_rate': 1.3650e-05, 'epoch': 0.65} |
|
|
|
[INFO|2025-03-26 11:50:08] logging.py:157 >> {'loss': 0.3310, 'learning_rate': 1.1938e-05, 'epoch': 0.67} |
|
|
|
[INFO|2025-03-26 11:52:57] logging.py:157 >> {'loss': 0.3440, 'learning_rate': 1.0305e-05, 'epoch': 0.70} |
|
|
|
[INFO|2025-03-26 11:55:45] logging.py:157 >> {'loss': 0.3567, 'learning_rate': 8.7638e-06, 'epoch': 0.72} |
|
|
|
[INFO|2025-03-26 11:58:34] logging.py:157 >> {'loss': 0.3184, 'learning_rate': 7.3223e-06, 'epoch': 0.75} |
|
|
|
[INFO|2025-03-26 12:01:21] logging.py:157 >> {'loss': 0.3279, 'learning_rate': 5.9899e-06, 'epoch': 0.77} |
|
|
|
[INFO|2025-03-26 12:04:10] logging.py:157 >> {'loss': 0.3372, 'learning_rate': 4.7746e-06, 'epoch': 0.80} |
|
|
|
[INFO|2025-03-26 12:06:58] logging.py:157 >> {'loss': 0.3553, 'learning_rate': 3.6840e-06, 'epoch': 0.82} |
|
|
|
[INFO|2025-03-26 12:09:47] logging.py:157 >> {'loss': 0.3361, 'learning_rate': 2.7248e-06, 'epoch': 0.85} |
|
|
|
[INFO|2025-03-26 12:12:36] logging.py:157 >> {'loss': 0.3335, 'learning_rate': 1.9030e-06, 'epoch': 0.87} |
|
|
|
[INFO|2025-03-26 12:15:25] logging.py:157 >> {'loss': 0.3425, 'learning_rate': 1.2236e-06, 'epoch': 0.90} |
|
|
|
[INFO|2025-03-26 12:18:12] logging.py:157 >> {'loss': 0.3247, 'learning_rate': 6.9075e-07, 'epoch': 0.92} |
|
|
|
[INFO|2025-03-26 12:21:01] logging.py:157 >> {'loss': 0.3314, 'learning_rate': 3.0779e-07, 'epoch': 0.95} |
|
|
|
[INFO|2025-03-26 12:23:49] logging.py:157 >> {'loss': 0.3354, 'learning_rate': 7.7067e-08, 'epoch': 0.97} |
|
|
|
[INFO|2025-03-26 12:26:38] logging.py:157 >> {'loss': 0.3413, 'learning_rate': 0.0000e+00, 'epoch': 1.00} |
|
|
|
[INFO|2025-03-26 12:26:38] trainer.py:3801 >> Saving model checkpoint to saves/Qwen2.5-Coder-7B/lora/solidity_qwen_model/checkpoint-200 |
|
|
|
[INFO|2025-03-26 12:26:38] configuration_utils.py:679 >> loading configuration file config.json from cache at /home/ubuntu/.cache/huggingface/hub/models--Qwen--Qwen2.5-Coder-7B/snapshots/0396a76181e127dfc13e5c5ec48a8cee09938b02/config.json |
|
|
|
[INFO|2025-03-26 12:26:38] configuration_utils.py:746 >> Model config Qwen2Config { |
|
"architectures": [ |
|
"Qwen2ForCausalLM" |
|
], |
|
"attention_dropout": 0.0, |
|
"bos_token_id": 151643, |
|
"eos_token_id": 151643, |
|
"hidden_act": "silu", |
|
"hidden_size": 3584, |
|
"initializer_range": 0.02, |
|
"intermediate_size": 18944, |
|
"max_position_embeddings": 32768, |
|
"max_window_layers": 28, |
|
"model_type": "qwen2", |
|
"num_attention_heads": 28, |
|
"num_hidden_layers": 28, |
|
"num_key_value_heads": 4, |
|
"rms_norm_eps": 1e-06, |
|
"rope_scaling": null, |
|
"rope_theta": 1000000.0, |
|
"sliding_window": null, |
|
"tie_word_embeddings": false, |
|
"torch_dtype": "bfloat16", |
|
"transformers_version": "4.46.1", |
|
"use_cache": true, |
|
"use_sliding_window": false, |
|
"vocab_size": 152064 |
|
} |
|
|
|
|
|
[INFO|2025-03-26 12:26:39] tokenization_utils_base.py:2646 >> tokenizer config file saved in saves/Qwen2.5-Coder-7B/lora/solidity_qwen_model/checkpoint-200/tokenizer_config.json |
|
|
|
[INFO|2025-03-26 12:26:39] tokenization_utils_base.py:2655 >> Special tokens file saved in saves/Qwen2.5-Coder-7B/lora/solidity_qwen_model/checkpoint-200/special_tokens_map.json |
|
|
|
[INFO|2025-03-26 12:26:39] trainer.py:2584 >> |
|
|
|
Training completed. Do not forget to share your model on huggingface.co/models =) |
|
|
|
|
|
|
|
[INFO|2025-03-26 12:26:39] trainer.py:3801 >> Saving model checkpoint to saves/Qwen2.5-Coder-7B/lora/solidity_qwen_model |
|
|
|
[INFO|2025-03-26 12:26:40] configuration_utils.py:679 >> loading configuration file config.json from cache at /home/ubuntu/.cache/huggingface/hub/models--Qwen--Qwen2.5-Coder-7B/snapshots/0396a76181e127dfc13e5c5ec48a8cee09938b02/config.json |
|
|
|
[INFO|2025-03-26 12:26:40] configuration_utils.py:746 >> Model config Qwen2Config { |
|
"architectures": [ |
|
"Qwen2ForCausalLM" |
|
], |
|
"attention_dropout": 0.0, |
|
"bos_token_id": 151643, |
|
"eos_token_id": 151643, |
|
"hidden_act": "silu", |
|
"hidden_size": 3584, |
|
"initializer_range": 0.02, |
|
"intermediate_size": 18944, |
|
"max_position_embeddings": 32768, |
|
"max_window_layers": 28, |
|
"model_type": "qwen2", |
|
"num_attention_heads": 28, |
|
"num_hidden_layers": 28, |
|
"num_key_value_heads": 4, |
|
"rms_norm_eps": 1e-06, |
|
"rope_scaling": null, |
|
"rope_theta": 1000000.0, |
|
"sliding_window": null, |
|
"tie_word_embeddings": false, |
|
"torch_dtype": "bfloat16", |
|
"transformers_version": "4.46.1", |
|
"use_cache": true, |
|
"use_sliding_window": false, |
|
"vocab_size": 152064 |
|
} |
|
|
|
|
|
[INFO|2025-03-26 12:26:40] tokenization_utils_base.py:2646 >> tokenizer config file saved in saves/Qwen2.5-Coder-7B/lora/solidity_qwen_model/tokenizer_config.json |
|
|
|
[INFO|2025-03-26 12:26:40] tokenization_utils_base.py:2655 >> Special tokens file saved in saves/Qwen2.5-Coder-7B/lora/solidity_qwen_model/special_tokens_map.json |
|
|
|
[WARNING|2025-03-26 12:26:40] logging.py:162 >> No metric eval_loss to plot. |
|
|
|
[WARNING|2025-03-26 12:26:40] logging.py:162 >> No metric eval_accuracy to plot. |
|
|
|
[INFO|2025-03-26 12:26:40] modelcard.py:449 >> Dropping the following result as it does not have all the necessary fields: |
|
{'task': {'name': 'Causal Language Modeling', 'type': 'text-generation'}} |
|
|
|
|