diff --git a/README.md b/README.md index 5c0d499d78b811d53f0056c53cd1d031d55cdcad..1e3637f645b79c1dff559d466047b102e3892f5d 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,21 @@ --- -license: other +library_name: peft --- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: bfloat16 +### Framework versions + + +- PEFT 0.6.0.dev0 diff --git a/adapter_config.json b/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f3958451b2551a7766d76c84304260071ab14ec5 --- /dev/null +++ b/adapter_config.json @@ -0,0 +1,26 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "NousResearch/Llama-2-13b-hf", + "bias": "none", + "fan_in_fan_out": null, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 16, + "lora_dropout": 0.05, + "modules_to_save": null, + "peft_type": "LORA", + "r": 32, + "revision": null, + "target_modules": [ + "q_proj", + "k_proj", + "o_proj", + "gate_proj", + "up_proj", + "v_proj", + "down_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/adapter_model.bin b/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..ffba115bdd8c65f5a9ada357c3e61e3560a83277 --- /dev/null +++ b/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11213a0c00bc5430a832af35fa25de9d9592072a55593290454c5487e99e5a21 +size 500897101 diff --git a/added_tokens.json b/added_tokens.json new file mode 100644 index 0000000000000000000000000000000000000000..cbce74e5c64b97114098962fa58454a57d7fb532 --- /dev/null +++ b/added_tokens.json @@ -0,0 +1,5 @@ +{ + "</s>": 2, + "<s>": 1, + "<unk>": 0 +} diff --git a/checkpoint-100/README.md b/checkpoint-100/README.md new file mode 100644 index 0000000000000000000000000000000000000000..1e3637f645b79c1dff559d466047b102e3892f5d --- /dev/null +++ b/checkpoint-100/README.md @@ -0,0 +1,21 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: bfloat16 +### Framework versions + + +- PEFT 0.6.0.dev0 diff --git a/checkpoint-100/adapter_config.json b/checkpoint-100/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f3958451b2551a7766d76c84304260071ab14ec5 --- /dev/null +++ b/checkpoint-100/adapter_config.json @@ -0,0 +1,26 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "NousResearch/Llama-2-13b-hf", + "bias": "none", + "fan_in_fan_out": null, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 16, + "lora_dropout": 0.05, + "modules_to_save": null, + "peft_type": "LORA", + "r": 32, + "revision": null, + "target_modules": [ + "q_proj", + "k_proj", + "o_proj", + "gate_proj", + "up_proj", + "v_proj", + "down_proj" + ], + "task_type": "CAUSAL_LM"
+} \ No newline at end of file diff --git a/checkpoint-100/adapter_model.bin b/checkpoint-100/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..ffba115bdd8c65f5a9ada357c3e61e3560a83277 --- /dev/null +++ b/checkpoint-100/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11213a0c00bc5430a832af35fa25de9d9592072a55593290454c5487e99e5a21 +size 500897101 diff --git a/checkpoint-100/adapter_model/README.md b/checkpoint-100/adapter_model/README.md new file mode 100644 index 0000000000000000000000000000000000000000..1e3637f645b79c1dff559d466047b102e3892f5d --- /dev/null +++ b/checkpoint-100/adapter_model/README.md @@ -0,0 +1,21 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: bfloat16 +### Framework versions + + +- PEFT 0.6.0.dev0 diff --git a/checkpoint-100/adapter_model/adapter_config.json b/checkpoint-100/adapter_model/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f3958451b2551a7766d76c84304260071ab14ec5 --- /dev/null +++ b/checkpoint-100/adapter_model/adapter_config.json @@ -0,0 +1,26 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "NousResearch/Llama-2-13b-hf", + "bias": "none", + "fan_in_fan_out": null, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 16, + "lora_dropout": 0.05, + "modules_to_save": null, + "peft_type": "LORA", + "r": 32, + "revision": null, + "target_modules": [ + "q_proj", + "k_proj", + "o_proj", + "gate_proj", + "up_proj", + "v_proj", + "down_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-100/adapter_model/adapter_model.bin b/checkpoint-100/adapter_model/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..ffba115bdd8c65f5a9ada357c3e61e3560a83277 --- /dev/null +++ b/checkpoint-100/adapter_model/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11213a0c00bc5430a832af35fa25de9d9592072a55593290454c5487e99e5a21 +size 500897101 diff --git a/checkpoint-100/optimizer.pt b/checkpoint-100/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..71af508a82a3179baf35022408649a954bb1f163 --- /dev/null +++ b/checkpoint-100/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c02d23e0bd4a6cef73a32ebf8402a8b1e1a8f18cbebb1526b512a32ee37be5d +size 1001736445 diff --git a/checkpoint-100/rng_state.pth b/checkpoint-100/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..7efe7ad2666741d8272f18f1741493bea1b3e547 --- /dev/null +++ b/checkpoint-100/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c465c2771dab9ba6f712e86e9b71c6011a3dc6f2893016c97be18f355357c6b5 +size 14575 diff --git a/checkpoint-100/scheduler.pt b/checkpoint-100/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..ebc626f0568a6d7901706cd93e46f76a293ec91e --- /dev/null +++ b/checkpoint-100/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:9ef30a43d20754fbeee89803ef955a46ca7e92fb8c40af3bdae2eaff291dcf94 +size 627 diff --git a/checkpoint-100/trainer_state.json b/checkpoint-100/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..39b961d9cb09e596aeda80a308e764968ffda653 --- /dev/null +++ b/checkpoint-100/trainer_state.json @@ -0,0 +1,619 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 4.8192771084337345, + "eval_steps": 150, + "global_step": 100, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.05, + "learning_rate": 2.9999999999999997e-05, + "loss": 1.2314, + "step": 1 + }, + { + "epoch": 0.1, + "learning_rate": 5.9999999999999995e-05, + "loss": 1.4026, + "step": 2 + }, + { + "epoch": 0.14, + "learning_rate": 8.999999999999999e-05, + "loss": 1.8531, + "step": 3 + }, + { + "epoch": 0.19, + "learning_rate": 0.00011999999999999999, + "loss": 1.3763, + "step": 4 + }, + { + "epoch": 0.24, + "learning_rate": 0.00015, + "loss": 0.961, + "step": 5 + }, + { + "epoch": 0.29, + "learning_rate": 0.00017999999999999998, + "loss": 1.2005, + "step": 6 + }, + { + "epoch": 0.34, + "learning_rate": 0.00020999999999999998, + "loss": 1.1054, + "step": 7 + }, + { + "epoch": 0.39, + "learning_rate": 0.00023999999999999998, + "loss": 1.0114, + "step": 8 + }, + { + "epoch": 0.43, + "learning_rate": 0.00027, + "loss": 1.1386, + "step": 9 + }, + { + "epoch": 0.48, + "learning_rate": 0.0003, + "loss": 0.9856, + "step": 10 + }, + { + "epoch": 0.53, + "learning_rate": 0.00029990862405286433, + "loss": 0.7562, + "step": 11 + }, + { + "epoch": 0.58, + "learning_rate": 0.0002996346075389736, + "loss": 1.0498, + "step": 12 + }, + { + "epoch": 0.63, + "learning_rate": 0.00029917828430524096, + "loss": 1.2559, + "step": 13 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002985402103112355, + "loss": 1.0074, + "step": 14 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002977211629518312, + "loss": 1.0239, + "step": 15 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002967221401100708, + "loss": 1.3441, + "step": 16 + }, + { + "epoch": 0.82, + "learning_rate": 0.0002955443589413994, + "loss": 1.1692, + "step": 17 + }, + { + "epoch": 0.87, + "learning_rate": 0.0002941892543907478, + "loss": 1.3764, + "step": 18 + }, + { + "epoch": 0.92, + "learning_rate": 0.00029265847744427303, + "loss": 0.9064, + "step": 19 + }, + { + "epoch": 0.96, + "learning_rate": 0.0002909538931178862, + "loss": 1.4608, + "step": 20 + }, + { + "epoch": 1.01, + "learning_rate": 0.0002890775781850181, + "loss": 1.0947, + "step": 21 + }, + { + "epoch": 1.06, + "learning_rate": 0.0002870318186463901, + "loss": 1.113, + "step": 22 + }, + { + "epoch": 1.11, + "learning_rate": 0.000284819106944875, + "loss": 0.9546, + "step": 23 + }, + { + "epoch": 1.16, + "learning_rate": 0.000282442138928839, + "loss": 1.1469, + "step": 24 + }, + { + "epoch": 1.2, + "learning_rate": 0.0002799038105676658, + "loss": 1.3102, + "step": 25 + }, + { + "epoch": 1.25, + "learning_rate": 0.00027720721442346387, + "loss": 1.4763, + "step": 26 + }, + { + "epoch": 1.3, + "learning_rate": 0.0002743556358832562, + "loss": 1.0281, + "step": 27 + }, + { + "epoch": 1.35, + "learning_rate": 0.0002713525491562421, + "loss": 1.0988, + "step": 28 + }, + { + "epoch": 1.4, + "learning_rate": 0.00026820161304100823, + "loss": 1.1848, + "step": 29 + }, + { + "epoch": 1.45, + "learning_rate": 0.00026490666646784665, + "loss": 0.9901, + "step": 30 + }, + { + "epoch": 1.49, + 
"learning_rate": 0.00026147172382160914, + "loss": 0.8676, + "step": 31 + }, + { + "epoch": 1.54, + "learning_rate": 0.00025790097005079764, + "loss": 1.0521, + "step": 32 + }, + { + "epoch": 1.59, + "learning_rate": 0.0002541987555688496, + "loss": 1.0436, + "step": 33 + }, + { + "epoch": 1.64, + "learning_rate": 0.0002503695909538287, + "loss": 0.9511, + "step": 34 + }, + { + "epoch": 1.69, + "learning_rate": 0.0002464181414529809, + "loss": 1.2208, + "step": 35 + }, + { + "epoch": 1.73, + "learning_rate": 0.0002423492212988487, + "loss": 1.1128, + "step": 36 + }, + { + "epoch": 1.78, + "learning_rate": 0.00023816778784387094, + "loss": 1.114, + "step": 37 + }, + { + "epoch": 1.83, + "learning_rate": 0.00023387893552061199, + "loss": 0.9318, + "step": 38 + }, + { + "epoch": 1.88, + "learning_rate": 0.0002294878896349807, + "loss": 1.6424, + "step": 39 + }, + { + "epoch": 1.93, + "learning_rate": 0.000225, + "loss": 0.7759, + "step": 40 + }, + { + "epoch": 1.98, + "learning_rate": 0.00022042073441788358, + "loss": 0.8061, + "step": 41 + }, + { + "epoch": 2.02, + "learning_rate": 0.0002157556720183616, + "loss": 1.2972, + "step": 42 + }, + { + "epoch": 2.07, + "learning_rate": 0.00021101049646137003, + "loss": 0.9694, + "step": 43 + }, + { + "epoch": 2.12, + "learning_rate": 0.0002061909890123868, + "loss": 0.7951, + "step": 44 + }, + { + "epoch": 2.17, + "learning_rate": 0.00020130302149885031, + "loss": 0.8049, + "step": 45 + }, + { + "epoch": 2.22, + "learning_rate": 0.0001963525491562421, + "loss": 0.7308, + "step": 46 + }, + { + "epoch": 2.27, + "learning_rate": 0.00019134560337254986, + "loss": 1.4495, + "step": 47 + }, + { + "epoch": 2.31, + "learning_rate": 0.00018628828433995013, + "loss": 0.8338, + "step": 48 + }, + { + "epoch": 2.36, + "learning_rate": 0.00018118675362266385, + "loss": 0.8706, + "step": 49 + }, + { + "epoch": 2.41, + "learning_rate": 0.00017604722665003956, + "loss": 1.3767, + "step": 50 + }, + { + "epoch": 2.46, + "learning_rate": 0.0001708759651440098, + "loss": 1.5032, + "step": 51 + }, + { + "epoch": 2.51, + "learning_rate": 0.000165679269490148, + "loss": 1.3356, + "step": 52 + }, + { + "epoch": 2.55, + "learning_rate": 0.00016046347106161876, + "loss": 1.0869, + "step": 53 + }, + { + "epoch": 2.6, + "learning_rate": 0.00015523492450537517, + "loss": 0.8374, + "step": 54 + }, + { + "epoch": 2.65, + "learning_rate": 0.00015, + "loss": 0.9144, + "step": 55 + }, + { + "epoch": 2.7, + "learning_rate": 0.0001447650754946249, + "loss": 0.7246, + "step": 56 + }, + { + "epoch": 2.75, + "learning_rate": 0.00013953652893838119, + "loss": 1.0432, + "step": 57 + }, + { + "epoch": 2.8, + "learning_rate": 0.000134320730509852, + "loss": 0.9271, + "step": 58 + }, + { + "epoch": 2.84, + "learning_rate": 0.0001291240348559902, + "loss": 0.7968, + "step": 59 + }, + { + "epoch": 2.89, + "learning_rate": 0.00012395277334996044, + "loss": 0.9131, + "step": 60 + }, + { + "epoch": 2.94, + "learning_rate": 0.00011881324637733611, + "loss": 0.937, + "step": 61 + }, + { + "epoch": 2.99, + "learning_rate": 0.00011371171566004985, + "loss": 0.8228, + "step": 62 + }, + { + "epoch": 3.04, + "learning_rate": 0.00010865439662745013, + "loss": 1.0797, + "step": 63 + }, + { + "epoch": 3.08, + "learning_rate": 0.0001036474508437579, + "loss": 0.8893, + "step": 64 + }, + { + "epoch": 3.13, + "learning_rate": 9.869697850114969e-05, + "loss": 0.5448, + "step": 65 + }, + { + "epoch": 3.18, + "learning_rate": 9.380901098761319e-05, + "loss": 0.6197, + "step": 66 + }, + { + "epoch": 3.23, + 
"learning_rate": 8.898950353862998e-05, + "loss": 0.6925, + "step": 67 + }, + { + "epoch": 3.28, + "learning_rate": 8.424432798163836e-05, + "loss": 0.6065, + "step": 68 + }, + { + "epoch": 3.33, + "learning_rate": 7.957926558211642e-05, + "loss": 0.9739, + "step": 69 + }, + { + "epoch": 3.37, + "learning_rate": 7.500000000000002e-05, + "loss": 0.8073, + "step": 70 + }, + { + "epoch": 3.42, + "learning_rate": 7.051211036501928e-05, + "loss": 1.5069, + "step": 71 + }, + { + "epoch": 3.47, + "learning_rate": 6.612106447938799e-05, + "loss": 0.5149, + "step": 72 + }, + { + "epoch": 3.52, + "learning_rate": 6.183221215612904e-05, + "loss": 0.8116, + "step": 73 + }, + { + "epoch": 3.57, + "learning_rate": 5.765077870115125e-05, + "loss": 0.5185, + "step": 74 + }, + { + "epoch": 3.61, + "learning_rate": 5.358185854701909e-05, + "loss": 0.8031, + "step": 75 + }, + { + "epoch": 3.66, + "learning_rate": 4.963040904617131e-05, + "loss": 0.5611, + "step": 76 + }, + { + "epoch": 3.71, + "learning_rate": 4.5801244431150394e-05, + "loss": 0.7082, + "step": 77 + }, + { + "epoch": 3.76, + "learning_rate": 4.209902994920235e-05, + "loss": 0.8057, + "step": 78 + }, + { + "epoch": 3.81, + "learning_rate": 3.852827617839084e-05, + "loss": 1.8981, + "step": 79 + }, + { + "epoch": 3.86, + "learning_rate": 3.509333353215331e-05, + "loss": 0.8772, + "step": 80 + }, + { + "epoch": 3.9, + "learning_rate": 3.1798386958991714e-05, + "loss": 1.1716, + "step": 81 + }, + { + "epoch": 3.95, + "learning_rate": 2.8647450843757897e-05, + "loss": 0.949, + "step": 82 + }, + { + "epoch": 4.0, + "learning_rate": 2.5644364116743755e-05, + "loss": 0.901, + "step": 83 + }, + { + "epoch": 4.05, + "learning_rate": 2.2792785576536105e-05, + "loss": 1.0778, + "step": 84 + }, + { + "epoch": 4.1, + "learning_rate": 2.009618943233419e-05, + "loss": 1.0495, + "step": 85 + }, + { + "epoch": 4.14, + "learning_rate": 1.755786107116095e-05, + "loss": 1.1405, + "step": 86 + }, + { + "epoch": 4.19, + "learning_rate": 1.5180893055124977e-05, + "loss": 0.6457, + "step": 87 + }, + { + "epoch": 4.24, + "learning_rate": 1.2968181353609852e-05, + "loss": 0.7194, + "step": 88 + }, + { + "epoch": 4.29, + "learning_rate": 1.0922421814981901e-05, + "loss": 0.768, + "step": 89 + }, + { + "epoch": 4.34, + "learning_rate": 9.046106882113751e-06, + "loss": 0.7015, + "step": 90 + }, + { + "epoch": 4.39, + "learning_rate": 7.34152255572697e-06, + "loss": 0.9132, + "step": 91 + }, + { + "epoch": 4.43, + "learning_rate": 5.810745609252165e-06, + "loss": 0.703, + "step": 92 + }, + { + "epoch": 4.48, + "learning_rate": 4.455641058600528e-06, + "loss": 0.8964, + "step": 93 + }, + { + "epoch": 4.53, + "learning_rate": 3.2778598899291465e-06, + "loss": 0.9267, + "step": 94 + }, + { + "epoch": 4.58, + "learning_rate": 2.2788370481687965e-06, + "loss": 0.7872, + "step": 95 + }, + { + "epoch": 4.63, + "learning_rate": 1.4597896887644456e-06, + "loss": 0.7648, + "step": 96 + }, + { + "epoch": 4.67, + "learning_rate": 8.217156947590064e-07, + "loss": 0.5883, + "step": 97 + }, + { + "epoch": 4.72, + "learning_rate": 3.653924610263703e-07, + "loss": 0.6093, + "step": 98 + }, + { + "epoch": 4.77, + "learning_rate": 9.137594713563568e-08, + "loss": 0.8796, + "step": 99 + }, + { + "epoch": 4.82, + "learning_rate": 0.0, + "loss": 0.5744, + "step": 100 + } + ], + "logging_steps": 1, + "max_steps": 100, + "num_train_epochs": 5, + "save_steps": 500, + "total_flos": 2.55142096011264e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-100/training_args.bin 
b/checkpoint-100/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..d9e4b64c077627c8c7d90a31379e1085611b967f --- /dev/null +++ b/checkpoint-100/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7871ddb870dca149c8bd176c33a0174c54ce7f51f1623c6ad82c38e78485c96 +size 4411 diff --git a/checkpoint-41/README.md b/checkpoint-41/README.md new file mode 100644 index 0000000000000000000000000000000000000000..1e3637f645b79c1dff559d466047b102e3892f5d --- /dev/null +++ b/checkpoint-41/README.md @@ -0,0 +1,21 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: bfloat16 +### Framework versions + + +- PEFT 0.6.0.dev0 diff --git a/checkpoint-41/adapter_config.json b/checkpoint-41/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f3958451b2551a7766d76c84304260071ab14ec5 --- /dev/null +++ b/checkpoint-41/adapter_config.json @@ -0,0 +1,26 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "NousResearch/Llama-2-13b-hf", + "bias": "none", + "fan_in_fan_out": null, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 16, + "lora_dropout": 0.05, + "modules_to_save": null, + "peft_type": "LORA", + "r": 32, + "revision": null, + "target_modules": [ + "q_proj", + "k_proj", + "o_proj", + "gate_proj", + "up_proj", + "v_proj", + "down_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-41/adapter_model.bin b/checkpoint-41/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..21372bbd61eed2462d408974e1ffbda7ec824f18 --- /dev/null +++ b/checkpoint-41/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68ddbd889686556508110f942061578825177da74856785e15f97c63d4f4b005 +size 500897101 diff --git a/checkpoint-41/adapter_model/README.md b/checkpoint-41/adapter_model/README.md new file mode 100644 index 0000000000000000000000000000000000000000..1e3637f645b79c1dff559d466047b102e3892f5d --- /dev/null +++ b/checkpoint-41/adapter_model/README.md @@ -0,0 +1,21 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: bfloat16 +### Framework versions + + +- PEFT 0.6.0.dev0 diff --git a/checkpoint-41/adapter_model/adapter_config.json b/checkpoint-41/adapter_model/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f3958451b2551a7766d76c84304260071ab14ec5 --- /dev/null +++ b/checkpoint-41/adapter_model/adapter_config.json @@ -0,0 +1,26 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "NousResearch/Llama-2-13b-hf", + "bias": "none", + "fan_in_fan_out": null, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, 
+ "layers_to_transform": null, + "lora_alpha": 16, + "lora_dropout": 0.05, + "modules_to_save": null, + "peft_type": "LORA", + "r": 32, + "revision": null, + "target_modules": [ + "q_proj", + "k_proj", + "o_proj", + "gate_proj", + "up_proj", + "v_proj", + "down_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-41/adapter_model/adapter_model.bin b/checkpoint-41/adapter_model/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..21372bbd61eed2462d408974e1ffbda7ec824f18 --- /dev/null +++ b/checkpoint-41/adapter_model/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68ddbd889686556508110f942061578825177da74856785e15f97c63d4f4b005 +size 500897101 diff --git a/checkpoint-41/optimizer.pt b/checkpoint-41/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..3328f00c335805548852f3f5f761015ec8da2fdf --- /dev/null +++ b/checkpoint-41/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aaa675f9cb3260fc969d19ad606acdb7a06f4141a35b38cbe2965d83b8d6ff59 +size 1001736445 diff --git a/checkpoint-41/rng_state.pth b/checkpoint-41/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..72fb44df9bc2f865afdfe2999801bda0312b7a18 --- /dev/null +++ b/checkpoint-41/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ccf410588401c344d577bf277e576cb89ffef47ae39cc4edbb9da08ba2fbb438 +size 14575 diff --git a/checkpoint-41/scheduler.pt b/checkpoint-41/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..51dc90191037ad240c4c73b0d97c2498b1ed13c1 --- /dev/null +++ b/checkpoint-41/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:145b10f6d4a353b24514b0dab0a1f00a25175216b6926714cd4f2a5bf613b00b +size 627 diff --git a/checkpoint-41/trainer_state.json b/checkpoint-41/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..e622958c3738de98a17ef942bac58260d05e7e85 --- /dev/null +++ b/checkpoint-41/trainer_state.json @@ -0,0 +1,265 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.9759036144578315, + "eval_steps": 150, + "global_step": 41, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.05, + "learning_rate": 2.9999999999999997e-05, + "loss": 1.2314, + "step": 1 + }, + { + "epoch": 0.1, + "learning_rate": 5.9999999999999995e-05, + "loss": 1.4026, + "step": 2 + }, + { + "epoch": 0.14, + "learning_rate": 8.999999999999999e-05, + "loss": 1.8531, + "step": 3 + }, + { + "epoch": 0.19, + "learning_rate": 0.00011999999999999999, + "loss": 1.3763, + "step": 4 + }, + { + "epoch": 0.24, + "learning_rate": 0.00015, + "loss": 0.961, + "step": 5 + }, + { + "epoch": 0.29, + "learning_rate": 0.00017999999999999998, + "loss": 1.2005, + "step": 6 + }, + { + "epoch": 0.34, + "learning_rate": 0.00020999999999999998, + "loss": 1.1054, + "step": 7 + }, + { + "epoch": 0.39, + "learning_rate": 0.00023999999999999998, + "loss": 1.0114, + "step": 8 + }, + { + "epoch": 0.43, + "learning_rate": 0.00027, + "loss": 1.1386, + "step": 9 + }, + { + "epoch": 0.48, + "learning_rate": 0.0003, + "loss": 0.9856, + "step": 10 + }, + { + "epoch": 0.53, + "learning_rate": 0.00029990862405286433, + "loss": 0.7562, + "step": 11 + }, + { + "epoch": 0.58, + "learning_rate": 0.0002996346075389736, + "loss": 1.0498, + "step": 12 + }, + 
{ + "epoch": 0.63, + "learning_rate": 0.00029917828430524096, + "loss": 1.2559, + "step": 13 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002985402103112355, + "loss": 1.0074, + "step": 14 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002977211629518312, + "loss": 1.0239, + "step": 15 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002967221401100708, + "loss": 1.3441, + "step": 16 + }, + { + "epoch": 0.82, + "learning_rate": 0.0002955443589413994, + "loss": 1.1692, + "step": 17 + }, + { + "epoch": 0.87, + "learning_rate": 0.0002941892543907478, + "loss": 1.3764, + "step": 18 + }, + { + "epoch": 0.92, + "learning_rate": 0.00029265847744427303, + "loss": 0.9064, + "step": 19 + }, + { + "epoch": 0.96, + "learning_rate": 0.0002909538931178862, + "loss": 1.4608, + "step": 20 + }, + { + "epoch": 1.01, + "learning_rate": 0.0002890775781850181, + "loss": 1.0947, + "step": 21 + }, + { + "epoch": 1.06, + "learning_rate": 0.0002870318186463901, + "loss": 1.113, + "step": 22 + }, + { + "epoch": 1.11, + "learning_rate": 0.000284819106944875, + "loss": 0.9546, + "step": 23 + }, + { + "epoch": 1.16, + "learning_rate": 0.000282442138928839, + "loss": 1.1469, + "step": 24 + }, + { + "epoch": 1.2, + "learning_rate": 0.0002799038105676658, + "loss": 1.3102, + "step": 25 + }, + { + "epoch": 1.25, + "learning_rate": 0.00027720721442346387, + "loss": 1.4763, + "step": 26 + }, + { + "epoch": 1.3, + "learning_rate": 0.0002743556358832562, + "loss": 1.0281, + "step": 27 + }, + { + "epoch": 1.35, + "learning_rate": 0.0002713525491562421, + "loss": 1.0988, + "step": 28 + }, + { + "epoch": 1.4, + "learning_rate": 0.00026820161304100823, + "loss": 1.1848, + "step": 29 + }, + { + "epoch": 1.45, + "learning_rate": 0.00026490666646784665, + "loss": 0.9901, + "step": 30 + }, + { + "epoch": 1.49, + "learning_rate": 0.00026147172382160914, + "loss": 0.8676, + "step": 31 + }, + { + "epoch": 1.54, + "learning_rate": 0.00025790097005079764, + "loss": 1.0521, + "step": 32 + }, + { + "epoch": 1.59, + "learning_rate": 0.0002541987555688496, + "loss": 1.0436, + "step": 33 + }, + { + "epoch": 1.64, + "learning_rate": 0.0002503695909538287, + "loss": 0.9511, + "step": 34 + }, + { + "epoch": 1.69, + "learning_rate": 0.0002464181414529809, + "loss": 1.2208, + "step": 35 + }, + { + "epoch": 1.73, + "learning_rate": 0.0002423492212988487, + "loss": 1.1128, + "step": 36 + }, + { + "epoch": 1.78, + "learning_rate": 0.00023816778784387094, + "loss": 1.114, + "step": 37 + }, + { + "epoch": 1.83, + "learning_rate": 0.00023387893552061199, + "loss": 0.9318, + "step": 38 + }, + { + "epoch": 1.88, + "learning_rate": 0.0002294878896349807, + "loss": 1.6424, + "step": 39 + }, + { + "epoch": 1.93, + "learning_rate": 0.000225, + "loss": 0.7759, + "step": 40 + }, + { + "epoch": 1.98, + "learning_rate": 0.00022042073441788358, + "loss": 0.8061, + "step": 41 + } + ], + "logging_steps": 1, + "max_steps": 100, + "num_train_epochs": 5, + "save_steps": 500, + "total_flos": 1.0588396984467456e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-41/training_args.bin b/checkpoint-41/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..d9e4b64c077627c8c7d90a31379e1085611b967f --- /dev/null +++ b/checkpoint-41/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7871ddb870dca149c8bd176c33a0174c54ce7f51f1623c6ad82c38e78485c96 +size 4411 diff --git a/checkpoint-62/README.md b/checkpoint-62/README.md new file mode 100644 index 
0000000000000000000000000000000000000000..1e3637f645b79c1dff559d466047b102e3892f5d --- /dev/null +++ b/checkpoint-62/README.md @@ -0,0 +1,21 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: bfloat16 +### Framework versions + + +- PEFT 0.6.0.dev0 diff --git a/checkpoint-62/adapter_config.json b/checkpoint-62/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f3958451b2551a7766d76c84304260071ab14ec5 --- /dev/null +++ b/checkpoint-62/adapter_config.json @@ -0,0 +1,26 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "NousResearch/Llama-2-13b-hf", + "bias": "none", + "fan_in_fan_out": null, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 16, + "lora_dropout": 0.05, + "modules_to_save": null, + "peft_type": "LORA", + "r": 32, + "revision": null, + "target_modules": [ + "q_proj", + "k_proj", + "o_proj", + "gate_proj", + "up_proj", + "v_proj", + "down_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-62/adapter_model.bin b/checkpoint-62/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..b0a47c92691fdcce0ba1303a5539f359b53bf3f5 --- /dev/null +++ b/checkpoint-62/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3996fa6cd634cef2e886ba53704cdaf585f63488956dc493d6208f5bd88b5f89 +size 500897101 diff --git a/checkpoint-62/adapter_model/README.md b/checkpoint-62/adapter_model/README.md new file mode 100644 index 0000000000000000000000000000000000000000..1e3637f645b79c1dff559d466047b102e3892f5d --- /dev/null +++ b/checkpoint-62/adapter_model/README.md @@ -0,0 +1,21 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: bfloat16 +### Framework versions + + +- PEFT 0.6.0.dev0 diff --git a/checkpoint-62/adapter_model/adapter_config.json b/checkpoint-62/adapter_model/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f3958451b2551a7766d76c84304260071ab14ec5 --- /dev/null +++ b/checkpoint-62/adapter_model/adapter_config.json @@ -0,0 +1,26 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "NousResearch/Llama-2-13b-hf", + "bias": "none", + "fan_in_fan_out": null, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 16, + "lora_dropout": 0.05, + "modules_to_save": null, + "peft_type": "LORA", + "r": 32, + "revision": null, + "target_modules": [ + "q_proj", + "k_proj", + "o_proj", + "gate_proj", + "up_proj", + "v_proj", + "down_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-62/adapter_model/adapter_model.bin 
b/checkpoint-62/adapter_model/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..b0a47c92691fdcce0ba1303a5539f359b53bf3f5 --- /dev/null +++ b/checkpoint-62/adapter_model/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3996fa6cd634cef2e886ba53704cdaf585f63488956dc493d6208f5bd88b5f89 +size 500897101 diff --git a/checkpoint-62/optimizer.pt b/checkpoint-62/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..84485c4f98b752a10af7d33c94674f750e620771 --- /dev/null +++ b/checkpoint-62/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b0ad741144f35cb085b4e4b6f6090ac1ee2d46ee5d3b4c75e1c88dde85c72d1 +size 1001736445 diff --git a/checkpoint-62/rng_state.pth b/checkpoint-62/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..27463b54f84b79ecfb984b2bc3d286bd8c721300 --- /dev/null +++ b/checkpoint-62/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:819629affa360efc5b4f9241c8241641e09fe79d70e6c056cd560592ddce2847 +size 14575 diff --git a/checkpoint-62/scheduler.pt b/checkpoint-62/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..4da276e96190d0b68e11daf82a4675b780a5fa7f --- /dev/null +++ b/checkpoint-62/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:885848ada3813b8df84a9c8eb6f0a75ba666603a30e5d56b09dfd1fbca05085f +size 627 diff --git a/checkpoint-62/trainer_state.json b/checkpoint-62/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..681ac28ac59584c244ebacfc6b7f31cd1ef69bcb --- /dev/null +++ b/checkpoint-62/trainer_state.json @@ -0,0 +1,391 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.9879518072289155, + "eval_steps": 150, + "global_step": 62, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.05, + "learning_rate": 2.9999999999999997e-05, + "loss": 1.2314, + "step": 1 + }, + { + "epoch": 0.1, + "learning_rate": 5.9999999999999995e-05, + "loss": 1.4026, + "step": 2 + }, + { + "epoch": 0.14, + "learning_rate": 8.999999999999999e-05, + "loss": 1.8531, + "step": 3 + }, + { + "epoch": 0.19, + "learning_rate": 0.00011999999999999999, + "loss": 1.3763, + "step": 4 + }, + { + "epoch": 0.24, + "learning_rate": 0.00015, + "loss": 0.961, + "step": 5 + }, + { + "epoch": 0.29, + "learning_rate": 0.00017999999999999998, + "loss": 1.2005, + "step": 6 + }, + { + "epoch": 0.34, + "learning_rate": 0.00020999999999999998, + "loss": 1.1054, + "step": 7 + }, + { + "epoch": 0.39, + "learning_rate": 0.00023999999999999998, + "loss": 1.0114, + "step": 8 + }, + { + "epoch": 0.43, + "learning_rate": 0.00027, + "loss": 1.1386, + "step": 9 + }, + { + "epoch": 0.48, + "learning_rate": 0.0003, + "loss": 0.9856, + "step": 10 + }, + { + "epoch": 0.53, + "learning_rate": 0.00029990862405286433, + "loss": 0.7562, + "step": 11 + }, + { + "epoch": 0.58, + "learning_rate": 0.0002996346075389736, + "loss": 1.0498, + "step": 12 + }, + { + "epoch": 0.63, + "learning_rate": 0.00029917828430524096, + "loss": 1.2559, + "step": 13 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002985402103112355, + "loss": 1.0074, + "step": 14 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002977211629518312, + "loss": 1.0239, + "step": 15 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002967221401100708, + "loss": 1.3441, + "step": 16 
+ }, + { + "epoch": 0.82, + "learning_rate": 0.0002955443589413994, + "loss": 1.1692, + "step": 17 + }, + { + "epoch": 0.87, + "learning_rate": 0.0002941892543907478, + "loss": 1.3764, + "step": 18 + }, + { + "epoch": 0.92, + "learning_rate": 0.00029265847744427303, + "loss": 0.9064, + "step": 19 + }, + { + "epoch": 0.96, + "learning_rate": 0.0002909538931178862, + "loss": 1.4608, + "step": 20 + }, + { + "epoch": 1.01, + "learning_rate": 0.0002890775781850181, + "loss": 1.0947, + "step": 21 + }, + { + "epoch": 1.06, + "learning_rate": 0.0002870318186463901, + "loss": 1.113, + "step": 22 + }, + { + "epoch": 1.11, + "learning_rate": 0.000284819106944875, + "loss": 0.9546, + "step": 23 + }, + { + "epoch": 1.16, + "learning_rate": 0.000282442138928839, + "loss": 1.1469, + "step": 24 + }, + { + "epoch": 1.2, + "learning_rate": 0.0002799038105676658, + "loss": 1.3102, + "step": 25 + }, + { + "epoch": 1.25, + "learning_rate": 0.00027720721442346387, + "loss": 1.4763, + "step": 26 + }, + { + "epoch": 1.3, + "learning_rate": 0.0002743556358832562, + "loss": 1.0281, + "step": 27 + }, + { + "epoch": 1.35, + "learning_rate": 0.0002713525491562421, + "loss": 1.0988, + "step": 28 + }, + { + "epoch": 1.4, + "learning_rate": 0.00026820161304100823, + "loss": 1.1848, + "step": 29 + }, + { + "epoch": 1.45, + "learning_rate": 0.00026490666646784665, + "loss": 0.9901, + "step": 30 + }, + { + "epoch": 1.49, + "learning_rate": 0.00026147172382160914, + "loss": 0.8676, + "step": 31 + }, + { + "epoch": 1.54, + "learning_rate": 0.00025790097005079764, + "loss": 1.0521, + "step": 32 + }, + { + "epoch": 1.59, + "learning_rate": 0.0002541987555688496, + "loss": 1.0436, + "step": 33 + }, + { + "epoch": 1.64, + "learning_rate": 0.0002503695909538287, + "loss": 0.9511, + "step": 34 + }, + { + "epoch": 1.69, + "learning_rate": 0.0002464181414529809, + "loss": 1.2208, + "step": 35 + }, + { + "epoch": 1.73, + "learning_rate": 0.0002423492212988487, + "loss": 1.1128, + "step": 36 + }, + { + "epoch": 1.78, + "learning_rate": 0.00023816778784387094, + "loss": 1.114, + "step": 37 + }, + { + "epoch": 1.83, + "learning_rate": 0.00023387893552061199, + "loss": 0.9318, + "step": 38 + }, + { + "epoch": 1.88, + "learning_rate": 0.0002294878896349807, + "loss": 1.6424, + "step": 39 + }, + { + "epoch": 1.93, + "learning_rate": 0.000225, + "loss": 0.7759, + "step": 40 + }, + { + "epoch": 1.98, + "learning_rate": 0.00022042073441788358, + "loss": 0.8061, + "step": 41 + }, + { + "epoch": 2.02, + "learning_rate": 0.0002157556720183616, + "loss": 1.2972, + "step": 42 + }, + { + "epoch": 2.07, + "learning_rate": 0.00021101049646137003, + "loss": 0.9694, + "step": 43 + }, + { + "epoch": 2.12, + "learning_rate": 0.0002061909890123868, + "loss": 0.7951, + "step": 44 + }, + { + "epoch": 2.17, + "learning_rate": 0.00020130302149885031, + "loss": 0.8049, + "step": 45 + }, + { + "epoch": 2.22, + "learning_rate": 0.0001963525491562421, + "loss": 0.7308, + "step": 46 + }, + { + "epoch": 2.27, + "learning_rate": 0.00019134560337254986, + "loss": 1.4495, + "step": 47 + }, + { + "epoch": 2.31, + "learning_rate": 0.00018628828433995013, + "loss": 0.8338, + "step": 48 + }, + { + "epoch": 2.36, + "learning_rate": 0.00018118675362266385, + "loss": 0.8706, + "step": 49 + }, + { + "epoch": 2.41, + "learning_rate": 0.00017604722665003956, + "loss": 1.3767, + "step": 50 + }, + { + "epoch": 2.46, + "learning_rate": 0.0001708759651440098, + "loss": 1.5032, + "step": 51 + }, + { + "epoch": 2.51, + "learning_rate": 0.000165679269490148, + "loss": 1.3356, + "step": 
52 + }, + { + "epoch": 2.55, + "learning_rate": 0.00016046347106161876, + "loss": 1.0869, + "step": 53 + }, + { + "epoch": 2.6, + "learning_rate": 0.00015523492450537517, + "loss": 0.8374, + "step": 54 + }, + { + "epoch": 2.65, + "learning_rate": 0.00015, + "loss": 0.9144, + "step": 55 + }, + { + "epoch": 2.7, + "learning_rate": 0.0001447650754946249, + "loss": 0.7246, + "step": 56 + }, + { + "epoch": 2.75, + "learning_rate": 0.00013953652893838119, + "loss": 1.0432, + "step": 57 + }, + { + "epoch": 2.8, + "learning_rate": 0.000134320730509852, + "loss": 0.9271, + "step": 58 + }, + { + "epoch": 2.84, + "learning_rate": 0.0001291240348559902, + "loss": 0.7968, + "step": 59 + }, + { + "epoch": 2.89, + "learning_rate": 0.00012395277334996044, + "loss": 0.9131, + "step": 60 + }, + { + "epoch": 2.94, + "learning_rate": 0.00011881324637733611, + "loss": 0.937, + "step": 61 + }, + { + "epoch": 2.99, + "learning_rate": 0.00011371171566004985, + "loss": 0.8228, + "step": 62 + } + ], + "logging_steps": 1, + "max_steps": 100, + "num_train_epochs": 5, + "save_steps": 500, + "total_flos": 1.5882595476701184e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-62/training_args.bin b/checkpoint-62/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..d9e4b64c077627c8c7d90a31379e1085611b967f --- /dev/null +++ b/checkpoint-62/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7871ddb870dca149c8bd176c33a0174c54ce7f51f1623c6ad82c38e78485c96 +size 4411 diff --git a/checkpoint-83/README.md b/checkpoint-83/README.md new file mode 100644 index 0000000000000000000000000000000000000000..1e3637f645b79c1dff559d466047b102e3892f5d --- /dev/null +++ b/checkpoint-83/README.md @@ -0,0 +1,21 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: bfloat16 +### Framework versions + + +- PEFT 0.6.0.dev0 diff --git a/checkpoint-83/adapter_config.json b/checkpoint-83/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f3958451b2551a7766d76c84304260071ab14ec5 --- /dev/null +++ b/checkpoint-83/adapter_config.json @@ -0,0 +1,26 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "NousResearch/Llama-2-13b-hf", + "bias": "none", + "fan_in_fan_out": null, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 16, + "lora_dropout": 0.05, + "modules_to_save": null, + "peft_type": "LORA", + "r": 32, + "revision": null, + "target_modules": [ + "q_proj", + "k_proj", + "o_proj", + "gate_proj", + "up_proj", + "v_proj", + "down_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-83/adapter_model.bin b/checkpoint-83/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..0e8b94a6bfe3b88c4b81e9a8285b030c7ed89fd2 --- /dev/null +++ b/checkpoint-83/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:879cb7c9f89b873d886fb62e7cdcf8a9440aab1e33335fe885cb7d5a49ae78ae +size 500897101 diff --git a/checkpoint-83/adapter_model/README.md 
b/checkpoint-83/adapter_model/README.md new file mode 100644 index 0000000000000000000000000000000000000000..1e3637f645b79c1dff559d466047b102e3892f5d --- /dev/null +++ b/checkpoint-83/adapter_model/README.md @@ -0,0 +1,21 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: bfloat16 +### Framework versions + + +- PEFT 0.6.0.dev0 diff --git a/checkpoint-83/adapter_model/adapter_config.json b/checkpoint-83/adapter_model/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f3958451b2551a7766d76c84304260071ab14ec5 --- /dev/null +++ b/checkpoint-83/adapter_model/adapter_config.json @@ -0,0 +1,26 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "NousResearch/Llama-2-13b-hf", + "bias": "none", + "fan_in_fan_out": null, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 16, + "lora_dropout": 0.05, + "modules_to_save": null, + "peft_type": "LORA", + "r": 32, + "revision": null, + "target_modules": [ + "q_proj", + "k_proj", + "o_proj", + "gate_proj", + "up_proj", + "v_proj", + "down_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-83/adapter_model/adapter_model.bin b/checkpoint-83/adapter_model/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..0e8b94a6bfe3b88c4b81e9a8285b030c7ed89fd2 --- /dev/null +++ b/checkpoint-83/adapter_model/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:879cb7c9f89b873d886fb62e7cdcf8a9440aab1e33335fe885cb7d5a49ae78ae +size 500897101 diff --git a/checkpoint-83/optimizer.pt b/checkpoint-83/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..6a314d3c3a450362e5c7706e275c6d3e4dd3b100 --- /dev/null +++ b/checkpoint-83/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd942b91e8b634407667729822b11d68150258cb2c2c428268928f0a1dcc7c83 +size 1001736445 diff --git a/checkpoint-83/rng_state.pth b/checkpoint-83/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..35dd085f6f80d624a49961fc5c053170600fc5b4 --- /dev/null +++ b/checkpoint-83/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e54251fd1f9b73f220ddc492ed2e7d2b0f6cd2f7c70974412a6eccd366a58c77 +size 14575 diff --git a/checkpoint-83/scheduler.pt b/checkpoint-83/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..f04dbbd856e9a992744e3f723a737bca2cee6b31 --- /dev/null +++ b/checkpoint-83/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:877f8c8498f1154c5fce2ccfa856cadaf35802ef3e318bde01bf77c6f3f136b9 +size 627 diff --git a/checkpoint-83/trainer_state.json b/checkpoint-83/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..99922f0e9b6b575390d1e2e4e92dd3357316be3c --- /dev/null +++ b/checkpoint-83/trainer_state.json @@ -0,0 +1,517 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 4.0, + "eval_steps": 150, + "global_step": 83, + "is_hyper_param_search": false, + 
"is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.05, + "learning_rate": 2.9999999999999997e-05, + "loss": 1.2314, + "step": 1 + }, + { + "epoch": 0.1, + "learning_rate": 5.9999999999999995e-05, + "loss": 1.4026, + "step": 2 + }, + { + "epoch": 0.14, + "learning_rate": 8.999999999999999e-05, + "loss": 1.8531, + "step": 3 + }, + { + "epoch": 0.19, + "learning_rate": 0.00011999999999999999, + "loss": 1.3763, + "step": 4 + }, + { + "epoch": 0.24, + "learning_rate": 0.00015, + "loss": 0.961, + "step": 5 + }, + { + "epoch": 0.29, + "learning_rate": 0.00017999999999999998, + "loss": 1.2005, + "step": 6 + }, + { + "epoch": 0.34, + "learning_rate": 0.00020999999999999998, + "loss": 1.1054, + "step": 7 + }, + { + "epoch": 0.39, + "learning_rate": 0.00023999999999999998, + "loss": 1.0114, + "step": 8 + }, + { + "epoch": 0.43, + "learning_rate": 0.00027, + "loss": 1.1386, + "step": 9 + }, + { + "epoch": 0.48, + "learning_rate": 0.0003, + "loss": 0.9856, + "step": 10 + }, + { + "epoch": 0.53, + "learning_rate": 0.00029990862405286433, + "loss": 0.7562, + "step": 11 + }, + { + "epoch": 0.58, + "learning_rate": 0.0002996346075389736, + "loss": 1.0498, + "step": 12 + }, + { + "epoch": 0.63, + "learning_rate": 0.00029917828430524096, + "loss": 1.2559, + "step": 13 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002985402103112355, + "loss": 1.0074, + "step": 14 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002977211629518312, + "loss": 1.0239, + "step": 15 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002967221401100708, + "loss": 1.3441, + "step": 16 + }, + { + "epoch": 0.82, + "learning_rate": 0.0002955443589413994, + "loss": 1.1692, + "step": 17 + }, + { + "epoch": 0.87, + "learning_rate": 0.0002941892543907478, + "loss": 1.3764, + "step": 18 + }, + { + "epoch": 0.92, + "learning_rate": 0.00029265847744427303, + "loss": 0.9064, + "step": 19 + }, + { + "epoch": 0.96, + "learning_rate": 0.0002909538931178862, + "loss": 1.4608, + "step": 20 + }, + { + "epoch": 1.01, + "learning_rate": 0.0002890775781850181, + "loss": 1.0947, + "step": 21 + }, + { + "epoch": 1.06, + "learning_rate": 0.0002870318186463901, + "loss": 1.113, + "step": 22 + }, + { + "epoch": 1.11, + "learning_rate": 0.000284819106944875, + "loss": 0.9546, + "step": 23 + }, + { + "epoch": 1.16, + "learning_rate": 0.000282442138928839, + "loss": 1.1469, + "step": 24 + }, + { + "epoch": 1.2, + "learning_rate": 0.0002799038105676658, + "loss": 1.3102, + "step": 25 + }, + { + "epoch": 1.25, + "learning_rate": 0.00027720721442346387, + "loss": 1.4763, + "step": 26 + }, + { + "epoch": 1.3, + "learning_rate": 0.0002743556358832562, + "loss": 1.0281, + "step": 27 + }, + { + "epoch": 1.35, + "learning_rate": 0.0002713525491562421, + "loss": 1.0988, + "step": 28 + }, + { + "epoch": 1.4, + "learning_rate": 0.00026820161304100823, + "loss": 1.1848, + "step": 29 + }, + { + "epoch": 1.45, + "learning_rate": 0.00026490666646784665, + "loss": 0.9901, + "step": 30 + }, + { + "epoch": 1.49, + "learning_rate": 0.00026147172382160914, + "loss": 0.8676, + "step": 31 + }, + { + "epoch": 1.54, + "learning_rate": 0.00025790097005079764, + "loss": 1.0521, + "step": 32 + }, + { + "epoch": 1.59, + "learning_rate": 0.0002541987555688496, + "loss": 1.0436, + "step": 33 + }, + { + "epoch": 1.64, + "learning_rate": 0.0002503695909538287, + "loss": 0.9511, + "step": 34 + }, + { + "epoch": 1.69, + "learning_rate": 0.0002464181414529809, + "loss": 1.2208, + "step": 35 + }, + { + "epoch": 1.73, + "learning_rate": 
0.0002423492212988487, + "loss": 1.1128, + "step": 36 + }, + { + "epoch": 1.78, + "learning_rate": 0.00023816778784387094, + "loss": 1.114, + "step": 37 + }, + { + "epoch": 1.83, + "learning_rate": 0.00023387893552061199, + "loss": 0.9318, + "step": 38 + }, + { + "epoch": 1.88, + "learning_rate": 0.0002294878896349807, + "loss": 1.6424, + "step": 39 + }, + { + "epoch": 1.93, + "learning_rate": 0.000225, + "loss": 0.7759, + "step": 40 + }, + { + "epoch": 1.98, + "learning_rate": 0.00022042073441788358, + "loss": 0.8061, + "step": 41 + }, + { + "epoch": 2.02, + "learning_rate": 0.0002157556720183616, + "loss": 1.2972, + "step": 42 + }, + { + "epoch": 2.07, + "learning_rate": 0.00021101049646137003, + "loss": 0.9694, + "step": 43 + }, + { + "epoch": 2.12, + "learning_rate": 0.0002061909890123868, + "loss": 0.7951, + "step": 44 + }, + { + "epoch": 2.17, + "learning_rate": 0.00020130302149885031, + "loss": 0.8049, + "step": 45 + }, + { + "epoch": 2.22, + "learning_rate": 0.0001963525491562421, + "loss": 0.7308, + "step": 46 + }, + { + "epoch": 2.27, + "learning_rate": 0.00019134560337254986, + "loss": 1.4495, + "step": 47 + }, + { + "epoch": 2.31, + "learning_rate": 0.00018628828433995013, + "loss": 0.8338, + "step": 48 + }, + { + "epoch": 2.36, + "learning_rate": 0.00018118675362266385, + "loss": 0.8706, + "step": 49 + }, + { + "epoch": 2.41, + "learning_rate": 0.00017604722665003956, + "loss": 1.3767, + "step": 50 + }, + { + "epoch": 2.46, + "learning_rate": 0.0001708759651440098, + "loss": 1.5032, + "step": 51 + }, + { + "epoch": 2.51, + "learning_rate": 0.000165679269490148, + "loss": 1.3356, + "step": 52 + }, + { + "epoch": 2.55, + "learning_rate": 0.00016046347106161876, + "loss": 1.0869, + "step": 53 + }, + { + "epoch": 2.6, + "learning_rate": 0.00015523492450537517, + "loss": 0.8374, + "step": 54 + }, + { + "epoch": 2.65, + "learning_rate": 0.00015, + "loss": 0.9144, + "step": 55 + }, + { + "epoch": 2.7, + "learning_rate": 0.0001447650754946249, + "loss": 0.7246, + "step": 56 + }, + { + "epoch": 2.75, + "learning_rate": 0.00013953652893838119, + "loss": 1.0432, + "step": 57 + }, + { + "epoch": 2.8, + "learning_rate": 0.000134320730509852, + "loss": 0.9271, + "step": 58 + }, + { + "epoch": 2.84, + "learning_rate": 0.0001291240348559902, + "loss": 0.7968, + "step": 59 + }, + { + "epoch": 2.89, + "learning_rate": 0.00012395277334996044, + "loss": 0.9131, + "step": 60 + }, + { + "epoch": 2.94, + "learning_rate": 0.00011881324637733611, + "loss": 0.937, + "step": 61 + }, + { + "epoch": 2.99, + "learning_rate": 0.00011371171566004985, + "loss": 0.8228, + "step": 62 + }, + { + "epoch": 3.04, + "learning_rate": 0.00010865439662745013, + "loss": 1.0797, + "step": 63 + }, + { + "epoch": 3.08, + "learning_rate": 0.0001036474508437579, + "loss": 0.8893, + "step": 64 + }, + { + "epoch": 3.13, + "learning_rate": 9.869697850114969e-05, + "loss": 0.5448, + "step": 65 + }, + { + "epoch": 3.18, + "learning_rate": 9.380901098761319e-05, + "loss": 0.6197, + "step": 66 + }, + { + "epoch": 3.23, + "learning_rate": 8.898950353862998e-05, + "loss": 0.6925, + "step": 67 + }, + { + "epoch": 3.28, + "learning_rate": 8.424432798163836e-05, + "loss": 0.6065, + "step": 68 + }, + { + "epoch": 3.33, + "learning_rate": 7.957926558211642e-05, + "loss": 0.9739, + "step": 69 + }, + { + "epoch": 3.37, + "learning_rate": 7.500000000000002e-05, + "loss": 0.8073, + "step": 70 + }, + { + "epoch": 3.42, + "learning_rate": 7.051211036501928e-05, + "loss": 1.5069, + "step": 71 + }, + { + "epoch": 3.47, + "learning_rate": 
6.612106447938799e-05, + "loss": 0.5149, + "step": 72 + }, + { + "epoch": 3.52, + "learning_rate": 6.183221215612904e-05, + "loss": 0.8116, + "step": 73 + }, + { + "epoch": 3.57, + "learning_rate": 5.765077870115125e-05, + "loss": 0.5185, + "step": 74 + }, + { + "epoch": 3.61, + "learning_rate": 5.358185854701909e-05, + "loss": 0.8031, + "step": 75 + }, + { + "epoch": 3.66, + "learning_rate": 4.963040904617131e-05, + "loss": 0.5611, + "step": 76 + }, + { + "epoch": 3.71, + "learning_rate": 4.5801244431150394e-05, + "loss": 0.7082, + "step": 77 + }, + { + "epoch": 3.76, + "learning_rate": 4.209902994920235e-05, + "loss": 0.8057, + "step": 78 + }, + { + "epoch": 3.81, + "learning_rate": 3.852827617839084e-05, + "loss": 1.8981, + "step": 79 + }, + { + "epoch": 3.86, + "learning_rate": 3.509333353215331e-05, + "loss": 0.8772, + "step": 80 + }, + { + "epoch": 3.9, + "learning_rate": 3.1798386958991714e-05, + "loss": 1.1716, + "step": 81 + }, + { + "epoch": 3.95, + "learning_rate": 2.8647450843757897e-05, + "loss": 0.949, + "step": 82 + }, + { + "epoch": 4.0, + "learning_rate": 2.5644364116743755e-05, + "loss": 0.901, + "step": 83 + } + ], + "logging_steps": 1, + "max_steps": 100, + "num_train_epochs": 5, + "save_steps": 500, + "total_flos": 2.1176793968934912e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-83/training_args.bin b/checkpoint-83/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..d9e4b64c077627c8c7d90a31379e1085611b967f --- /dev/null +++ b/checkpoint-83/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7871ddb870dca149c8bd176c33a0174c54ce7f51f1623c6ad82c38e78485c96 +size 4411 diff --git a/config.json b/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1481b7c42e4373f0f7e68732c552b88007c38d63 --- /dev/null +++ b/config.json @@ -0,0 +1,39 @@ +{ + "_name_or_path": "NousResearch/Llama-2-13b-hf", + "architectures": [ + "LlamaForCausalLM" + ], + "bos_token_id": 1, + "eos_token_id": 2, + "hidden_act": "silu", + "hidden_size": 5120, + "initializer_range": 0.02, + "intermediate_size": 13824, + "max_position_embeddings": 4096, + "model_type": "llama", + "num_attention_heads": 40, + "num_hidden_layers": 40, + "num_key_value_heads": 40, + "pad_token_id": 0, + "pretraining_tp": 1, + "quantization_config": { + "bnb_4bit_compute_dtype": "bfloat16", + "bnb_4bit_quant_type": "nf4", + "bnb_4bit_use_double_quant": true, + "llm_int8_enable_fp32_cpu_offload": false, + "llm_int8_has_fp16_weight": false, + "llm_int8_skip_modules": null, + "llm_int8_threshold": 6.0, + "load_in_4bit": true, + "load_in_8bit": false, + "quant_method": "bitsandbytes" + }, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 10000.0, + "tie_word_embeddings": false, + "torch_dtype": "float16", + "transformers_version": "4.34.0.dev0", + "use_cache": false, + "vocab_size": 32000 +} diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..09a41a350e349ab0990be2f5cab2b105fad54199 --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,6 @@ +{ + "bos_token": "<s>", + "eos_token": "</s>", + "pad_token": "<unk>", + "unk_token": "<unk>" +} diff --git a/tokenizer.model b/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..6c00c742ce03c627d6cd5b795984876fa49fa899 --- /dev/null +++ b/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid
sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347 +size 499723 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..fd424505b407346d4847dea9d3a9226671071460 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,45 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "<unk>", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": false, + "special": true + }, + "1": { + "content": "<s>", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": false, + "special": true + }, + "2": { + "content": "</s>", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [], + "bos_token": "<s>", + "clean_up_tokenization_spaces": false, + "eos_token": "</s>", + "legacy": false, + "model_max_length": 1000000000000000019884624838656, + "pad_token": "<unk>", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "LlamaTokenizer", + "tokenizer_file": null, + "trust_remote_code": false, + "unk_token": "<unk>", + "use_default_system_prompt": true, + "use_fast": true +}
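
Taken together, the files above describe a QLoRA adapter for `NousResearch/Llama-2-13b-hf`: the README records the `bitsandbytes` quantization used during training, and `adapter_config.json`/`adapter_model.bin` hold the LoRA weights. Below is a minimal loading sketch, assuming `transformers`, `peft`, and `bitsandbytes` are installed and that `adapter_dir` (a placeholder) points at a local clone of this repository:

```python
# Minimal loading sketch: recreate the 4-bit NF4 quantization recorded in the
# README/config.json above, then attach the LoRA adapter from this repo.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import PeftModel

adapter_dir = "."  # placeholder: path to a local clone of this repository

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,                      # load_in_4bit: True
    bnb_4bit_quant_type="nf4",              # bnb_4bit_quant_type: nf4
    bnb_4bit_use_double_quant=True,         # bnb_4bit_use_double_quant: True
    bnb_4bit_compute_dtype=torch.bfloat16,  # bnb_4bit_compute_dtype: bfloat16
)

base = AutoModelForCausalLM.from_pretrained(
    "NousResearch/Llama-2-13b-hf",          # base_model_name_or_path
    quantization_config=bnb_config,
    device_map="auto",
)
# Reads adapter_config.json and adapter_model.bin from the repo root.
model = PeftModel.from_pretrained(base, adapter_dir)
tokenizer = AutoTokenizer.from_pretrained(adapter_dir)
```

Note that `config.json` above serializes the same quantization block into the base model config, so the two sources agree.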
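
`adapter_config.json` maps directly onto a `peft` `LoraConfig`. The training script itself is not part of this repo, so the following is a reconstruction of the equivalent configuration: rank-32, alpha-16 LoRA (scaling factor alpha/r = 0.5) over every attention and MLP projection of the Llama block.

```python
# Sketch of the LoraConfig implied by adapter_config.json above; field names
# follow peft's LoraConfig, values are copied from the file.
from peft import LoraConfig

lora_config = LoraConfig(
    r=32,                   # "r": 32
    lora_alpha=16,          # "lora_alpha": 16
    lora_dropout=0.05,      # "lora_dropout": 0.05
    bias="none",            # "bias": "none"
    task_type="CAUSAL_LM",  # "task_type": "CAUSAL_LM"
    target_modules=[        # all attention and MLP projections
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj",
    ],
)
```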
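
The `learning_rate` series logged in the `trainer_state.json` files is consistent with a linear warmup to 3e-4 over the first 10 steps followed by cosine decay to zero over the remaining 90 of the 100 `max_steps`, i.e. the standard cosine schedule with warmup. A small sketch that reproduces the logged values:

```python
# Reproduce the learning_rate values logged in trainer_state.json above:
# linear warmup to 3e-4 over 10 steps, then cosine decay to 0 by step 100.
import math

PEAK_LR, WARMUP_STEPS, MAX_STEPS = 3e-4, 10, 100

def lr_at(step: int) -> float:
    if step <= WARMUP_STEPS:
        return PEAK_LR * step / WARMUP_STEPS  # linear warmup
    progress = (step - WARMUP_STEPS) / (MAX_STEPS - WARMUP_STEPS)
    return 0.5 * PEAK_LR * (1.0 + math.cos(math.pi * progress))  # cosine decay

assert abs(lr_at(11) - 0.00029990862405286433) < 1e-12  # step 11 in the log
assert abs(lr_at(55) - 0.00015) < 1e-12                 # step 55 in the log
assert lr_at(100) == 0.0                                # step 100 in the log
```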
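
Finally, `added_tokens.json`, `special_tokens_map.json`, and `tokenizer_config.json` pin the three Llama special tokens to ids 0 through 2, with the pad token aliased to `<unk>`, consistent with `"pad_token_id": 0` in `config.json`. A quick sanity check, again assuming a local clone of the repo:

```python
# Sanity-check the special-token wiring recorded in the tokenizer files above.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained(".")  # placeholder: local clone of this repo

assert tok.convert_tokens_to_ids("<unk>") == 0   # "<unk>": 0, matches pad_token_id
assert tok.convert_tokens_to_ids("<s>") == 1     # "<s>": 1, matches bos_token_id
assert tok.convert_tokens_to_ids("</s>") == 2    # "</s>": 2, matches eos_token_id
assert tok.pad_token == tok.unk_token == "<unk>"  # pad is aliased to <unk>
```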