diff --git a/codellama/c/callgraph_c_pretrained/all_results.json b/codellama/c/callgraph_c_pretrained/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..2c76f7526e89b6a840a74cf021f4021b461be593 --- /dev/null +++ b/codellama/c/callgraph_c_pretrained/all_results.json @@ -0,0 +1,8 @@ +{ + "epoch": 1.504, + "total_flos": 2.7094162776644813e+18, + "train_loss": 0.8028768169119003, + "train_runtime": 84777.289, + "train_samples_per_second": 0.355, + "train_steps_per_second": 0.006 +} \ No newline at end of file diff --git a/codellama/c/callgraph_c_pretrained/checkpoint-470/README.md b/codellama/c/callgraph_c_pretrained/checkpoint-470/README.md new file mode 100644 index 0000000000000000000000000000000000000000..1c480e52daf505882c9f03a4e75fe30ea8c22c2c --- /dev/null +++ b/codellama/c/callgraph_c_pretrained/checkpoint-470/README.md @@ -0,0 +1,202 @@ +--- +base_model: CodeLlama-13b-Instruct-hf/ +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.2 \ No newline at end of file diff --git a/codellama/c/callgraph_c_pretrained/checkpoint-470/adapter_config.json b/codellama/c/callgraph_c_pretrained/checkpoint-470/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..187328c76da94dab963d1cb813d6e5916fac3522 --- /dev/null +++ b/codellama/c/callgraph_c_pretrained/checkpoint-470/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "CodeLlama-13b-Instruct-hf/", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "down_proj", + "k_proj", + "o_proj", + "gate_proj", + "q_proj", + "up_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/codellama/c/callgraph_c_pretrained/checkpoint-470/adapter_model.safetensors b/codellama/c/callgraph_c_pretrained/checkpoint-470/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8dbdcc8fd0ef1fdc0ec2b64209fc980a1cc0a853 --- /dev/null +++ b/codellama/c/callgraph_c_pretrained/checkpoint-470/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:496f57c9e48744c36c05eb597046e53c7f65a711bd7d869d9be95a50ddb742b3 +size 1156480200 diff --git a/codellama/c/callgraph_c_pretrained/checkpoint-470/adapter_model/README.md b/codellama/c/callgraph_c_pretrained/checkpoint-470/adapter_model/README.md new file mode 100644 index 0000000000000000000000000000000000000000..1c480e52daf505882c9f03a4e75fe30ea8c22c2c --- /dev/null +++ b/codellama/c/callgraph_c_pretrained/checkpoint-470/adapter_model/README.md @@ -0,0 +1,202 @@ +--- +base_model: CodeLlama-13b-Instruct-hf/ +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.2 \ No newline at end of file diff --git a/codellama/c/callgraph_c_pretrained/checkpoint-470/adapter_model/adapter_config.json b/codellama/c/callgraph_c_pretrained/checkpoint-470/adapter_model/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..187328c76da94dab963d1cb813d6e5916fac3522 --- /dev/null +++ b/codellama/c/callgraph_c_pretrained/checkpoint-470/adapter_model/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "CodeLlama-13b-Instruct-hf/", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "down_proj", + "k_proj", + "o_proj", + "gate_proj", + "q_proj", + "up_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/codellama/c/callgraph_c_pretrained/checkpoint-470/adapter_model/adapter_model.safetensors b/codellama/c/callgraph_c_pretrained/checkpoint-470/adapter_model/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8dbdcc8fd0ef1fdc0ec2b64209fc980a1cc0a853 --- /dev/null +++ b/codellama/c/callgraph_c_pretrained/checkpoint-470/adapter_model/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:496f57c9e48744c36c05eb597046e53c7f65a711bd7d869d9be95a50ddb742b3 +size 1156480200 diff --git a/codellama/c/callgraph_c_pretrained/checkpoint-470/added_tokens.json b/codellama/c/callgraph_c_pretrained/checkpoint-470/added_tokens.json new file mode 100644 index 0000000000000000000000000000000000000000..1cbbe5179eb8b5cc46632bbbc00eb51c68847074 --- /dev/null +++ b/codellama/c/callgraph_c_pretrained/checkpoint-470/added_tokens.json @@ -0,0 +1,3 @@ +{ + "[PAD]": 32016 +} diff --git a/codellama/c/callgraph_c_pretrained/checkpoint-470/optimizer.pt b/codellama/c/callgraph_c_pretrained/checkpoint-470/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..dea7d7dea179d1df6903a2ba6baa13438b12a0ac --- /dev/null +++ b/codellama/c/callgraph_c_pretrained/checkpoint-470/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c78b0ad96a7328411a9c874d042463b20a615201bd9f8f7f78d73ff2ffb60d6e +size 2003127538 diff --git a/codellama/c/callgraph_c_pretrained/checkpoint-470/rng_state.pth b/codellama/c/callgraph_c_pretrained/checkpoint-470/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..d0612a057f448c9891a1cc1ebe27ebb6f5d1b43d --- /dev/null +++ b/codellama/c/callgraph_c_pretrained/checkpoint-470/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4a52a684b9d61e86ff83d2ea2b3e12008f3394639dfd22a8d71f8e64032f458 +size 14244 diff --git a/codellama/c/callgraph_c_pretrained/checkpoint-470/scheduler.pt b/codellama/c/callgraph_c_pretrained/checkpoint-470/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..49b9955b9a5490a100edbacfecb1c5c322942063 --- /dev/null +++ b/codellama/c/callgraph_c_pretrained/checkpoint-470/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e20bf7ee79a65811d62a4cee70ed79c6e890eb65e39067c9ecbb1074504af0b +size 1064 diff --git a/codellama/c/callgraph_c_pretrained/checkpoint-470/special_tokens_map.json b/codellama/c/callgraph_c_pretrained/checkpoint-470/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..330bb0c14209dcd402b155e7d58c6c2b5210d40d --- /dev/null +++ b/codellama/c/callgraph_c_pretrained/checkpoint-470/special_tokens_map.json @@ -0,0 +1,36 @@ +{ + "additional_special_tokens": [ + "▁
", + "▁", + "▁ ", + "▁ " + ], + "bos_token": { + "content": " ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "[PAD]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/codellama/c/callgraph_c_pretrained/checkpoint-470/tokenizer.model b/codellama/c/callgraph_c_pretrained/checkpoint-470/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..f6722e8b170230ebdd4c0f5f2ce03f219be536d4 --- /dev/null +++ b/codellama/c/callgraph_c_pretrained/checkpoint-470/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45ccb9c8b6b561889acea59191d66986d314e7cbd6a78abc6e49b139ca91c1e6 +size 500058 diff --git a/codellama/c/callgraph_c_pretrained/checkpoint-470/tokenizer_config.json b/codellama/c/callgraph_c_pretrained/checkpoint-470/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8f4094d204e2be0ed7b6bfa83d20cff28326258d --- /dev/null +++ b/codellama/c/callgraph_c_pretrained/checkpoint-470/tokenizer_config.json @@ -0,0 +1,94 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": " ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": " ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32007": { + "content": "▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32008": { + "content": "▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32009": { + "content": "▁ ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32010": { + "content": "▁ ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32016": { + "content": "[PAD]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [ + "▁ ", + "▁", + "▁ ", + "▁ " + ], + "bos_token": " ", + "chat_template": "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if loop.index0 == 0 and system_message != false %}{% set content = '<", + "eot_token": "▁>\\n' + system_message + '\\n< >\\n\\n' + message['content'] %}{% else %}{% set content = message['content'] %}{% endif %}{% if message['role'] == 'user' %}{{ bos_token + '[INST] ' + content | trim + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ ' ' + content | trim + ' ' + eos_token }}{% endif %}{% endfor %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "fill_token": " ", + "legacy": null, + "middle_token": "▁ ", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "[PAD]", + "padding_side": "right", + "prefix_token": "▁ ", + "sp_model_kwargs": {}, + "suffix_first": false, + "suffix_token": "▁", + "tokenizer_class": "CodeLlamaTokenizer", + "unk_token": " ", + "use_default_system_prompt": false +} diff --git a/codellama/c/callgraph_c_pretrained/checkpoint-470/trainer_state.json b/codellama/c/callgraph_c_pretrained/checkpoint-470/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..8a16767c0bcc5b4364b01bb28b3959f0aacf0040 --- /dev/null +++ b/codellama/c/callgraph_c_pretrained/checkpoint-470/trainer_state.json @@ -0,0 +1,691 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.504, + "eval_steps": 500, + "global_step": 470, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.016, + "grad_norm": 0.33984375, + "learning_rate": 0.0001, + "loss": 4.2845, + "step": 5 + }, + { + "epoch": 0.032, + "grad_norm": 0.20703125, + "learning_rate": 0.0001, + "loss": 2.611, + "step": 10 + }, + { + "epoch": 0.048, + "grad_norm": 2.8125, + "learning_rate": 0.0001, + "loss": 2.1007, + "step": 15 + }, + { + "epoch": 0.064, + "grad_norm": 1.2890625, + "learning_rate": 0.0001, + "loss": 2.0667, + "step": 20 + }, + { + "epoch": 0.08, + "grad_norm": 0.3203125, + "learning_rate": 0.0001, + "loss": 1.6745, + "step": 25 + }, + { + "epoch": 0.096, + "grad_norm": 0.33984375, + "learning_rate": 0.0001, + "loss": 1.4179, + "step": 30 + }, + { + "epoch": 0.112, + "grad_norm": 0.369140625, + "learning_rate": 0.0001, + "loss": 1.256, + "step": 35 + }, + { + "epoch": 0.128, + "grad_norm": 0.1767578125, + "learning_rate": 0.0001, + "loss": 1.1206, + "step": 40 + }, + { + "epoch": 0.144, + "grad_norm": 0.1806640625, + "learning_rate": 0.0001, + "loss": 0.8113, + "step": 45 + }, + { + "epoch": 0.16, + "grad_norm": 0.31640625, + "learning_rate": 0.0001, + "loss": 0.5563, + "step": 50 + }, + { + "epoch": 0.176, + "grad_norm": 0.47265625, + "learning_rate": 0.0001, + "loss": 1.2945, + "step": 55 + }, + { + "epoch": 0.192, + "grad_norm": 0.30078125, + "learning_rate": 0.0001, + "loss": 1.1513, + "step": 60 + }, + { + "epoch": 0.208, + "grad_norm": 0.87109375, + "learning_rate": 0.0001, + "loss": 1.0038, + "step": 65 + }, + { + "epoch": 0.224, + "grad_norm": 0.11376953125, + "learning_rate": 0.0001, + "loss": 0.9775, + "step": 70 + }, + { + "epoch": 0.24, + "grad_norm": 0.1376953125, + "learning_rate": 0.0001, + "loss": 0.9107, + "step": 75 + }, + { + "epoch": 0.256, + "grad_norm": 0.13671875, + "learning_rate": 0.0001, + "loss": 0.8357, + "step": 80 + }, + { + "epoch": 0.272, + "grad_norm": 0.2734375, + "learning_rate": 0.0001, + "loss": 0.8438, + "step": 85 + }, + { + "epoch": 0.288, + "grad_norm": 0.1318359375, + "learning_rate": 0.0001, + "loss": 0.8182, + "step": 90 + }, + { + "epoch": 0.304, + "grad_norm": 0.1220703125, + "learning_rate": 0.0001, + "loss": 0.6811, + "step": 95 + }, + { + "epoch": 0.32, + "grad_norm": 0.1474609375, + "learning_rate": 0.0001, + "loss": 0.5087, + "step": 100 + }, + { + "epoch": 0.336, + "grad_norm": 0.1767578125, + "learning_rate": 0.0001, + "loss": 0.9827, + "step": 105 + }, + { + "epoch": 0.352, + "grad_norm": 0.1943359375, + "learning_rate": 0.0001, + "loss": 0.9673, + "step": 110 + }, + { + "epoch": 0.368, + "grad_norm": 0.21484375, + "learning_rate": 0.0001, + "loss": 0.9514, + "step": 115 + }, + { + "epoch": 0.384, + "grad_norm": 0.1630859375, + "learning_rate": 0.0001, + "loss": 0.8378, + "step": 120 + }, + { + "epoch": 0.4, + "grad_norm": 0.1220703125, + "learning_rate": 0.0001, + "loss": 0.8721, + "step": 125 + }, + { + "epoch": 0.416, + "grad_norm": 0.1328125, + "learning_rate": 0.0001, + "loss": 0.8317, + "step": 130 + }, + { + "epoch": 0.432, + "grad_norm": 0.1328125, + "learning_rate": 0.0001, + "loss": 0.7948, + "step": 135 + }, + { + "epoch": 0.448, + "grad_norm": 0.1328125, + "learning_rate": 0.0001, + "loss": 0.7682, + "step": 140 + }, + { + "epoch": 0.464, + "grad_norm": 0.107421875, + "learning_rate": 0.0001, + "loss": 0.6472, + "step": 145 + }, + { + "epoch": 0.48, + "grad_norm": 0.1513671875, + "learning_rate": 0.0001, + "loss": 0.463, + "step": 150 + }, + { + "epoch": 0.496, + "grad_norm": 0.16015625, + "learning_rate": 0.0001, + "loss": 0.8907, + "step": 155 + }, + { + "epoch": 0.512, + "grad_norm": 0.1337890625, + "learning_rate": 0.0001, + "loss": 0.8254, + "step": 160 + }, + { + "epoch": 0.528, + "grad_norm": 0.146484375, + "learning_rate": 0.0001, + "loss": 0.8455, + "step": 165 + }, + { + "epoch": 0.544, + "grad_norm": 0.15625, + "learning_rate": 0.0001, + "loss": 0.8194, + "step": 170 + }, + { + "epoch": 0.56, + "grad_norm": 0.166015625, + "learning_rate": 0.0001, + "loss": 0.8291, + "step": 175 + }, + { + "epoch": 0.576, + "grad_norm": 0.1787109375, + "learning_rate": 0.0001, + "loss": 0.7265, + "step": 180 + }, + { + "epoch": 0.592, + "grad_norm": 0.208984375, + "learning_rate": 0.0001, + "loss": 0.7856, + "step": 185 + }, + { + "epoch": 0.608, + "grad_norm": 0.2216796875, + "learning_rate": 0.0001, + "loss": 0.7599, + "step": 190 + }, + { + "epoch": 0.624, + "grad_norm": 0.130859375, + "learning_rate": 0.0001, + "loss": 0.6127, + "step": 195 + }, + { + "epoch": 0.64, + "grad_norm": 0.1982421875, + "learning_rate": 0.0001, + "loss": 0.4152, + "step": 200 + }, + { + "epoch": 0.656, + "grad_norm": 0.1943359375, + "learning_rate": 0.0001, + "loss": 0.8772, + "step": 205 + }, + { + "epoch": 0.672, + "grad_norm": 0.197265625, + "learning_rate": 0.0001, + "loss": 0.7661, + "step": 210 + }, + { + "epoch": 0.688, + "grad_norm": 0.173828125, + "learning_rate": 0.0001, + "loss": 0.8362, + "step": 215 + }, + { + "epoch": 0.704, + "grad_norm": 0.18359375, + "learning_rate": 0.0001, + "loss": 0.6781, + "step": 220 + }, + { + "epoch": 0.72, + "grad_norm": 0.1708984375, + "learning_rate": 0.0001, + "loss": 0.7479, + "step": 225 + }, + { + "epoch": 0.736, + "grad_norm": 0.177734375, + "learning_rate": 0.0001, + "loss": 0.6598, + "step": 230 + }, + { + "epoch": 0.752, + "grad_norm": 0.177734375, + "learning_rate": 0.0001, + "loss": 0.7109, + "step": 235 + }, + { + "epoch": 0.768, + "grad_norm": 0.1337890625, + "learning_rate": 0.0001, + "loss": 0.6603, + "step": 240 + }, + { + "epoch": 0.784, + "grad_norm": 0.1494140625, + "learning_rate": 0.0001, + "loss": 0.5983, + "step": 245 + }, + { + "epoch": 0.8, + "grad_norm": 0.1630859375, + "learning_rate": 0.0001, + "loss": 0.3945, + "step": 250 + }, + { + "epoch": 0.816, + "grad_norm": 0.173828125, + "learning_rate": 0.0001, + "loss": 0.7734, + "step": 255 + }, + { + "epoch": 0.832, + "grad_norm": 0.2021484375, + "learning_rate": 0.0001, + "loss": 0.7553, + "step": 260 + }, + { + "epoch": 0.848, + "grad_norm": 0.177734375, + "learning_rate": 0.0001, + "loss": 0.8062, + "step": 265 + }, + { + "epoch": 0.864, + "grad_norm": 0.150390625, + "learning_rate": 0.0001, + "loss": 0.6815, + "step": 270 + }, + { + "epoch": 0.88, + "grad_norm": 0.1689453125, + "learning_rate": 0.0001, + "loss": 0.7524, + "step": 275 + }, + { + "epoch": 0.896, + "grad_norm": 0.1572265625, + "learning_rate": 0.0001, + "loss": 0.6798, + "step": 280 + }, + { + "epoch": 0.912, + "grad_norm": 0.177734375, + "learning_rate": 0.0001, + "loss": 0.7037, + "step": 285 + }, + { + "epoch": 0.928, + "grad_norm": 0.1591796875, + "learning_rate": 0.0001, + "loss": 0.6274, + "step": 290 + }, + { + "epoch": 0.944, + "grad_norm": 0.1630859375, + "learning_rate": 0.0001, + "loss": 0.6103, + "step": 295 + }, + { + "epoch": 0.96, + "grad_norm": 0.22265625, + "learning_rate": 0.0001, + "loss": 0.3983, + "step": 300 + }, + { + "epoch": 0.976, + "grad_norm": 0.1611328125, + "learning_rate": 0.0001, + "loss": 0.6683, + "step": 305 + }, + { + "epoch": 0.992, + "grad_norm": 0.1533203125, + "learning_rate": 0.0001, + "loss": 0.6045, + "step": 310 + }, + { + "epoch": 1.008, + "grad_norm": 0.1630859375, + "learning_rate": 0.0001, + "loss": 0.5759, + "step": 315 + }, + { + "epoch": 1.024, + "grad_norm": 0.1904296875, + "learning_rate": 0.0001, + "loss": 0.5826, + "step": 320 + }, + { + "epoch": 1.04, + "grad_norm": 0.2490234375, + "learning_rate": 0.0001, + "loss": 0.6502, + "step": 325 + }, + { + "epoch": 1.056, + "grad_norm": 0.2353515625, + "learning_rate": 0.0001, + "loss": 0.6278, + "step": 330 + }, + { + "epoch": 1.072, + "grad_norm": 0.1904296875, + "learning_rate": 0.0001, + "loss": 0.6155, + "step": 335 + }, + { + "epoch": 1.088, + "grad_norm": 0.2265625, + "learning_rate": 0.0001, + "loss": 0.6104, + "step": 340 + }, + { + "epoch": 1.104, + "grad_norm": 0.21875, + "learning_rate": 0.0001, + "loss": 0.5942, + "step": 345 + }, + { + "epoch": 1.12, + "grad_norm": 0.224609375, + "learning_rate": 0.0001, + "loss": 0.6177, + "step": 350 + }, + { + "epoch": 1.1360000000000001, + "grad_norm": 0.1796875, + "learning_rate": 0.0001, + "loss": 0.5307, + "step": 355 + }, + { + "epoch": 1.152, + "grad_norm": 0.1630859375, + "learning_rate": 0.0001, + "loss": 0.443, + "step": 360 + }, + { + "epoch": 1.168, + "grad_norm": 0.2431640625, + "learning_rate": 0.0001, + "loss": 0.4582, + "step": 365 + }, + { + "epoch": 1.184, + "grad_norm": 0.2138671875, + "learning_rate": 0.0001, + "loss": 0.6175, + "step": 370 + }, + { + "epoch": 1.2, + "grad_norm": 0.22265625, + "learning_rate": 0.0001, + "loss": 0.6191, + "step": 375 + }, + { + "epoch": 1.216, + "grad_norm": 0.25, + "learning_rate": 0.0001, + "loss": 0.5887, + "step": 380 + }, + { + "epoch": 1.232, + "grad_norm": 0.1962890625, + "learning_rate": 0.0001, + "loss": 0.5517, + "step": 385 + }, + { + "epoch": 1.248, + "grad_norm": 0.2109375, + "learning_rate": 0.0001, + "loss": 0.5712, + "step": 390 + }, + { + "epoch": 1.264, + "grad_norm": 0.1904296875, + "learning_rate": 0.0001, + "loss": 0.5526, + "step": 395 + }, + { + "epoch": 1.28, + "grad_norm": 0.2119140625, + "learning_rate": 0.0001, + "loss": 0.6027, + "step": 400 + }, + { + "epoch": 1.296, + "grad_norm": 0.1865234375, + "learning_rate": 0.0001, + "loss": 0.5325, + "step": 405 + }, + { + "epoch": 1.312, + "grad_norm": 0.212890625, + "learning_rate": 0.0001, + "loss": 0.4752, + "step": 410 + }, + { + "epoch": 1.328, + "grad_norm": 0.25, + "learning_rate": 0.0001, + "loss": 0.4214, + "step": 415 + }, + { + "epoch": 1.3439999999999999, + "grad_norm": 0.306640625, + "learning_rate": 0.0001, + "loss": 0.6299, + "step": 420 + }, + { + "epoch": 1.3599999999999999, + "grad_norm": 0.361328125, + "learning_rate": 0.0001, + "loss": 0.6215, + "step": 425 + }, + { + "epoch": 1.376, + "grad_norm": 0.310546875, + "learning_rate": 0.0001, + "loss": 0.5869, + "step": 430 + }, + { + "epoch": 1.392, + "grad_norm": 0.2255859375, + "learning_rate": 0.0001, + "loss": 0.5448, + "step": 435 + }, + { + "epoch": 1.408, + "grad_norm": 0.306640625, + "learning_rate": 0.0001, + "loss": 0.6038, + "step": 440 + }, + { + "epoch": 1.424, + "grad_norm": 0.37890625, + "learning_rate": 0.0001, + "loss": 0.5647, + "step": 445 + }, + { + "epoch": 1.44, + "grad_norm": 0.26953125, + "learning_rate": 0.0001, + "loss": 0.5564, + "step": 450 + }, + { + "epoch": 1.456, + "grad_norm": 0.1796875, + "learning_rate": 0.0001, + "loss": 0.4994, + "step": 455 + }, + { + "epoch": 1.472, + "grad_norm": 0.25, + "learning_rate": 0.0001, + "loss": 0.4244, + "step": 460 + }, + { + "epoch": 1.488, + "grad_norm": 0.2451171875, + "learning_rate": 0.0001, + "loss": 0.4652, + "step": 465 + }, + { + "epoch": 1.504, + "grad_norm": 0.2578125, + "learning_rate": 0.0001, + "loss": 0.5929, + "step": 470 + } + ], + "logging_steps": 5, + "max_steps": 470, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 90, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 2.7094162776644813e+18, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/codellama/c/callgraph_c_pretrained/checkpoint-470/training_args.bin b/codellama/c/callgraph_c_pretrained/checkpoint-470/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..f5a45f2746940e60226d1e7ab703007b2298cad9 --- /dev/null +++ b/codellama/c/callgraph_c_pretrained/checkpoint-470/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bebd10fa73e376c5dc7a1d5f4eeaf2de33a78c079315ba09dfc98196209d0ea7 +size 7416 diff --git a/codellama/c/callgraph_c_pretrained/completed b/codellama/c/callgraph_c_pretrained/completed new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/codellama/c/callgraph_c_pretrained/metrics.json b/codellama/c/callgraph_c_pretrained/metrics.json new file mode 100644 index 0000000000000000000000000000000000000000..054fc943ad6c8a63fadf104e77fbe87823266dc3 --- /dev/null +++ b/codellama/c/callgraph_c_pretrained/metrics.json @@ -0,0 +1 @@ +{"run_name": "callgraph_c_pretrained", "train_runtime": 84777.289, "train_samples_per_second": 0.355, "train_steps_per_second": 0.006, "total_flos": 2.7094162776644813e+18, "train_loss": 0.8028768169119003, "epoch": 1.504} \ No newline at end of file diff --git a/codellama/c/callgraph_c_pretrained/train_results.json b/codellama/c/callgraph_c_pretrained/train_results.json new file mode 100644 index 0000000000000000000000000000000000000000..2c76f7526e89b6a840a74cf021f4021b461be593 --- /dev/null +++ b/codellama/c/callgraph_c_pretrained/train_results.json @@ -0,0 +1,8 @@ +{ + "epoch": 1.504, + "total_flos": 2.7094162776644813e+18, + "train_loss": 0.8028768169119003, + "train_runtime": 84777.289, + "train_samples_per_second": 0.355, + "train_steps_per_second": 0.006 +} \ No newline at end of file diff --git a/codellama/c/callgraph_c_pretrained/trainer_state.json b/codellama/c/callgraph_c_pretrained/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..57ce0360529ab3331328a0fa43063810839d353e --- /dev/null +++ b/codellama/c/callgraph_c_pretrained/trainer_state.json @@ -0,0 +1,700 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.504, + "eval_steps": 500, + "global_step": 470, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.016, + "grad_norm": 0.33984375, + "learning_rate": 0.0001, + "loss": 4.2845, + "step": 5 + }, + { + "epoch": 0.032, + "grad_norm": 0.20703125, + "learning_rate": 0.0001, + "loss": 2.611, + "step": 10 + }, + { + "epoch": 0.048, + "grad_norm": 2.8125, + "learning_rate": 0.0001, + "loss": 2.1007, + "step": 15 + }, + { + "epoch": 0.064, + "grad_norm": 1.2890625, + "learning_rate": 0.0001, + "loss": 2.0667, + "step": 20 + }, + { + "epoch": 0.08, + "grad_norm": 0.3203125, + "learning_rate": 0.0001, + "loss": 1.6745, + "step": 25 + }, + { + "epoch": 0.096, + "grad_norm": 0.33984375, + "learning_rate": 0.0001, + "loss": 1.4179, + "step": 30 + }, + { + "epoch": 0.112, + "grad_norm": 0.369140625, + "learning_rate": 0.0001, + "loss": 1.256, + "step": 35 + }, + { + "epoch": 0.128, + "grad_norm": 0.1767578125, + "learning_rate": 0.0001, + "loss": 1.1206, + "step": 40 + }, + { + "epoch": 0.144, + "grad_norm": 0.1806640625, + "learning_rate": 0.0001, + "loss": 0.8113, + "step": 45 + }, + { + "epoch": 0.16, + "grad_norm": 0.31640625, + "learning_rate": 0.0001, + "loss": 0.5563, + "step": 50 + }, + { + "epoch": 0.176, + "grad_norm": 0.47265625, + "learning_rate": 0.0001, + "loss": 1.2945, + "step": 55 + }, + { + "epoch": 0.192, + "grad_norm": 0.30078125, + "learning_rate": 0.0001, + "loss": 1.1513, + "step": 60 + }, + { + "epoch": 0.208, + "grad_norm": 0.87109375, + "learning_rate": 0.0001, + "loss": 1.0038, + "step": 65 + }, + { + "epoch": 0.224, + "grad_norm": 0.11376953125, + "learning_rate": 0.0001, + "loss": 0.9775, + "step": 70 + }, + { + "epoch": 0.24, + "grad_norm": 0.1376953125, + "learning_rate": 0.0001, + "loss": 0.9107, + "step": 75 + }, + { + "epoch": 0.256, + "grad_norm": 0.13671875, + "learning_rate": 0.0001, + "loss": 0.8357, + "step": 80 + }, + { + "epoch": 0.272, + "grad_norm": 0.2734375, + "learning_rate": 0.0001, + "loss": 0.8438, + "step": 85 + }, + { + "epoch": 0.288, + "grad_norm": 0.1318359375, + "learning_rate": 0.0001, + "loss": 0.8182, + "step": 90 + }, + { + "epoch": 0.304, + "grad_norm": 0.1220703125, + "learning_rate": 0.0001, + "loss": 0.6811, + "step": 95 + }, + { + "epoch": 0.32, + "grad_norm": 0.1474609375, + "learning_rate": 0.0001, + "loss": 0.5087, + "step": 100 + }, + { + "epoch": 0.336, + "grad_norm": 0.1767578125, + "learning_rate": 0.0001, + "loss": 0.9827, + "step": 105 + }, + { + "epoch": 0.352, + "grad_norm": 0.1943359375, + "learning_rate": 0.0001, + "loss": 0.9673, + "step": 110 + }, + { + "epoch": 0.368, + "grad_norm": 0.21484375, + "learning_rate": 0.0001, + "loss": 0.9514, + "step": 115 + }, + { + "epoch": 0.384, + "grad_norm": 0.1630859375, + "learning_rate": 0.0001, + "loss": 0.8378, + "step": 120 + }, + { + "epoch": 0.4, + "grad_norm": 0.1220703125, + "learning_rate": 0.0001, + "loss": 0.8721, + "step": 125 + }, + { + "epoch": 0.416, + "grad_norm": 0.1328125, + "learning_rate": 0.0001, + "loss": 0.8317, + "step": 130 + }, + { + "epoch": 0.432, + "grad_norm": 0.1328125, + "learning_rate": 0.0001, + "loss": 0.7948, + "step": 135 + }, + { + "epoch": 0.448, + "grad_norm": 0.1328125, + "learning_rate": 0.0001, + "loss": 0.7682, + "step": 140 + }, + { + "epoch": 0.464, + "grad_norm": 0.107421875, + "learning_rate": 0.0001, + "loss": 0.6472, + "step": 145 + }, + { + "epoch": 0.48, + "grad_norm": 0.1513671875, + "learning_rate": 0.0001, + "loss": 0.463, + "step": 150 + }, + { + "epoch": 0.496, + "grad_norm": 0.16015625, + "learning_rate": 0.0001, + "loss": 0.8907, + "step": 155 + }, + { + "epoch": 0.512, + "grad_norm": 0.1337890625, + "learning_rate": 0.0001, + "loss": 0.8254, + "step": 160 + }, + { + "epoch": 0.528, + "grad_norm": 0.146484375, + "learning_rate": 0.0001, + "loss": 0.8455, + "step": 165 + }, + { + "epoch": 0.544, + "grad_norm": 0.15625, + "learning_rate": 0.0001, + "loss": 0.8194, + "step": 170 + }, + { + "epoch": 0.56, + "grad_norm": 0.166015625, + "learning_rate": 0.0001, + "loss": 0.8291, + "step": 175 + }, + { + "epoch": 0.576, + "grad_norm": 0.1787109375, + "learning_rate": 0.0001, + "loss": 0.7265, + "step": 180 + }, + { + "epoch": 0.592, + "grad_norm": 0.208984375, + "learning_rate": 0.0001, + "loss": 0.7856, + "step": 185 + }, + { + "epoch": 0.608, + "grad_norm": 0.2216796875, + "learning_rate": 0.0001, + "loss": 0.7599, + "step": 190 + }, + { + "epoch": 0.624, + "grad_norm": 0.130859375, + "learning_rate": 0.0001, + "loss": 0.6127, + "step": 195 + }, + { + "epoch": 0.64, + "grad_norm": 0.1982421875, + "learning_rate": 0.0001, + "loss": 0.4152, + "step": 200 + }, + { + "epoch": 0.656, + "grad_norm": 0.1943359375, + "learning_rate": 0.0001, + "loss": 0.8772, + "step": 205 + }, + { + "epoch": 0.672, + "grad_norm": 0.197265625, + "learning_rate": 0.0001, + "loss": 0.7661, + "step": 210 + }, + { + "epoch": 0.688, + "grad_norm": 0.173828125, + "learning_rate": 0.0001, + "loss": 0.8362, + "step": 215 + }, + { + "epoch": 0.704, + "grad_norm": 0.18359375, + "learning_rate": 0.0001, + "loss": 0.6781, + "step": 220 + }, + { + "epoch": 0.72, + "grad_norm": 0.1708984375, + "learning_rate": 0.0001, + "loss": 0.7479, + "step": 225 + }, + { + "epoch": 0.736, + "grad_norm": 0.177734375, + "learning_rate": 0.0001, + "loss": 0.6598, + "step": 230 + }, + { + "epoch": 0.752, + "grad_norm": 0.177734375, + "learning_rate": 0.0001, + "loss": 0.7109, + "step": 235 + }, + { + "epoch": 0.768, + "grad_norm": 0.1337890625, + "learning_rate": 0.0001, + "loss": 0.6603, + "step": 240 + }, + { + "epoch": 0.784, + "grad_norm": 0.1494140625, + "learning_rate": 0.0001, + "loss": 0.5983, + "step": 245 + }, + { + "epoch": 0.8, + "grad_norm": 0.1630859375, + "learning_rate": 0.0001, + "loss": 0.3945, + "step": 250 + }, + { + "epoch": 0.816, + "grad_norm": 0.173828125, + "learning_rate": 0.0001, + "loss": 0.7734, + "step": 255 + }, + { + "epoch": 0.832, + "grad_norm": 0.2021484375, + "learning_rate": 0.0001, + "loss": 0.7553, + "step": 260 + }, + { + "epoch": 0.848, + "grad_norm": 0.177734375, + "learning_rate": 0.0001, + "loss": 0.8062, + "step": 265 + }, + { + "epoch": 0.864, + "grad_norm": 0.150390625, + "learning_rate": 0.0001, + "loss": 0.6815, + "step": 270 + }, + { + "epoch": 0.88, + "grad_norm": 0.1689453125, + "learning_rate": 0.0001, + "loss": 0.7524, + "step": 275 + }, + { + "epoch": 0.896, + "grad_norm": 0.1572265625, + "learning_rate": 0.0001, + "loss": 0.6798, + "step": 280 + }, + { + "epoch": 0.912, + "grad_norm": 0.177734375, + "learning_rate": 0.0001, + "loss": 0.7037, + "step": 285 + }, + { + "epoch": 0.928, + "grad_norm": 0.1591796875, + "learning_rate": 0.0001, + "loss": 0.6274, + "step": 290 + }, + { + "epoch": 0.944, + "grad_norm": 0.1630859375, + "learning_rate": 0.0001, + "loss": 0.6103, + "step": 295 + }, + { + "epoch": 0.96, + "grad_norm": 0.22265625, + "learning_rate": 0.0001, + "loss": 0.3983, + "step": 300 + }, + { + "epoch": 0.976, + "grad_norm": 0.1611328125, + "learning_rate": 0.0001, + "loss": 0.6683, + "step": 305 + }, + { + "epoch": 0.992, + "grad_norm": 0.1533203125, + "learning_rate": 0.0001, + "loss": 0.6045, + "step": 310 + }, + { + "epoch": 1.008, + "grad_norm": 0.1630859375, + "learning_rate": 0.0001, + "loss": 0.5759, + "step": 315 + }, + { + "epoch": 1.024, + "grad_norm": 0.1904296875, + "learning_rate": 0.0001, + "loss": 0.5826, + "step": 320 + }, + { + "epoch": 1.04, + "grad_norm": 0.2490234375, + "learning_rate": 0.0001, + "loss": 0.6502, + "step": 325 + }, + { + "epoch": 1.056, + "grad_norm": 0.2353515625, + "learning_rate": 0.0001, + "loss": 0.6278, + "step": 330 + }, + { + "epoch": 1.072, + "grad_norm": 0.1904296875, + "learning_rate": 0.0001, + "loss": 0.6155, + "step": 335 + }, + { + "epoch": 1.088, + "grad_norm": 0.2265625, + "learning_rate": 0.0001, + "loss": 0.6104, + "step": 340 + }, + { + "epoch": 1.104, + "grad_norm": 0.21875, + "learning_rate": 0.0001, + "loss": 0.5942, + "step": 345 + }, + { + "epoch": 1.12, + "grad_norm": 0.224609375, + "learning_rate": 0.0001, + "loss": 0.6177, + "step": 350 + }, + { + "epoch": 1.1360000000000001, + "grad_norm": 0.1796875, + "learning_rate": 0.0001, + "loss": 0.5307, + "step": 355 + }, + { + "epoch": 1.152, + "grad_norm": 0.1630859375, + "learning_rate": 0.0001, + "loss": 0.443, + "step": 360 + }, + { + "epoch": 1.168, + "grad_norm": 0.2431640625, + "learning_rate": 0.0001, + "loss": 0.4582, + "step": 365 + }, + { + "epoch": 1.184, + "grad_norm": 0.2138671875, + "learning_rate": 0.0001, + "loss": 0.6175, + "step": 370 + }, + { + "epoch": 1.2, + "grad_norm": 0.22265625, + "learning_rate": 0.0001, + "loss": 0.6191, + "step": 375 + }, + { + "epoch": 1.216, + "grad_norm": 0.25, + "learning_rate": 0.0001, + "loss": 0.5887, + "step": 380 + }, + { + "epoch": 1.232, + "grad_norm": 0.1962890625, + "learning_rate": 0.0001, + "loss": 0.5517, + "step": 385 + }, + { + "epoch": 1.248, + "grad_norm": 0.2109375, + "learning_rate": 0.0001, + "loss": 0.5712, + "step": 390 + }, + { + "epoch": 1.264, + "grad_norm": 0.1904296875, + "learning_rate": 0.0001, + "loss": 0.5526, + "step": 395 + }, + { + "epoch": 1.28, + "grad_norm": 0.2119140625, + "learning_rate": 0.0001, + "loss": 0.6027, + "step": 400 + }, + { + "epoch": 1.296, + "grad_norm": 0.1865234375, + "learning_rate": 0.0001, + "loss": 0.5325, + "step": 405 + }, + { + "epoch": 1.312, + "grad_norm": 0.212890625, + "learning_rate": 0.0001, + "loss": 0.4752, + "step": 410 + }, + { + "epoch": 1.328, + "grad_norm": 0.25, + "learning_rate": 0.0001, + "loss": 0.4214, + "step": 415 + }, + { + "epoch": 1.3439999999999999, + "grad_norm": 0.306640625, + "learning_rate": 0.0001, + "loss": 0.6299, + "step": 420 + }, + { + "epoch": 1.3599999999999999, + "grad_norm": 0.361328125, + "learning_rate": 0.0001, + "loss": 0.6215, + "step": 425 + }, + { + "epoch": 1.376, + "grad_norm": 0.310546875, + "learning_rate": 0.0001, + "loss": 0.5869, + "step": 430 + }, + { + "epoch": 1.392, + "grad_norm": 0.2255859375, + "learning_rate": 0.0001, + "loss": 0.5448, + "step": 435 + }, + { + "epoch": 1.408, + "grad_norm": 0.306640625, + "learning_rate": 0.0001, + "loss": 0.6038, + "step": 440 + }, + { + "epoch": 1.424, + "grad_norm": 0.37890625, + "learning_rate": 0.0001, + "loss": 0.5647, + "step": 445 + }, + { + "epoch": 1.44, + "grad_norm": 0.26953125, + "learning_rate": 0.0001, + "loss": 0.5564, + "step": 450 + }, + { + "epoch": 1.456, + "grad_norm": 0.1796875, + "learning_rate": 0.0001, + "loss": 0.4994, + "step": 455 + }, + { + "epoch": 1.472, + "grad_norm": 0.25, + "learning_rate": 0.0001, + "loss": 0.4244, + "step": 460 + }, + { + "epoch": 1.488, + "grad_norm": 0.2451171875, + "learning_rate": 0.0001, + "loss": 0.4652, + "step": 465 + }, + { + "epoch": 1.504, + "grad_norm": 0.2578125, + "learning_rate": 0.0001, + "loss": 0.5929, + "step": 470 + }, + { + "epoch": 1.504, + "step": 470, + "total_flos": 2.7094162776644813e+18, + "train_loss": 0.8028768169119003, + "train_runtime": 84777.289, + "train_samples_per_second": 0.355, + "train_steps_per_second": 0.006 + } + ], + "logging_steps": 5, + "max_steps": 470, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 90, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 2.7094162776644813e+18, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/codellama/c/codegen/codegen_c_base/all_results.json b/codellama/c/codegen/codegen_c_base/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..02055941f9b14039a170d072735402edc5a66615 --- /dev/null +++ b/codellama/c/codegen/codegen_c_base/all_results.json @@ -0,0 +1,8 @@ +{ + "epoch": 1.5594541910331383, + "total_flos": 4.824681746497536e+17, + "train_loss": 0.3312075719833374, + "train_runtime": 19317.2857, + "train_samples_per_second": 0.828, + "train_steps_per_second": 0.013 +} \ No newline at end of file diff --git a/codellama/c/codegen/codegen_c_base/checkpoint-250/README.md b/codellama/c/codegen/codegen_c_base/checkpoint-250/README.md new file mode 100644 index 0000000000000000000000000000000000000000..1c480e52daf505882c9f03a4e75fe30ea8c22c2c --- /dev/null +++ b/codellama/c/codegen/codegen_c_base/checkpoint-250/README.md @@ -0,0 +1,202 @@ +--- +base_model: CodeLlama-13b-Instruct-hf/ +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.2 \ No newline at end of file diff --git a/codellama/c/codegen/codegen_c_base/checkpoint-250/adapter_config.json b/codellama/c/codegen/codegen_c_base/checkpoint-250/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..be42f67e38a580bac9b7cd446832a106bfcf5cc3 --- /dev/null +++ b/codellama/c/codegen/codegen_c_base/checkpoint-250/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "CodeLlama-13b-Instruct-hf/", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "q_proj", + "up_proj", + "down_proj", + "o_proj", + "v_proj", + "gate_proj", + "k_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/codellama/c/codegen/codegen_c_base/checkpoint-250/adapter_model.safetensors b/codellama/c/codegen/codegen_c_base/checkpoint-250/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3872081ac5fc39f90e20a253e7268919b917ec43 --- /dev/null +++ b/codellama/c/codegen/codegen_c_base/checkpoint-250/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f9af2d97f9cea6e096df2e0b7cd922281bfc951f41b7006ff6d8dd5105ab473 +size 1156480200 diff --git a/codellama/c/codegen/codegen_c_base/checkpoint-250/adapter_model/README.md b/codellama/c/codegen/codegen_c_base/checkpoint-250/adapter_model/README.md new file mode 100644 index 0000000000000000000000000000000000000000..1c480e52daf505882c9f03a4e75fe30ea8c22c2c --- /dev/null +++ b/codellama/c/codegen/codegen_c_base/checkpoint-250/adapter_model/README.md @@ -0,0 +1,202 @@ +--- +base_model: CodeLlama-13b-Instruct-hf/ +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.2 \ No newline at end of file diff --git a/codellama/c/codegen/codegen_c_base/checkpoint-250/adapter_model/adapter_config.json b/codellama/c/codegen/codegen_c_base/checkpoint-250/adapter_model/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..be42f67e38a580bac9b7cd446832a106bfcf5cc3 --- /dev/null +++ b/codellama/c/codegen/codegen_c_base/checkpoint-250/adapter_model/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "CodeLlama-13b-Instruct-hf/", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "q_proj", + "up_proj", + "down_proj", + "o_proj", + "v_proj", + "gate_proj", + "k_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/codellama/c/codegen/codegen_c_base/checkpoint-250/adapter_model/adapter_model.safetensors b/codellama/c/codegen/codegen_c_base/checkpoint-250/adapter_model/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3872081ac5fc39f90e20a253e7268919b917ec43 --- /dev/null +++ b/codellama/c/codegen/codegen_c_base/checkpoint-250/adapter_model/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f9af2d97f9cea6e096df2e0b7cd922281bfc951f41b7006ff6d8dd5105ab473 +size 1156480200 diff --git a/codellama/c/codegen/codegen_c_base/checkpoint-250/added_tokens.json b/codellama/c/codegen/codegen_c_base/checkpoint-250/added_tokens.json new file mode 100644 index 0000000000000000000000000000000000000000..1cbbe5179eb8b5cc46632bbbc00eb51c68847074 --- /dev/null +++ b/codellama/c/codegen/codegen_c_base/checkpoint-250/added_tokens.json @@ -0,0 +1,3 @@ +{ + "[PAD]": 32016 +} diff --git a/codellama/c/codegen/codegen_c_base/checkpoint-250/optimizer.pt b/codellama/c/codegen/codegen_c_base/checkpoint-250/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..c9aa280a71433e19e2db3b9bebea9de43b1863d6 --- /dev/null +++ b/codellama/c/codegen/codegen_c_base/checkpoint-250/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8653c32a777b6dbfaa813b472949171bbc2654cc1d3a2dfff819751361bc9514 +size 2003126962 diff --git a/codellama/c/codegen/codegen_c_base/checkpoint-250/rng_state.pth b/codellama/c/codegen/codegen_c_base/checkpoint-250/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..d2d1ca450f51a73b9938ae4d77eda4f9e4e83adb --- /dev/null +++ b/codellama/c/codegen/codegen_c_base/checkpoint-250/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3834587c092125bd201661e37e816cdbd55b1a136077c4e0b1d7944daa54445 +size 14244 diff --git a/codellama/c/codegen/codegen_c_base/checkpoint-250/scheduler.pt b/codellama/c/codegen/codegen_c_base/checkpoint-250/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..643c118cf4c0a4a8f0d0d4818981421321bfc74c --- /dev/null +++ b/codellama/c/codegen/codegen_c_base/checkpoint-250/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e30df1bee25ad98e1c721d888a184c00d77649c8e5c8c3b1e8a4c16f9fe7f7ef +size 1064 diff --git a/codellama/c/codegen/codegen_c_base/checkpoint-250/special_tokens_map.json b/codellama/c/codegen/codegen_c_base/checkpoint-250/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..330bb0c14209dcd402b155e7d58c6c2b5210d40d --- /dev/null +++ b/codellama/c/codegen/codegen_c_base/checkpoint-250/special_tokens_map.json @@ -0,0 +1,36 @@ +{ + "additional_special_tokens": [ + "▁ ", + "▁", + "▁ ", + "▁ " + ], + "bos_token": { + "content": " ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "[PAD]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/codellama/c/codegen/codegen_c_base/checkpoint-250/tokenizer.model b/codellama/c/codegen/codegen_c_base/checkpoint-250/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..f6722e8b170230ebdd4c0f5f2ce03f219be536d4 --- /dev/null +++ b/codellama/c/codegen/codegen_c_base/checkpoint-250/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45ccb9c8b6b561889acea59191d66986d314e7cbd6a78abc6e49b139ca91c1e6 +size 500058 diff --git a/codellama/c/codegen/codegen_c_base/checkpoint-250/tokenizer_config.json b/codellama/c/codegen/codegen_c_base/checkpoint-250/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8f4094d204e2be0ed7b6bfa83d20cff28326258d --- /dev/null +++ b/codellama/c/codegen/codegen_c_base/checkpoint-250/tokenizer_config.json @@ -0,0 +1,94 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": " ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": " ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32007": { + "content": "▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32008": { + "content": "▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32009": { + "content": "▁ ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32010": { + "content": "▁ ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32016": { + "content": "[PAD]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [ + "▁ ", + "▁", + "▁ ", + "▁ " + ], + "bos_token": " ", + "chat_template": "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if loop.index0 == 0 and system_message != false %}{% set content = '<", + "eot_token": "▁>\\n' + system_message + '\\n< >\\n\\n' + message['content'] %}{% else %}{% set content = message['content'] %}{% endif %}{% if message['role'] == 'user' %}{{ bos_token + '[INST] ' + content | trim + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ ' ' + content | trim + ' ' + eos_token }}{% endif %}{% endfor %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "fill_token": " ", + "legacy": null, + "middle_token": "▁ ", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "[PAD]", + "padding_side": "right", + "prefix_token": "▁ ", + "sp_model_kwargs": {}, + "suffix_first": false, + "suffix_token": "▁", + "tokenizer_class": "CodeLlamaTokenizer", + "unk_token": " ", + "use_default_system_prompt": false +} diff --git a/codellama/c/codegen/codegen_c_base/checkpoint-250/trainer_state.json b/codellama/c/codegen/codegen_c_base/checkpoint-250/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..561d6ec53cedebd9a41fbd7c039cd7b157db1bee --- /dev/null +++ b/codellama/c/codegen/codegen_c_base/checkpoint-250/trainer_state.json @@ -0,0 +1,383 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.5594541910331383, + "eval_steps": 500, + "global_step": 250, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.031189083820662766, + "grad_norm": 0.0576171875, + "learning_rate": 0.0001, + "loss": 0.674, + "step": 5 + }, + { + "epoch": 0.06237816764132553, + "grad_norm": 0.052490234375, + "learning_rate": 0.0001, + "loss": 0.5118, + "step": 10 + }, + { + "epoch": 0.0935672514619883, + "grad_norm": 0.03857421875, + "learning_rate": 0.0001, + "loss": 0.4572, + "step": 15 + }, + { + "epoch": 0.12475633528265107, + "grad_norm": 0.039794921875, + "learning_rate": 0.0001, + "loss": 0.4645, + "step": 20 + }, + { + "epoch": 0.15594541910331383, + "grad_norm": 0.0537109375, + "learning_rate": 0.0001, + "loss": 0.4772, + "step": 25 + }, + { + "epoch": 0.1871345029239766, + "grad_norm": 0.059814453125, + "learning_rate": 0.0001, + "loss": 0.4466, + "step": 30 + }, + { + "epoch": 0.21832358674463936, + "grad_norm": 0.0478515625, + "learning_rate": 0.0001, + "loss": 0.4365, + "step": 35 + }, + { + "epoch": 0.24951267056530213, + "grad_norm": 0.138671875, + "learning_rate": 0.0001, + "loss": 0.4622, + "step": 40 + }, + { + "epoch": 0.2807017543859649, + "grad_norm": 0.046875, + "learning_rate": 0.0001, + "loss": 0.3409, + "step": 45 + }, + { + "epoch": 0.31189083820662766, + "grad_norm": 0.0289306640625, + "learning_rate": 0.0001, + "loss": 0.3158, + "step": 50 + }, + { + "epoch": 0.34307992202729043, + "grad_norm": 0.029541015625, + "learning_rate": 0.0001, + "loss": 0.3236, + "step": 55 + }, + { + "epoch": 0.3742690058479532, + "grad_norm": 0.0274658203125, + "learning_rate": 0.0001, + "loss": 0.3273, + "step": 60 + }, + { + "epoch": 0.40545808966861596, + "grad_norm": 0.031494140625, + "learning_rate": 0.0001, + "loss": 0.3142, + "step": 65 + }, + { + "epoch": 0.43664717348927873, + "grad_norm": 0.0269775390625, + "learning_rate": 0.0001, + "loss": 0.3276, + "step": 70 + }, + { + "epoch": 0.4678362573099415, + "grad_norm": 0.02978515625, + "learning_rate": 0.0001, + "loss": 0.334, + "step": 75 + }, + { + "epoch": 0.49902534113060426, + "grad_norm": 0.0703125, + "learning_rate": 0.0001, + "loss": 0.356, + "step": 80 + }, + { + "epoch": 0.530214424951267, + "grad_norm": 0.03271484375, + "learning_rate": 0.0001, + "loss": 0.3176, + "step": 85 + }, + { + "epoch": 0.5614035087719298, + "grad_norm": 0.0322265625, + "learning_rate": 0.0001, + "loss": 0.2829, + "step": 90 + }, + { + "epoch": 0.5925925925925926, + "grad_norm": 0.02587890625, + "learning_rate": 0.0001, + "loss": 0.2984, + "step": 95 + }, + { + "epoch": 0.6237816764132553, + "grad_norm": 0.0296630859375, + "learning_rate": 0.0001, + "loss": 0.3067, + "step": 100 + }, + { + "epoch": 0.6549707602339181, + "grad_norm": 0.0264892578125, + "learning_rate": 0.0001, + "loss": 0.2979, + "step": 105 + }, + { + "epoch": 0.6861598440545809, + "grad_norm": 0.02783203125, + "learning_rate": 0.0001, + "loss": 0.3012, + "step": 110 + }, + { + "epoch": 0.7173489278752436, + "grad_norm": 0.031494140625, + "learning_rate": 0.0001, + "loss": 0.3222, + "step": 115 + }, + { + "epoch": 0.7485380116959064, + "grad_norm": 0.06982421875, + "learning_rate": 0.0001, + "loss": 0.3439, + "step": 120 + }, + { + "epoch": 0.7797270955165692, + "grad_norm": 0.031982421875, + "learning_rate": 0.0001, + "loss": 0.2875, + "step": 125 + }, + { + "epoch": 0.8109161793372319, + "grad_norm": 0.029296875, + "learning_rate": 0.0001, + "loss": 0.2803, + "step": 130 + }, + { + "epoch": 0.8421052631578947, + "grad_norm": 0.029541015625, + "learning_rate": 0.0001, + "loss": 0.2999, + "step": 135 + }, + { + "epoch": 0.8732943469785575, + "grad_norm": 0.02734375, + "learning_rate": 0.0001, + "loss": 0.2986, + "step": 140 + }, + { + "epoch": 0.9044834307992202, + "grad_norm": 0.028076171875, + "learning_rate": 0.0001, + "loss": 0.302, + "step": 145 + }, + { + "epoch": 0.935672514619883, + "grad_norm": 0.0322265625, + "learning_rate": 0.0001, + "loss": 0.2936, + "step": 150 + }, + { + "epoch": 0.9668615984405458, + "grad_norm": 0.034423828125, + "learning_rate": 0.0001, + "loss": 0.3017, + "step": 155 + }, + { + "epoch": 0.9980506822612085, + "grad_norm": 0.060546875, + "learning_rate": 0.0001, + "loss": 0.3486, + "step": 160 + }, + { + "epoch": 1.0292397660818713, + "grad_norm": 0.033935546875, + "learning_rate": 0.0001, + "loss": 0.2867, + "step": 165 + }, + { + "epoch": 1.060428849902534, + "grad_norm": 0.031494140625, + "learning_rate": 0.0001, + "loss": 0.2628, + "step": 170 + }, + { + "epoch": 1.0916179337231968, + "grad_norm": 0.0289306640625, + "learning_rate": 0.0001, + "loss": 0.278, + "step": 175 + }, + { + "epoch": 1.1228070175438596, + "grad_norm": 0.031982421875, + "learning_rate": 0.0001, + "loss": 0.2915, + "step": 180 + }, + { + "epoch": 1.1539961013645224, + "grad_norm": 0.031005859375, + "learning_rate": 0.0001, + "loss": 0.2902, + "step": 185 + }, + { + "epoch": 1.1851851851851851, + "grad_norm": 0.03271484375, + "learning_rate": 0.0001, + "loss": 0.2785, + "step": 190 + }, + { + "epoch": 1.2163742690058479, + "grad_norm": 0.042236328125, + "learning_rate": 0.0001, + "loss": 0.295, + "step": 195 + }, + { + "epoch": 1.2475633528265107, + "grad_norm": 0.05615234375, + "learning_rate": 0.0001, + "loss": 0.3039, + "step": 200 + }, + { + "epoch": 1.2787524366471734, + "grad_norm": 0.03759765625, + "learning_rate": 0.0001, + "loss": 0.2725, + "step": 205 + }, + { + "epoch": 1.3099415204678362, + "grad_norm": 0.035400390625, + "learning_rate": 0.0001, + "loss": 0.2738, + "step": 210 + }, + { + "epoch": 1.341130604288499, + "grad_norm": 0.031494140625, + "learning_rate": 0.0001, + "loss": 0.2861, + "step": 215 + }, + { + "epoch": 1.3723196881091617, + "grad_norm": 0.036865234375, + "learning_rate": 0.0001, + "loss": 0.2859, + "step": 220 + }, + { + "epoch": 1.4035087719298245, + "grad_norm": 0.03515625, + "learning_rate": 0.0001, + "loss": 0.285, + "step": 225 + }, + { + "epoch": 1.4346978557504872, + "grad_norm": 0.03466796875, + "learning_rate": 0.0001, + "loss": 0.2713, + "step": 230 + }, + { + "epoch": 1.46588693957115, + "grad_norm": 0.042724609375, + "learning_rate": 0.0001, + "loss": 0.2982, + "step": 235 + }, + { + "epoch": 1.4970760233918128, + "grad_norm": 0.0673828125, + "learning_rate": 0.0001, + "loss": 0.3026, + "step": 240 + }, + { + "epoch": 1.5282651072124755, + "grad_norm": 0.04150390625, + "learning_rate": 0.0001, + "loss": 0.2916, + "step": 245 + }, + { + "epoch": 1.5594541910331383, + "grad_norm": 0.034912109375, + "learning_rate": 0.0001, + "loss": 0.2548, + "step": 250 + } + ], + "logging_steps": 5, + "max_steps": 250, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 90, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 4.824681746497536e+17, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/codellama/c/codegen/codegen_c_base/checkpoint-250/training_args.bin b/codellama/c/codegen/codegen_c_base/checkpoint-250/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..0819c622349e0b72603c14f0679b6adc20d5d368 --- /dev/null +++ b/codellama/c/codegen/codegen_c_base/checkpoint-250/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8bc8838bb382bdcbf063c18ead9f8633ed8301a08e8edced773ee484a51f89a +size 7416 diff --git a/codellama/c/codegen/codegen_c_base/completed b/codellama/c/codegen/codegen_c_base/completed new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/codellama/c/codegen/codegen_c_base/metrics.json b/codellama/c/codegen/codegen_c_base/metrics.json new file mode 100644 index 0000000000000000000000000000000000000000..9de6ba719442376e9a0b8ccf046457ffb2945b85 --- /dev/null +++ b/codellama/c/codegen/codegen_c_base/metrics.json @@ -0,0 +1 @@ +{"run_name": "codegen_c_base", "train_runtime": 19317.2857, "train_samples_per_second": 0.828, "train_steps_per_second": 0.013, "total_flos": 4.824681746497536e+17, "train_loss": 0.3312075719833374, "epoch": 1.5594541910331383} \ No newline at end of file diff --git a/codellama/c/codegen/codegen_c_base/train_results.json b/codellama/c/codegen/codegen_c_base/train_results.json new file mode 100644 index 0000000000000000000000000000000000000000..02055941f9b14039a170d072735402edc5a66615 --- /dev/null +++ b/codellama/c/codegen/codegen_c_base/train_results.json @@ -0,0 +1,8 @@ +{ + "epoch": 1.5594541910331383, + "total_flos": 4.824681746497536e+17, + "train_loss": 0.3312075719833374, + "train_runtime": 19317.2857, + "train_samples_per_second": 0.828, + "train_steps_per_second": 0.013 +} \ No newline at end of file diff --git a/codellama/c/codegen/codegen_c_base/trainer_state.json b/codellama/c/codegen/codegen_c_base/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..784d6c5b92a99774504eca0f3aa71c033d9842d6 --- /dev/null +++ b/codellama/c/codegen/codegen_c_base/trainer_state.json @@ -0,0 +1,392 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.5594541910331383, + "eval_steps": 500, + "global_step": 250, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.031189083820662766, + "grad_norm": 0.0576171875, + "learning_rate": 0.0001, + "loss": 0.674, + "step": 5 + }, + { + "epoch": 0.06237816764132553, + "grad_norm": 0.052490234375, + "learning_rate": 0.0001, + "loss": 0.5118, + "step": 10 + }, + { + "epoch": 0.0935672514619883, + "grad_norm": 0.03857421875, + "learning_rate": 0.0001, + "loss": 0.4572, + "step": 15 + }, + { + "epoch": 0.12475633528265107, + "grad_norm": 0.039794921875, + "learning_rate": 0.0001, + "loss": 0.4645, + "step": 20 + }, + { + "epoch": 0.15594541910331383, + "grad_norm": 0.0537109375, + "learning_rate": 0.0001, + "loss": 0.4772, + "step": 25 + }, + { + "epoch": 0.1871345029239766, + "grad_norm": 0.059814453125, + "learning_rate": 0.0001, + "loss": 0.4466, + "step": 30 + }, + { + "epoch": 0.21832358674463936, + "grad_norm": 0.0478515625, + "learning_rate": 0.0001, + "loss": 0.4365, + "step": 35 + }, + { + "epoch": 0.24951267056530213, + "grad_norm": 0.138671875, + "learning_rate": 0.0001, + "loss": 0.4622, + "step": 40 + }, + { + "epoch": 0.2807017543859649, + "grad_norm": 0.046875, + "learning_rate": 0.0001, + "loss": 0.3409, + "step": 45 + }, + { + "epoch": 0.31189083820662766, + "grad_norm": 0.0289306640625, + "learning_rate": 0.0001, + "loss": 0.3158, + "step": 50 + }, + { + "epoch": 0.34307992202729043, + "grad_norm": 0.029541015625, + "learning_rate": 0.0001, + "loss": 0.3236, + "step": 55 + }, + { + "epoch": 0.3742690058479532, + "grad_norm": 0.0274658203125, + "learning_rate": 0.0001, + "loss": 0.3273, + "step": 60 + }, + { + "epoch": 0.40545808966861596, + "grad_norm": 0.031494140625, + "learning_rate": 0.0001, + "loss": 0.3142, + "step": 65 + }, + { + "epoch": 0.43664717348927873, + "grad_norm": 0.0269775390625, + "learning_rate": 0.0001, + "loss": 0.3276, + "step": 70 + }, + { + "epoch": 0.4678362573099415, + "grad_norm": 0.02978515625, + "learning_rate": 0.0001, + "loss": 0.334, + "step": 75 + }, + { + "epoch": 0.49902534113060426, + "grad_norm": 0.0703125, + "learning_rate": 0.0001, + "loss": 0.356, + "step": 80 + }, + { + "epoch": 0.530214424951267, + "grad_norm": 0.03271484375, + "learning_rate": 0.0001, + "loss": 0.3176, + "step": 85 + }, + { + "epoch": 0.5614035087719298, + "grad_norm": 0.0322265625, + "learning_rate": 0.0001, + "loss": 0.2829, + "step": 90 + }, + { + "epoch": 0.5925925925925926, + "grad_norm": 0.02587890625, + "learning_rate": 0.0001, + "loss": 0.2984, + "step": 95 + }, + { + "epoch": 0.6237816764132553, + "grad_norm": 0.0296630859375, + "learning_rate": 0.0001, + "loss": 0.3067, + "step": 100 + }, + { + "epoch": 0.6549707602339181, + "grad_norm": 0.0264892578125, + "learning_rate": 0.0001, + "loss": 0.2979, + "step": 105 + }, + { + "epoch": 0.6861598440545809, + "grad_norm": 0.02783203125, + "learning_rate": 0.0001, + "loss": 0.3012, + "step": 110 + }, + { + "epoch": 0.7173489278752436, + "grad_norm": 0.031494140625, + "learning_rate": 0.0001, + "loss": 0.3222, + "step": 115 + }, + { + "epoch": 0.7485380116959064, + "grad_norm": 0.06982421875, + "learning_rate": 0.0001, + "loss": 0.3439, + "step": 120 + }, + { + "epoch": 0.7797270955165692, + "grad_norm": 0.031982421875, + "learning_rate": 0.0001, + "loss": 0.2875, + "step": 125 + }, + { + "epoch": 0.8109161793372319, + "grad_norm": 0.029296875, + "learning_rate": 0.0001, + "loss": 0.2803, + "step": 130 + }, + { + "epoch": 0.8421052631578947, + "grad_norm": 0.029541015625, + "learning_rate": 0.0001, + "loss": 0.2999, + "step": 135 + }, + { + "epoch": 0.8732943469785575, + "grad_norm": 0.02734375, + "learning_rate": 0.0001, + "loss": 0.2986, + "step": 140 + }, + { + "epoch": 0.9044834307992202, + "grad_norm": 0.028076171875, + "learning_rate": 0.0001, + "loss": 0.302, + "step": 145 + }, + { + "epoch": 0.935672514619883, + "grad_norm": 0.0322265625, + "learning_rate": 0.0001, + "loss": 0.2936, + "step": 150 + }, + { + "epoch": 0.9668615984405458, + "grad_norm": 0.034423828125, + "learning_rate": 0.0001, + "loss": 0.3017, + "step": 155 + }, + { + "epoch": 0.9980506822612085, + "grad_norm": 0.060546875, + "learning_rate": 0.0001, + "loss": 0.3486, + "step": 160 + }, + { + "epoch": 1.0292397660818713, + "grad_norm": 0.033935546875, + "learning_rate": 0.0001, + "loss": 0.2867, + "step": 165 + }, + { + "epoch": 1.060428849902534, + "grad_norm": 0.031494140625, + "learning_rate": 0.0001, + "loss": 0.2628, + "step": 170 + }, + { + "epoch": 1.0916179337231968, + "grad_norm": 0.0289306640625, + "learning_rate": 0.0001, + "loss": 0.278, + "step": 175 + }, + { + "epoch": 1.1228070175438596, + "grad_norm": 0.031982421875, + "learning_rate": 0.0001, + "loss": 0.2915, + "step": 180 + }, + { + "epoch": 1.1539961013645224, + "grad_norm": 0.031005859375, + "learning_rate": 0.0001, + "loss": 0.2902, + "step": 185 + }, + { + "epoch": 1.1851851851851851, + "grad_norm": 0.03271484375, + "learning_rate": 0.0001, + "loss": 0.2785, + "step": 190 + }, + { + "epoch": 1.2163742690058479, + "grad_norm": 0.042236328125, + "learning_rate": 0.0001, + "loss": 0.295, + "step": 195 + }, + { + "epoch": 1.2475633528265107, + "grad_norm": 0.05615234375, + "learning_rate": 0.0001, + "loss": 0.3039, + "step": 200 + }, + { + "epoch": 1.2787524366471734, + "grad_norm": 0.03759765625, + "learning_rate": 0.0001, + "loss": 0.2725, + "step": 205 + }, + { + "epoch": 1.3099415204678362, + "grad_norm": 0.035400390625, + "learning_rate": 0.0001, + "loss": 0.2738, + "step": 210 + }, + { + "epoch": 1.341130604288499, + "grad_norm": 0.031494140625, + "learning_rate": 0.0001, + "loss": 0.2861, + "step": 215 + }, + { + "epoch": 1.3723196881091617, + "grad_norm": 0.036865234375, + "learning_rate": 0.0001, + "loss": 0.2859, + "step": 220 + }, + { + "epoch": 1.4035087719298245, + "grad_norm": 0.03515625, + "learning_rate": 0.0001, + "loss": 0.285, + "step": 225 + }, + { + "epoch": 1.4346978557504872, + "grad_norm": 0.03466796875, + "learning_rate": 0.0001, + "loss": 0.2713, + "step": 230 + }, + { + "epoch": 1.46588693957115, + "grad_norm": 0.042724609375, + "learning_rate": 0.0001, + "loss": 0.2982, + "step": 235 + }, + { + "epoch": 1.4970760233918128, + "grad_norm": 0.0673828125, + "learning_rate": 0.0001, + "loss": 0.3026, + "step": 240 + }, + { + "epoch": 1.5282651072124755, + "grad_norm": 0.04150390625, + "learning_rate": 0.0001, + "loss": 0.2916, + "step": 245 + }, + { + "epoch": 1.5594541910331383, + "grad_norm": 0.034912109375, + "learning_rate": 0.0001, + "loss": 0.2548, + "step": 250 + }, + { + "epoch": 1.5594541910331383, + "step": 250, + "total_flos": 4.824681746497536e+17, + "train_loss": 0.3312075719833374, + "train_runtime": 19317.2857, + "train_samples_per_second": 0.828, + "train_steps_per_second": 0.013 + } + ], + "logging_steps": 5, + "max_steps": 250, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 90, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 4.824681746497536e+17, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/codellama/c/codegen/codegen_c_callgraph/all_results.json b/codellama/c/codegen/codegen_c_callgraph/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..53c61337ae5b3fa1cb64171928dfaee6080928f5 --- /dev/null +++ b/codellama/c/codegen/codegen_c_callgraph/all_results.json @@ -0,0 +1,8 @@ +{ + "epoch": 1.5594541910331383, + "total_flos": 4.824681746497536e+17, + "train_loss": 0.3939001045227051, + "train_runtime": 16242.9143, + "train_samples_per_second": 0.985, + "train_steps_per_second": 0.015 +} \ No newline at end of file diff --git a/codellama/c/codegen/codegen_c_callgraph/checkpoint-250/README.md b/codellama/c/codegen/codegen_c_callgraph/checkpoint-250/README.md new file mode 100644 index 0000000000000000000000000000000000000000..1c480e52daf505882c9f03a4e75fe30ea8c22c2c --- /dev/null +++ b/codellama/c/codegen/codegen_c_callgraph/checkpoint-250/README.md @@ -0,0 +1,202 @@ +--- +base_model: CodeLlama-13b-Instruct-hf/ +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.2 \ No newline at end of file diff --git a/codellama/c/codegen/codegen_c_callgraph/checkpoint-250/adapter_config.json b/codellama/c/codegen/codegen_c_callgraph/checkpoint-250/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..2df42068646d06bd267f7a367097afb2c2f1274e --- /dev/null +++ b/codellama/c/codegen/codegen_c_callgraph/checkpoint-250/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "CodeLlama-13b-Instruct-hf/", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "k_proj", + "gate_proj", + "v_proj", + "q_proj", + "down_proj", + "up_proj", + "o_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/codellama/c/codegen/codegen_c_callgraph/checkpoint-250/adapter_model.safetensors b/codellama/c/codegen/codegen_c_callgraph/checkpoint-250/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..df984e9b104e9ebf2c62a1575b9eba8bc03ef26f --- /dev/null +++ b/codellama/c/codegen/codegen_c_callgraph/checkpoint-250/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9182b2abad902e36d0f55bf5bca9fd4e5fffabf8e756938acd3954b72bc17e48 +size 1156480200 diff --git a/codellama/c/codegen/codegen_c_callgraph/checkpoint-250/adapter_model/README.md b/codellama/c/codegen/codegen_c_callgraph/checkpoint-250/adapter_model/README.md new file mode 100644 index 0000000000000000000000000000000000000000..1c480e52daf505882c9f03a4e75fe30ea8c22c2c --- /dev/null +++ b/codellama/c/codegen/codegen_c_callgraph/checkpoint-250/adapter_model/README.md @@ -0,0 +1,202 @@ +--- +base_model: CodeLlama-13b-Instruct-hf/ +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.2 \ No newline at end of file diff --git a/codellama/c/codegen/codegen_c_callgraph/checkpoint-250/adapter_model/adapter_config.json b/codellama/c/codegen/codegen_c_callgraph/checkpoint-250/adapter_model/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..2df42068646d06bd267f7a367097afb2c2f1274e --- /dev/null +++ b/codellama/c/codegen/codegen_c_callgraph/checkpoint-250/adapter_model/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "CodeLlama-13b-Instruct-hf/", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "k_proj", + "gate_proj", + "v_proj", + "q_proj", + "down_proj", + "up_proj", + "o_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/codellama/c/codegen/codegen_c_callgraph/checkpoint-250/adapter_model/adapter_model.safetensors b/codellama/c/codegen/codegen_c_callgraph/checkpoint-250/adapter_model/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..df984e9b104e9ebf2c62a1575b9eba8bc03ef26f --- /dev/null +++ b/codellama/c/codegen/codegen_c_callgraph/checkpoint-250/adapter_model/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9182b2abad902e36d0f55bf5bca9fd4e5fffabf8e756938acd3954b72bc17e48 +size 1156480200 diff --git a/codellama/c/codegen/codegen_c_callgraph/checkpoint-250/added_tokens.json b/codellama/c/codegen/codegen_c_callgraph/checkpoint-250/added_tokens.json new file mode 100644 index 0000000000000000000000000000000000000000..1cbbe5179eb8b5cc46632bbbc00eb51c68847074 --- /dev/null +++ b/codellama/c/codegen/codegen_c_callgraph/checkpoint-250/added_tokens.json @@ -0,0 +1,3 @@ +{ + "[PAD]": 32016 +} diff --git a/codellama/c/codegen/codegen_c_callgraph/checkpoint-250/optimizer.pt b/codellama/c/codegen/codegen_c_callgraph/checkpoint-250/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..7df1a9a6f393b85bb1d82014a65263222a386655 --- /dev/null +++ b/codellama/c/codegen/codegen_c_callgraph/checkpoint-250/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9f9314ec224f5aec674269a51250a37ed01edf26ef196969694857ec69647f0 +size 2003126962 diff --git a/codellama/c/codegen/codegen_c_callgraph/checkpoint-250/rng_state.pth b/codellama/c/codegen/codegen_c_callgraph/checkpoint-250/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..d2d1ca450f51a73b9938ae4d77eda4f9e4e83adb --- /dev/null +++ b/codellama/c/codegen/codegen_c_callgraph/checkpoint-250/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3834587c092125bd201661e37e816cdbd55b1a136077c4e0b1d7944daa54445 +size 14244 diff --git a/codellama/c/codegen/codegen_c_callgraph/checkpoint-250/scheduler.pt b/codellama/c/codegen/codegen_c_callgraph/checkpoint-250/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..643c118cf4c0a4a8f0d0d4818981421321bfc74c --- /dev/null +++ b/codellama/c/codegen/codegen_c_callgraph/checkpoint-250/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e30df1bee25ad98e1c721d888a184c00d77649c8e5c8c3b1e8a4c16f9fe7f7ef +size 1064 diff --git a/codellama/c/codegen/codegen_c_callgraph/checkpoint-250/special_tokens_map.json b/codellama/c/codegen/codegen_c_callgraph/checkpoint-250/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..330bb0c14209dcd402b155e7d58c6c2b5210d40d --- /dev/null +++ b/codellama/c/codegen/codegen_c_callgraph/checkpoint-250/special_tokens_map.json @@ -0,0 +1,36 @@ +{ + "additional_special_tokens": [ + "▁ ", + "▁", + "▁ ", + "▁ " + ], + "bos_token": { + "content": " ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "[PAD]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/codellama/c/codegen/codegen_c_callgraph/checkpoint-250/tokenizer.model b/codellama/c/codegen/codegen_c_callgraph/checkpoint-250/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..f6722e8b170230ebdd4c0f5f2ce03f219be536d4 --- /dev/null +++ b/codellama/c/codegen/codegen_c_callgraph/checkpoint-250/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45ccb9c8b6b561889acea59191d66986d314e7cbd6a78abc6e49b139ca91c1e6 +size 500058 diff --git a/codellama/c/codegen/codegen_c_callgraph/checkpoint-250/tokenizer_config.json b/codellama/c/codegen/codegen_c_callgraph/checkpoint-250/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8f4094d204e2be0ed7b6bfa83d20cff28326258d --- /dev/null +++ b/codellama/c/codegen/codegen_c_callgraph/checkpoint-250/tokenizer_config.json @@ -0,0 +1,94 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": " ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": " ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32007": { + "content": "▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32008": { + "content": "▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32009": { + "content": "▁ ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32010": { + "content": "▁ ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32016": { + "content": "[PAD]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [ + "▁ ", + "▁", + "▁ ", + "▁ " + ], + "bos_token": " ", + "chat_template": "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if loop.index0 == 0 and system_message != false %}{% set content = '<", + "eot_token": "▁>\\n' + system_message + '\\n< >\\n\\n' + message['content'] %}{% else %}{% set content = message['content'] %}{% endif %}{% if message['role'] == 'user' %}{{ bos_token + '[INST] ' + content | trim + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ ' ' + content | trim + ' ' + eos_token }}{% endif %}{% endfor %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "fill_token": " ", + "legacy": null, + "middle_token": "▁ ", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "[PAD]", + "padding_side": "right", + "prefix_token": "▁ ", + "sp_model_kwargs": {}, + "suffix_first": false, + "suffix_token": "▁", + "tokenizer_class": "CodeLlamaTokenizer", + "unk_token": " ", + "use_default_system_prompt": false +} diff --git a/codellama/c/codegen/codegen_c_callgraph/checkpoint-250/trainer_state.json b/codellama/c/codegen/codegen_c_callgraph/checkpoint-250/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..94841cd9adf7700c18b59ba6736775b4933aa308 --- /dev/null +++ b/codellama/c/codegen/codegen_c_callgraph/checkpoint-250/trainer_state.json @@ -0,0 +1,383 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.5594541910331383, + "eval_steps": 500, + "global_step": 250, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.031189083820662766, + "grad_norm": 0.138671875, + "learning_rate": 0.0001, + "loss": 3.833, + "step": 5 + }, + { + "epoch": 0.06237816764132553, + "grad_norm": 0.05322265625, + "learning_rate": 0.0001, + "loss": 0.4376, + "step": 10 + }, + { + "epoch": 0.0935672514619883, + "grad_norm": 0.06494140625, + "learning_rate": 0.0001, + "loss": 0.4347, + "step": 15 + }, + { + "epoch": 0.12475633528265107, + "grad_norm": 0.059326171875, + "learning_rate": 0.0001, + "loss": 0.4465, + "step": 20 + }, + { + "epoch": 0.15594541910331383, + "grad_norm": 0.060791015625, + "learning_rate": 0.0001, + "loss": 0.4576, + "step": 25 + }, + { + "epoch": 0.1871345029239766, + "grad_norm": 0.0703125, + "learning_rate": 0.0001, + "loss": 0.4395, + "step": 30 + }, + { + "epoch": 0.21832358674463936, + "grad_norm": 0.051513671875, + "learning_rate": 0.0001, + "loss": 0.4391, + "step": 35 + }, + { + "epoch": 0.24951267056530213, + "grad_norm": 0.150390625, + "learning_rate": 0.0001, + "loss": 0.4827, + "step": 40 + }, + { + "epoch": 0.2807017543859649, + "grad_norm": 0.047119140625, + "learning_rate": 0.0001, + "loss": 0.3364, + "step": 45 + }, + { + "epoch": 0.31189083820662766, + "grad_norm": 0.032958984375, + "learning_rate": 0.0001, + "loss": 0.3183, + "step": 50 + }, + { + "epoch": 0.34307992202729043, + "grad_norm": 0.033447265625, + "learning_rate": 0.0001, + "loss": 0.3259, + "step": 55 + }, + { + "epoch": 0.3742690058479532, + "grad_norm": 0.032470703125, + "learning_rate": 0.0001, + "loss": 0.3314, + "step": 60 + }, + { + "epoch": 0.40545808966861596, + "grad_norm": 0.033203125, + "learning_rate": 0.0001, + "loss": 0.3172, + "step": 65 + }, + { + "epoch": 0.43664717348927873, + "grad_norm": 0.0308837890625, + "learning_rate": 0.0001, + "loss": 0.3306, + "step": 70 + }, + { + "epoch": 0.4678362573099415, + "grad_norm": 0.035400390625, + "learning_rate": 0.0001, + "loss": 0.3365, + "step": 75 + }, + { + "epoch": 0.49902534113060426, + "grad_norm": 0.083984375, + "learning_rate": 0.0001, + "loss": 0.3592, + "step": 80 + }, + { + "epoch": 0.530214424951267, + "grad_norm": 0.043212890625, + "learning_rate": 0.0001, + "loss": 0.3191, + "step": 85 + }, + { + "epoch": 0.5614035087719298, + "grad_norm": 0.0311279296875, + "learning_rate": 0.0001, + "loss": 0.2838, + "step": 90 + }, + { + "epoch": 0.5925925925925926, + "grad_norm": 0.039794921875, + "learning_rate": 0.0001, + "loss": 0.3003, + "step": 95 + }, + { + "epoch": 0.6237816764132553, + "grad_norm": 0.03076171875, + "learning_rate": 0.0001, + "loss": 0.3073, + "step": 100 + }, + { + "epoch": 0.6549707602339181, + "grad_norm": 0.031982421875, + "learning_rate": 0.0001, + "loss": 0.3, + "step": 105 + }, + { + "epoch": 0.6861598440545809, + "grad_norm": 0.033203125, + "learning_rate": 0.0001, + "loss": 0.3031, + "step": 110 + }, + { + "epoch": 0.7173489278752436, + "grad_norm": 0.037841796875, + "learning_rate": 0.0001, + "loss": 0.3237, + "step": 115 + }, + { + "epoch": 0.7485380116959064, + "grad_norm": 0.0771484375, + "learning_rate": 0.0001, + "loss": 0.3449, + "step": 120 + }, + { + "epoch": 0.7797270955165692, + "grad_norm": 0.03662109375, + "learning_rate": 0.0001, + "loss": 0.2893, + "step": 125 + }, + { + "epoch": 0.8109161793372319, + "grad_norm": 0.0301513671875, + "learning_rate": 0.0001, + "loss": 0.2818, + "step": 130 + }, + { + "epoch": 0.8421052631578947, + "grad_norm": 0.03125, + "learning_rate": 0.0001, + "loss": 0.3003, + "step": 135 + }, + { + "epoch": 0.8732943469785575, + "grad_norm": 0.037841796875, + "learning_rate": 0.0001, + "loss": 0.2999, + "step": 140 + }, + { + "epoch": 0.9044834307992202, + "grad_norm": 0.0458984375, + "learning_rate": 0.0001, + "loss": 0.3044, + "step": 145 + }, + { + "epoch": 0.935672514619883, + "grad_norm": 0.03466796875, + "learning_rate": 0.0001, + "loss": 0.2954, + "step": 150 + }, + { + "epoch": 0.9668615984405458, + "grad_norm": 0.03759765625, + "learning_rate": 0.0001, + "loss": 0.304, + "step": 155 + }, + { + "epoch": 0.9980506822612085, + "grad_norm": 0.07421875, + "learning_rate": 0.0001, + "loss": 0.351, + "step": 160 + }, + { + "epoch": 1.0292397660818713, + "grad_norm": 0.03564453125, + "learning_rate": 0.0001, + "loss": 0.2895, + "step": 165 + }, + { + "epoch": 1.060428849902534, + "grad_norm": 0.03271484375, + "learning_rate": 0.0001, + "loss": 0.2658, + "step": 170 + }, + { + "epoch": 1.0916179337231968, + "grad_norm": 0.031494140625, + "learning_rate": 0.0001, + "loss": 0.2809, + "step": 175 + }, + { + "epoch": 1.1228070175438596, + "grad_norm": 0.031982421875, + "learning_rate": 0.0001, + "loss": 0.2943, + "step": 180 + }, + { + "epoch": 1.1539961013645224, + "grad_norm": 0.0322265625, + "learning_rate": 0.0001, + "loss": 0.2932, + "step": 185 + }, + { + "epoch": 1.1851851851851851, + "grad_norm": 0.033935546875, + "learning_rate": 0.0001, + "loss": 0.2806, + "step": 190 + }, + { + "epoch": 1.2163742690058479, + "grad_norm": 0.0419921875, + "learning_rate": 0.0001, + "loss": 0.298, + "step": 195 + }, + { + "epoch": 1.2475633528265107, + "grad_norm": 0.062255859375, + "learning_rate": 0.0001, + "loss": 0.3069, + "step": 200 + }, + { + "epoch": 1.2787524366471734, + "grad_norm": 0.0419921875, + "learning_rate": 0.0001, + "loss": 0.2752, + "step": 205 + }, + { + "epoch": 1.3099415204678362, + "grad_norm": 0.03515625, + "learning_rate": 0.0001, + "loss": 0.276, + "step": 210 + }, + { + "epoch": 1.341130604288499, + "grad_norm": 0.032958984375, + "learning_rate": 0.0001, + "loss": 0.2891, + "step": 215 + }, + { + "epoch": 1.3723196881091617, + "grad_norm": 0.033447265625, + "learning_rate": 0.0001, + "loss": 0.2886, + "step": 220 + }, + { + "epoch": 1.4035087719298245, + "grad_norm": 0.035888671875, + "learning_rate": 0.0001, + "loss": 0.289, + "step": 225 + }, + { + "epoch": 1.4346978557504872, + "grad_norm": 0.03564453125, + "learning_rate": 0.0001, + "loss": 0.274, + "step": 230 + }, + { + "epoch": 1.46588693957115, + "grad_norm": 0.04296875, + "learning_rate": 0.0001, + "loss": 0.3011, + "step": 235 + }, + { + "epoch": 1.4970760233918128, + "grad_norm": 0.07080078125, + "learning_rate": 0.0001, + "loss": 0.306, + "step": 240 + }, + { + "epoch": 1.5282651072124755, + "grad_norm": 0.0439453125, + "learning_rate": 0.0001, + "loss": 0.2949, + "step": 245 + }, + { + "epoch": 1.5594541910331383, + "grad_norm": 0.032470703125, + "learning_rate": 0.0001, + "loss": 0.2576, + "step": 250 + } + ], + "logging_steps": 5, + "max_steps": 250, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 90, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 4.824681746497536e+17, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/codellama/c/codegen/codegen_c_callgraph/checkpoint-250/training_args.bin b/codellama/c/codegen/codegen_c_callgraph/checkpoint-250/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..7cb57df31efe592d3e09824e274d70651e02d209 --- /dev/null +++ b/codellama/c/codegen/codegen_c_callgraph/checkpoint-250/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:480f7814211f8beec0602b9e627a47020adb85645fbb25783f4060bee7d3a607 +size 7416 diff --git a/codellama/c/codegen/codegen_c_callgraph/completed b/codellama/c/codegen/codegen_c_callgraph/completed new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/codellama/c/codegen/codegen_c_callgraph/metrics.json b/codellama/c/codegen/codegen_c_callgraph/metrics.json new file mode 100644 index 0000000000000000000000000000000000000000..c1dc2ac43ad220620ccd2dd1fcfad890d22445e6 --- /dev/null +++ b/codellama/c/codegen/codegen_c_callgraph/metrics.json @@ -0,0 +1 @@ +{"run_name": "codegen_c_callgraph", "train_runtime": 16242.9143, "train_samples_per_second": 0.985, "train_steps_per_second": 0.015, "total_flos": 4.824681746497536e+17, "train_loss": 0.3939001045227051, "epoch": 1.5594541910331383} \ No newline at end of file diff --git a/codellama/c/codegen/codegen_c_callgraph/train_results.json b/codellama/c/codegen/codegen_c_callgraph/train_results.json new file mode 100644 index 0000000000000000000000000000000000000000..53c61337ae5b3fa1cb64171928dfaee6080928f5 --- /dev/null +++ b/codellama/c/codegen/codegen_c_callgraph/train_results.json @@ -0,0 +1,8 @@ +{ + "epoch": 1.5594541910331383, + "total_flos": 4.824681746497536e+17, + "train_loss": 0.3939001045227051, + "train_runtime": 16242.9143, + "train_samples_per_second": 0.985, + "train_steps_per_second": 0.015 +} \ No newline at end of file diff --git a/codellama/c/codegen/codegen_c_callgraph/trainer_state.json b/codellama/c/codegen/codegen_c_callgraph/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..c218694e076f787340960f0a16f38ddac33415db --- /dev/null +++ b/codellama/c/codegen/codegen_c_callgraph/trainer_state.json @@ -0,0 +1,392 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.5594541910331383, + "eval_steps": 500, + "global_step": 250, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.031189083820662766, + "grad_norm": 0.138671875, + "learning_rate": 0.0001, + "loss": 3.833, + "step": 5 + }, + { + "epoch": 0.06237816764132553, + "grad_norm": 0.05322265625, + "learning_rate": 0.0001, + "loss": 0.4376, + "step": 10 + }, + { + "epoch": 0.0935672514619883, + "grad_norm": 0.06494140625, + "learning_rate": 0.0001, + "loss": 0.4347, + "step": 15 + }, + { + "epoch": 0.12475633528265107, + "grad_norm": 0.059326171875, + "learning_rate": 0.0001, + "loss": 0.4465, + "step": 20 + }, + { + "epoch": 0.15594541910331383, + "grad_norm": 0.060791015625, + "learning_rate": 0.0001, + "loss": 0.4576, + "step": 25 + }, + { + "epoch": 0.1871345029239766, + "grad_norm": 0.0703125, + "learning_rate": 0.0001, + "loss": 0.4395, + "step": 30 + }, + { + "epoch": 0.21832358674463936, + "grad_norm": 0.051513671875, + "learning_rate": 0.0001, + "loss": 0.4391, + "step": 35 + }, + { + "epoch": 0.24951267056530213, + "grad_norm": 0.150390625, + "learning_rate": 0.0001, + "loss": 0.4827, + "step": 40 + }, + { + "epoch": 0.2807017543859649, + "grad_norm": 0.047119140625, + "learning_rate": 0.0001, + "loss": 0.3364, + "step": 45 + }, + { + "epoch": 0.31189083820662766, + "grad_norm": 0.032958984375, + "learning_rate": 0.0001, + "loss": 0.3183, + "step": 50 + }, + { + "epoch": 0.34307992202729043, + "grad_norm": 0.033447265625, + "learning_rate": 0.0001, + "loss": 0.3259, + "step": 55 + }, + { + "epoch": 0.3742690058479532, + "grad_norm": 0.032470703125, + "learning_rate": 0.0001, + "loss": 0.3314, + "step": 60 + }, + { + "epoch": 0.40545808966861596, + "grad_norm": 0.033203125, + "learning_rate": 0.0001, + "loss": 0.3172, + "step": 65 + }, + { + "epoch": 0.43664717348927873, + "grad_norm": 0.0308837890625, + "learning_rate": 0.0001, + "loss": 0.3306, + "step": 70 + }, + { + "epoch": 0.4678362573099415, + "grad_norm": 0.035400390625, + "learning_rate": 0.0001, + "loss": 0.3365, + "step": 75 + }, + { + "epoch": 0.49902534113060426, + "grad_norm": 0.083984375, + "learning_rate": 0.0001, + "loss": 0.3592, + "step": 80 + }, + { + "epoch": 0.530214424951267, + "grad_norm": 0.043212890625, + "learning_rate": 0.0001, + "loss": 0.3191, + "step": 85 + }, + { + "epoch": 0.5614035087719298, + "grad_norm": 0.0311279296875, + "learning_rate": 0.0001, + "loss": 0.2838, + "step": 90 + }, + { + "epoch": 0.5925925925925926, + "grad_norm": 0.039794921875, + "learning_rate": 0.0001, + "loss": 0.3003, + "step": 95 + }, + { + "epoch": 0.6237816764132553, + "grad_norm": 0.03076171875, + "learning_rate": 0.0001, + "loss": 0.3073, + "step": 100 + }, + { + "epoch": 0.6549707602339181, + "grad_norm": 0.031982421875, + "learning_rate": 0.0001, + "loss": 0.3, + "step": 105 + }, + { + "epoch": 0.6861598440545809, + "grad_norm": 0.033203125, + "learning_rate": 0.0001, + "loss": 0.3031, + "step": 110 + }, + { + "epoch": 0.7173489278752436, + "grad_norm": 0.037841796875, + "learning_rate": 0.0001, + "loss": 0.3237, + "step": 115 + }, + { + "epoch": 0.7485380116959064, + "grad_norm": 0.0771484375, + "learning_rate": 0.0001, + "loss": 0.3449, + "step": 120 + }, + { + "epoch": 0.7797270955165692, + "grad_norm": 0.03662109375, + "learning_rate": 0.0001, + "loss": 0.2893, + "step": 125 + }, + { + "epoch": 0.8109161793372319, + "grad_norm": 0.0301513671875, + "learning_rate": 0.0001, + "loss": 0.2818, + "step": 130 + }, + { + "epoch": 0.8421052631578947, + "grad_norm": 0.03125, + "learning_rate": 0.0001, + "loss": 0.3003, + "step": 135 + }, + { + "epoch": 0.8732943469785575, + "grad_norm": 0.037841796875, + "learning_rate": 0.0001, + "loss": 0.2999, + "step": 140 + }, + { + "epoch": 0.9044834307992202, + "grad_norm": 0.0458984375, + "learning_rate": 0.0001, + "loss": 0.3044, + "step": 145 + }, + { + "epoch": 0.935672514619883, + "grad_norm": 0.03466796875, + "learning_rate": 0.0001, + "loss": 0.2954, + "step": 150 + }, + { + "epoch": 0.9668615984405458, + "grad_norm": 0.03759765625, + "learning_rate": 0.0001, + "loss": 0.304, + "step": 155 + }, + { + "epoch": 0.9980506822612085, + "grad_norm": 0.07421875, + "learning_rate": 0.0001, + "loss": 0.351, + "step": 160 + }, + { + "epoch": 1.0292397660818713, + "grad_norm": 0.03564453125, + "learning_rate": 0.0001, + "loss": 0.2895, + "step": 165 + }, + { + "epoch": 1.060428849902534, + "grad_norm": 0.03271484375, + "learning_rate": 0.0001, + "loss": 0.2658, + "step": 170 + }, + { + "epoch": 1.0916179337231968, + "grad_norm": 0.031494140625, + "learning_rate": 0.0001, + "loss": 0.2809, + "step": 175 + }, + { + "epoch": 1.1228070175438596, + "grad_norm": 0.031982421875, + "learning_rate": 0.0001, + "loss": 0.2943, + "step": 180 + }, + { + "epoch": 1.1539961013645224, + "grad_norm": 0.0322265625, + "learning_rate": 0.0001, + "loss": 0.2932, + "step": 185 + }, + { + "epoch": 1.1851851851851851, + "grad_norm": 0.033935546875, + "learning_rate": 0.0001, + "loss": 0.2806, + "step": 190 + }, + { + "epoch": 1.2163742690058479, + "grad_norm": 0.0419921875, + "learning_rate": 0.0001, + "loss": 0.298, + "step": 195 + }, + { + "epoch": 1.2475633528265107, + "grad_norm": 0.062255859375, + "learning_rate": 0.0001, + "loss": 0.3069, + "step": 200 + }, + { + "epoch": 1.2787524366471734, + "grad_norm": 0.0419921875, + "learning_rate": 0.0001, + "loss": 0.2752, + "step": 205 + }, + { + "epoch": 1.3099415204678362, + "grad_norm": 0.03515625, + "learning_rate": 0.0001, + "loss": 0.276, + "step": 210 + }, + { + "epoch": 1.341130604288499, + "grad_norm": 0.032958984375, + "learning_rate": 0.0001, + "loss": 0.2891, + "step": 215 + }, + { + "epoch": 1.3723196881091617, + "grad_norm": 0.033447265625, + "learning_rate": 0.0001, + "loss": 0.2886, + "step": 220 + }, + { + "epoch": 1.4035087719298245, + "grad_norm": 0.035888671875, + "learning_rate": 0.0001, + "loss": 0.289, + "step": 225 + }, + { + "epoch": 1.4346978557504872, + "grad_norm": 0.03564453125, + "learning_rate": 0.0001, + "loss": 0.274, + "step": 230 + }, + { + "epoch": 1.46588693957115, + "grad_norm": 0.04296875, + "learning_rate": 0.0001, + "loss": 0.3011, + "step": 235 + }, + { + "epoch": 1.4970760233918128, + "grad_norm": 0.07080078125, + "learning_rate": 0.0001, + "loss": 0.306, + "step": 240 + }, + { + "epoch": 1.5282651072124755, + "grad_norm": 0.0439453125, + "learning_rate": 0.0001, + "loss": 0.2949, + "step": 245 + }, + { + "epoch": 1.5594541910331383, + "grad_norm": 0.032470703125, + "learning_rate": 0.0001, + "loss": 0.2576, + "step": 250 + }, + { + "epoch": 1.5594541910331383, + "step": 250, + "total_flos": 4.824681746497536e+17, + "train_loss": 0.3939001045227051, + "train_runtime": 16242.9143, + "train_samples_per_second": 0.985, + "train_steps_per_second": 0.015 + } + ], + "logging_steps": 5, + "max_steps": 250, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 90, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 4.824681746497536e+17, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/codellama/c/codegen/codegen_c_dataflow/all_results.json b/codellama/c/codegen/codegen_c_dataflow/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..8e8b0ba84438f8ba44ddd6b0830f92de2debd858 --- /dev/null +++ b/codellama/c/codegen/codegen_c_dataflow/all_results.json @@ -0,0 +1,8 @@ +{ + "epoch": 1.5594541910331383, + "total_flos": 4.824681746497536e+17, + "train_loss": 0.32678396368026735, + "train_runtime": 14418.506, + "train_samples_per_second": 1.11, + "train_steps_per_second": 0.017 +} \ No newline at end of file diff --git a/codellama/c/codegen/codegen_c_dataflow/checkpoint-250/README.md b/codellama/c/codegen/codegen_c_dataflow/checkpoint-250/README.md new file mode 100644 index 0000000000000000000000000000000000000000..1c480e52daf505882c9f03a4e75fe30ea8c22c2c --- /dev/null +++ b/codellama/c/codegen/codegen_c_dataflow/checkpoint-250/README.md @@ -0,0 +1,202 @@ +--- +base_model: CodeLlama-13b-Instruct-hf/ +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.2 \ No newline at end of file diff --git a/codellama/c/codegen/codegen_c_dataflow/checkpoint-250/adapter_config.json b/codellama/c/codegen/codegen_c_dataflow/checkpoint-250/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..def328d8bba3c86728b4b74d2e0a34798c075212 --- /dev/null +++ b/codellama/c/codegen/codegen_c_dataflow/checkpoint-250/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "CodeLlama-13b-Instruct-hf/", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "q_proj", + "down_proj", + "o_proj", + "up_proj", + "v_proj", + "k_proj", + "gate_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/codellama/c/codegen/codegen_c_dataflow/checkpoint-250/adapter_model.safetensors b/codellama/c/codegen/codegen_c_dataflow/checkpoint-250/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..555c3f44b69f8a6816a53b65d5012bbce6825666 --- /dev/null +++ b/codellama/c/codegen/codegen_c_dataflow/checkpoint-250/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce2bcf8d16b9eb6399ca69b666cb961d82b2da2f71c234a74b58988f45f9b43c +size 1156480200 diff --git a/codellama/c/codegen/codegen_c_dataflow/checkpoint-250/adapter_model/README.md b/codellama/c/codegen/codegen_c_dataflow/checkpoint-250/adapter_model/README.md new file mode 100644 index 0000000000000000000000000000000000000000..1c480e52daf505882c9f03a4e75fe30ea8c22c2c --- /dev/null +++ b/codellama/c/codegen/codegen_c_dataflow/checkpoint-250/adapter_model/README.md @@ -0,0 +1,202 @@ +--- +base_model: CodeLlama-13b-Instruct-hf/ +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.2 \ No newline at end of file diff --git a/codellama/c/codegen/codegen_c_dataflow/checkpoint-250/adapter_model/adapter_config.json b/codellama/c/codegen/codegen_c_dataflow/checkpoint-250/adapter_model/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..def328d8bba3c86728b4b74d2e0a34798c075212 --- /dev/null +++ b/codellama/c/codegen/codegen_c_dataflow/checkpoint-250/adapter_model/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "CodeLlama-13b-Instruct-hf/", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "q_proj", + "down_proj", + "o_proj", + "up_proj", + "v_proj", + "k_proj", + "gate_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/codellama/c/codegen/codegen_c_dataflow/checkpoint-250/adapter_model/adapter_model.safetensors b/codellama/c/codegen/codegen_c_dataflow/checkpoint-250/adapter_model/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..555c3f44b69f8a6816a53b65d5012bbce6825666 --- /dev/null +++ b/codellama/c/codegen/codegen_c_dataflow/checkpoint-250/adapter_model/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce2bcf8d16b9eb6399ca69b666cb961d82b2da2f71c234a74b58988f45f9b43c +size 1156480200 diff --git a/codellama/c/codegen/codegen_c_dataflow/checkpoint-250/added_tokens.json b/codellama/c/codegen/codegen_c_dataflow/checkpoint-250/added_tokens.json new file mode 100644 index 0000000000000000000000000000000000000000..1cbbe5179eb8b5cc46632bbbc00eb51c68847074 --- /dev/null +++ b/codellama/c/codegen/codegen_c_dataflow/checkpoint-250/added_tokens.json @@ -0,0 +1,3 @@ +{ + "[PAD]": 32016 +} diff --git a/codellama/c/codegen/codegen_c_dataflow/checkpoint-250/optimizer.pt b/codellama/c/codegen/codegen_c_dataflow/checkpoint-250/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..bb092e2b9f5b651b1c2954b7ea6950651deaea00 --- /dev/null +++ b/codellama/c/codegen/codegen_c_dataflow/checkpoint-250/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ba7a12f98fe3637dd9e2a9b7a88e8ca89a61ebd3df482cb398f30ccc1aed733 +size 2003126962 diff --git a/codellama/c/codegen/codegen_c_dataflow/checkpoint-250/rng_state.pth b/codellama/c/codegen/codegen_c_dataflow/checkpoint-250/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..d2d1ca450f51a73b9938ae4d77eda4f9e4e83adb --- /dev/null +++ b/codellama/c/codegen/codegen_c_dataflow/checkpoint-250/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3834587c092125bd201661e37e816cdbd55b1a136077c4e0b1d7944daa54445 +size 14244 diff --git a/codellama/c/codegen/codegen_c_dataflow/checkpoint-250/scheduler.pt b/codellama/c/codegen/codegen_c_dataflow/checkpoint-250/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..643c118cf4c0a4a8f0d0d4818981421321bfc74c --- /dev/null +++ b/codellama/c/codegen/codegen_c_dataflow/checkpoint-250/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e30df1bee25ad98e1c721d888a184c00d77649c8e5c8c3b1e8a4c16f9fe7f7ef +size 1064 diff --git a/codellama/c/codegen/codegen_c_dataflow/checkpoint-250/special_tokens_map.json b/codellama/c/codegen/codegen_c_dataflow/checkpoint-250/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..330bb0c14209dcd402b155e7d58c6c2b5210d40d --- /dev/null +++ b/codellama/c/codegen/codegen_c_dataflow/checkpoint-250/special_tokens_map.json @@ -0,0 +1,36 @@ +{ + "additional_special_tokens": [ + "▁ ", + "▁", + "▁ ", + "▁ " + ], + "bos_token": { + "content": " ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "[PAD]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/codellama/c/codegen/codegen_c_dataflow/checkpoint-250/tokenizer.model b/codellama/c/codegen/codegen_c_dataflow/checkpoint-250/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..f6722e8b170230ebdd4c0f5f2ce03f219be536d4 --- /dev/null +++ b/codellama/c/codegen/codegen_c_dataflow/checkpoint-250/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45ccb9c8b6b561889acea59191d66986d314e7cbd6a78abc6e49b139ca91c1e6 +size 500058 diff --git a/codellama/c/codegen/codegen_c_dataflow/checkpoint-250/tokenizer_config.json b/codellama/c/codegen/codegen_c_dataflow/checkpoint-250/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8f4094d204e2be0ed7b6bfa83d20cff28326258d --- /dev/null +++ b/codellama/c/codegen/codegen_c_dataflow/checkpoint-250/tokenizer_config.json @@ -0,0 +1,94 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": " ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": " ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32007": { + "content": "▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32008": { + "content": "▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32009": { + "content": "▁ ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32010": { + "content": "▁ ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32016": { + "content": "[PAD]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [ + "▁ ", + "▁", + "▁ ", + "▁ " + ], + "bos_token": " ", + "chat_template": "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if loop.index0 == 0 and system_message != false %}{% set content = '<", + "eot_token": "▁>\\n' + system_message + '\\n< >\\n\\n' + message['content'] %}{% else %}{% set content = message['content'] %}{% endif %}{% if message['role'] == 'user' %}{{ bos_token + '[INST] ' + content | trim + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ ' ' + content | trim + ' ' + eos_token }}{% endif %}{% endfor %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "fill_token": " ", + "legacy": null, + "middle_token": "▁ ", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "[PAD]", + "padding_side": "right", + "prefix_token": "▁ ", + "sp_model_kwargs": {}, + "suffix_first": false, + "suffix_token": "▁", + "tokenizer_class": "CodeLlamaTokenizer", + "unk_token": " ", + "use_default_system_prompt": false +} diff --git a/codellama/c/codegen/codegen_c_dataflow/checkpoint-250/trainer_state.json b/codellama/c/codegen/codegen_c_dataflow/checkpoint-250/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..e6fcf65b1768075a05a9fe73be490e8ce267bc5b --- /dev/null +++ b/codellama/c/codegen/codegen_c_dataflow/checkpoint-250/trainer_state.json @@ -0,0 +1,383 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.5594541910331383, + "eval_steps": 500, + "global_step": 250, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.031189083820662766, + "grad_norm": 0.0859375, + "learning_rate": 0.0001, + "loss": 0.6539, + "step": 5 + }, + { + "epoch": 0.06237816764132553, + "grad_norm": 0.048828125, + "learning_rate": 0.0001, + "loss": 0.4137, + "step": 10 + }, + { + "epoch": 0.0935672514619883, + "grad_norm": 0.044921875, + "learning_rate": 0.0001, + "loss": 0.4041, + "step": 15 + }, + { + "epoch": 0.12475633528265107, + "grad_norm": 0.047607421875, + "learning_rate": 0.0001, + "loss": 0.4286, + "step": 20 + }, + { + "epoch": 0.15594541910331383, + "grad_norm": 0.05712890625, + "learning_rate": 0.0001, + "loss": 0.4432, + "step": 25 + }, + { + "epoch": 0.1871345029239766, + "grad_norm": 0.05517578125, + "learning_rate": 0.0001, + "loss": 0.4199, + "step": 30 + }, + { + "epoch": 0.21832358674463936, + "grad_norm": 0.051025390625, + "learning_rate": 0.0001, + "loss": 0.4192, + "step": 35 + }, + { + "epoch": 0.24951267056530213, + "grad_norm": 0.19921875, + "learning_rate": 0.0001, + "loss": 0.4418, + "step": 40 + }, + { + "epoch": 0.2807017543859649, + "grad_norm": 0.06201171875, + "learning_rate": 0.0001, + "loss": 0.3354, + "step": 45 + }, + { + "epoch": 0.31189083820662766, + "grad_norm": 0.0311279296875, + "learning_rate": 0.0001, + "loss": 0.3194, + "step": 50 + }, + { + "epoch": 0.34307992202729043, + "grad_norm": 0.0277099609375, + "learning_rate": 0.0001, + "loss": 0.3244, + "step": 55 + }, + { + "epoch": 0.3742690058479532, + "grad_norm": 0.0291748046875, + "learning_rate": 0.0001, + "loss": 0.3296, + "step": 60 + }, + { + "epoch": 0.40545808966861596, + "grad_norm": 0.030029296875, + "learning_rate": 0.0001, + "loss": 0.3166, + "step": 65 + }, + { + "epoch": 0.43664717348927873, + "grad_norm": 0.029541015625, + "learning_rate": 0.0001, + "loss": 0.3296, + "step": 70 + }, + { + "epoch": 0.4678362573099415, + "grad_norm": 0.03271484375, + "learning_rate": 0.0001, + "loss": 0.3367, + "step": 75 + }, + { + "epoch": 0.49902534113060426, + "grad_norm": 0.07275390625, + "learning_rate": 0.0001, + "loss": 0.3594, + "step": 80 + }, + { + "epoch": 0.530214424951267, + "grad_norm": 0.044921875, + "learning_rate": 0.0001, + "loss": 0.3167, + "step": 85 + }, + { + "epoch": 0.5614035087719298, + "grad_norm": 0.031982421875, + "learning_rate": 0.0001, + "loss": 0.2832, + "step": 90 + }, + { + "epoch": 0.5925925925925926, + "grad_norm": 0.0281982421875, + "learning_rate": 0.0001, + "loss": 0.299, + "step": 95 + }, + { + "epoch": 0.6237816764132553, + "grad_norm": 0.0299072265625, + "learning_rate": 0.0001, + "loss": 0.3073, + "step": 100 + }, + { + "epoch": 0.6549707602339181, + "grad_norm": 0.029541015625, + "learning_rate": 0.0001, + "loss": 0.2995, + "step": 105 + }, + { + "epoch": 0.6861598440545809, + "grad_norm": 0.0308837890625, + "learning_rate": 0.0001, + "loss": 0.3026, + "step": 110 + }, + { + "epoch": 0.7173489278752436, + "grad_norm": 0.033935546875, + "learning_rate": 0.0001, + "loss": 0.3234, + "step": 115 + }, + { + "epoch": 0.7485380116959064, + "grad_norm": 0.07421875, + "learning_rate": 0.0001, + "loss": 0.3463, + "step": 120 + }, + { + "epoch": 0.7797270955165692, + "grad_norm": 0.033935546875, + "learning_rate": 0.0001, + "loss": 0.2892, + "step": 125 + }, + { + "epoch": 0.8109161793372319, + "grad_norm": 0.0294189453125, + "learning_rate": 0.0001, + "loss": 0.2819, + "step": 130 + }, + { + "epoch": 0.8421052631578947, + "grad_norm": 0.0284423828125, + "learning_rate": 0.0001, + "loss": 0.3008, + "step": 135 + }, + { + "epoch": 0.8732943469785575, + "grad_norm": 0.0262451171875, + "learning_rate": 0.0001, + "loss": 0.3002, + "step": 140 + }, + { + "epoch": 0.9044834307992202, + "grad_norm": 0.0294189453125, + "learning_rate": 0.0001, + "loss": 0.3035, + "step": 145 + }, + { + "epoch": 0.935672514619883, + "grad_norm": 0.033935546875, + "learning_rate": 0.0001, + "loss": 0.2954, + "step": 150 + }, + { + "epoch": 0.9668615984405458, + "grad_norm": 0.03515625, + "learning_rate": 0.0001, + "loss": 0.3033, + "step": 155 + }, + { + "epoch": 0.9980506822612085, + "grad_norm": 0.07763671875, + "learning_rate": 0.0001, + "loss": 0.352, + "step": 160 + }, + { + "epoch": 1.0292397660818713, + "grad_norm": 0.041748046875, + "learning_rate": 0.0001, + "loss": 0.2887, + "step": 165 + }, + { + "epoch": 1.060428849902534, + "grad_norm": 0.0322265625, + "learning_rate": 0.0001, + "loss": 0.2654, + "step": 170 + }, + { + "epoch": 1.0916179337231968, + "grad_norm": 0.029296875, + "learning_rate": 0.0001, + "loss": 0.2809, + "step": 175 + }, + { + "epoch": 1.1228070175438596, + "grad_norm": 0.0311279296875, + "learning_rate": 0.0001, + "loss": 0.2943, + "step": 180 + }, + { + "epoch": 1.1539961013645224, + "grad_norm": 0.0306396484375, + "learning_rate": 0.0001, + "loss": 0.2927, + "step": 185 + }, + { + "epoch": 1.1851851851851851, + "grad_norm": 0.032958984375, + "learning_rate": 0.0001, + "loss": 0.2813, + "step": 190 + }, + { + "epoch": 1.2163742690058479, + "grad_norm": 0.0380859375, + "learning_rate": 0.0001, + "loss": 0.2982, + "step": 195 + }, + { + "epoch": 1.2475633528265107, + "grad_norm": 0.0576171875, + "learning_rate": 0.0001, + "loss": 0.3076, + "step": 200 + }, + { + "epoch": 1.2787524366471734, + "grad_norm": 0.095703125, + "learning_rate": 0.0001, + "loss": 0.2739, + "step": 205 + }, + { + "epoch": 1.3099415204678362, + "grad_norm": 0.03515625, + "learning_rate": 0.0001, + "loss": 0.2761, + "step": 210 + }, + { + "epoch": 1.341130604288499, + "grad_norm": 0.032958984375, + "learning_rate": 0.0001, + "loss": 0.2882, + "step": 215 + }, + { + "epoch": 1.3723196881091617, + "grad_norm": 0.034423828125, + "learning_rate": 0.0001, + "loss": 0.2885, + "step": 220 + }, + { + "epoch": 1.4035087719298245, + "grad_norm": 0.034912109375, + "learning_rate": 0.0001, + "loss": 0.2881, + "step": 225 + }, + { + "epoch": 1.4346978557504872, + "grad_norm": 0.03466796875, + "learning_rate": 0.0001, + "loss": 0.2745, + "step": 230 + }, + { + "epoch": 1.46588693957115, + "grad_norm": 0.042236328125, + "learning_rate": 0.0001, + "loss": 0.301, + "step": 235 + }, + { + "epoch": 1.4970760233918128, + "grad_norm": 0.0673828125, + "learning_rate": 0.0001, + "loss": 0.3076, + "step": 240 + }, + { + "epoch": 1.5282651072124755, + "grad_norm": 0.04345703125, + "learning_rate": 0.0001, + "loss": 0.2945, + "step": 245 + }, + { + "epoch": 1.5594541910331383, + "grad_norm": 0.032958984375, + "learning_rate": 0.0001, + "loss": 0.2577, + "step": 250 + } + ], + "logging_steps": 5, + "max_steps": 250, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 90, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 4.824681746497536e+17, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/codellama/c/codegen/codegen_c_dataflow/checkpoint-250/training_args.bin b/codellama/c/codegen/codegen_c_dataflow/checkpoint-250/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..14434618240d4b290e2de77a358a617c5f229774 --- /dev/null +++ b/codellama/c/codegen/codegen_c_dataflow/checkpoint-250/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8465c79cb850aca80983713c4d03a8f9665b1bcba3673754cb993081960b1d40 +size 7416 diff --git a/codellama/c/codegen/codegen_c_dataflow/completed b/codellama/c/codegen/codegen_c_dataflow/completed new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/codellama/c/codegen/codegen_c_dataflow/metrics.json b/codellama/c/codegen/codegen_c_dataflow/metrics.json new file mode 100644 index 0000000000000000000000000000000000000000..cbb90460ac91c7cdb62ae7cd077027323ed7ba4e --- /dev/null +++ b/codellama/c/codegen/codegen_c_dataflow/metrics.json @@ -0,0 +1 @@ +{"run_name": "codegen_c_dataflow", "train_runtime": 14418.506, "train_samples_per_second": 1.11, "train_steps_per_second": 0.017, "total_flos": 4.824681746497536e+17, "train_loss": 0.32678396368026735, "epoch": 1.5594541910331383} \ No newline at end of file diff --git a/codellama/c/codegen/codegen_c_dataflow/train_results.json b/codellama/c/codegen/codegen_c_dataflow/train_results.json new file mode 100644 index 0000000000000000000000000000000000000000..8e8b0ba84438f8ba44ddd6b0830f92de2debd858 --- /dev/null +++ b/codellama/c/codegen/codegen_c_dataflow/train_results.json @@ -0,0 +1,8 @@ +{ + "epoch": 1.5594541910331383, + "total_flos": 4.824681746497536e+17, + "train_loss": 0.32678396368026735, + "train_runtime": 14418.506, + "train_samples_per_second": 1.11, + "train_steps_per_second": 0.017 +} \ No newline at end of file diff --git a/codellama/c/codegen/codegen_c_dataflow/trainer_state.json b/codellama/c/codegen/codegen_c_dataflow/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..a436bfa417aca1f029ad41d41248f5c36092c4c2 --- /dev/null +++ b/codellama/c/codegen/codegen_c_dataflow/trainer_state.json @@ -0,0 +1,392 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.5594541910331383, + "eval_steps": 500, + "global_step": 250, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.031189083820662766, + "grad_norm": 0.0859375, + "learning_rate": 0.0001, + "loss": 0.6539, + "step": 5 + }, + { + "epoch": 0.06237816764132553, + "grad_norm": 0.048828125, + "learning_rate": 0.0001, + "loss": 0.4137, + "step": 10 + }, + { + "epoch": 0.0935672514619883, + "grad_norm": 0.044921875, + "learning_rate": 0.0001, + "loss": 0.4041, + "step": 15 + }, + { + "epoch": 0.12475633528265107, + "grad_norm": 0.047607421875, + "learning_rate": 0.0001, + "loss": 0.4286, + "step": 20 + }, + { + "epoch": 0.15594541910331383, + "grad_norm": 0.05712890625, + "learning_rate": 0.0001, + "loss": 0.4432, + "step": 25 + }, + { + "epoch": 0.1871345029239766, + "grad_norm": 0.05517578125, + "learning_rate": 0.0001, + "loss": 0.4199, + "step": 30 + }, + { + "epoch": 0.21832358674463936, + "grad_norm": 0.051025390625, + "learning_rate": 0.0001, + "loss": 0.4192, + "step": 35 + }, + { + "epoch": 0.24951267056530213, + "grad_norm": 0.19921875, + "learning_rate": 0.0001, + "loss": 0.4418, + "step": 40 + }, + { + "epoch": 0.2807017543859649, + "grad_norm": 0.06201171875, + "learning_rate": 0.0001, + "loss": 0.3354, + "step": 45 + }, + { + "epoch": 0.31189083820662766, + "grad_norm": 0.0311279296875, + "learning_rate": 0.0001, + "loss": 0.3194, + "step": 50 + }, + { + "epoch": 0.34307992202729043, + "grad_norm": 0.0277099609375, + "learning_rate": 0.0001, + "loss": 0.3244, + "step": 55 + }, + { + "epoch": 0.3742690058479532, + "grad_norm": 0.0291748046875, + "learning_rate": 0.0001, + "loss": 0.3296, + "step": 60 + }, + { + "epoch": 0.40545808966861596, + "grad_norm": 0.030029296875, + "learning_rate": 0.0001, + "loss": 0.3166, + "step": 65 + }, + { + "epoch": 0.43664717348927873, + "grad_norm": 0.029541015625, + "learning_rate": 0.0001, + "loss": 0.3296, + "step": 70 + }, + { + "epoch": 0.4678362573099415, + "grad_norm": 0.03271484375, + "learning_rate": 0.0001, + "loss": 0.3367, + "step": 75 + }, + { + "epoch": 0.49902534113060426, + "grad_norm": 0.07275390625, + "learning_rate": 0.0001, + "loss": 0.3594, + "step": 80 + }, + { + "epoch": 0.530214424951267, + "grad_norm": 0.044921875, + "learning_rate": 0.0001, + "loss": 0.3167, + "step": 85 + }, + { + "epoch": 0.5614035087719298, + "grad_norm": 0.031982421875, + "learning_rate": 0.0001, + "loss": 0.2832, + "step": 90 + }, + { + "epoch": 0.5925925925925926, + "grad_norm": 0.0281982421875, + "learning_rate": 0.0001, + "loss": 0.299, + "step": 95 + }, + { + "epoch": 0.6237816764132553, + "grad_norm": 0.0299072265625, + "learning_rate": 0.0001, + "loss": 0.3073, + "step": 100 + }, + { + "epoch": 0.6549707602339181, + "grad_norm": 0.029541015625, + "learning_rate": 0.0001, + "loss": 0.2995, + "step": 105 + }, + { + "epoch": 0.6861598440545809, + "grad_norm": 0.0308837890625, + "learning_rate": 0.0001, + "loss": 0.3026, + "step": 110 + }, + { + "epoch": 0.7173489278752436, + "grad_norm": 0.033935546875, + "learning_rate": 0.0001, + "loss": 0.3234, + "step": 115 + }, + { + "epoch": 0.7485380116959064, + "grad_norm": 0.07421875, + "learning_rate": 0.0001, + "loss": 0.3463, + "step": 120 + }, + { + "epoch": 0.7797270955165692, + "grad_norm": 0.033935546875, + "learning_rate": 0.0001, + "loss": 0.2892, + "step": 125 + }, + { + "epoch": 0.8109161793372319, + "grad_norm": 0.0294189453125, + "learning_rate": 0.0001, + "loss": 0.2819, + "step": 130 + }, + { + "epoch": 0.8421052631578947, + "grad_norm": 0.0284423828125, + "learning_rate": 0.0001, + "loss": 0.3008, + "step": 135 + }, + { + "epoch": 0.8732943469785575, + "grad_norm": 0.0262451171875, + "learning_rate": 0.0001, + "loss": 0.3002, + "step": 140 + }, + { + "epoch": 0.9044834307992202, + "grad_norm": 0.0294189453125, + "learning_rate": 0.0001, + "loss": 0.3035, + "step": 145 + }, + { + "epoch": 0.935672514619883, + "grad_norm": 0.033935546875, + "learning_rate": 0.0001, + "loss": 0.2954, + "step": 150 + }, + { + "epoch": 0.9668615984405458, + "grad_norm": 0.03515625, + "learning_rate": 0.0001, + "loss": 0.3033, + "step": 155 + }, + { + "epoch": 0.9980506822612085, + "grad_norm": 0.07763671875, + "learning_rate": 0.0001, + "loss": 0.352, + "step": 160 + }, + { + "epoch": 1.0292397660818713, + "grad_norm": 0.041748046875, + "learning_rate": 0.0001, + "loss": 0.2887, + "step": 165 + }, + { + "epoch": 1.060428849902534, + "grad_norm": 0.0322265625, + "learning_rate": 0.0001, + "loss": 0.2654, + "step": 170 + }, + { + "epoch": 1.0916179337231968, + "grad_norm": 0.029296875, + "learning_rate": 0.0001, + "loss": 0.2809, + "step": 175 + }, + { + "epoch": 1.1228070175438596, + "grad_norm": 0.0311279296875, + "learning_rate": 0.0001, + "loss": 0.2943, + "step": 180 + }, + { + "epoch": 1.1539961013645224, + "grad_norm": 0.0306396484375, + "learning_rate": 0.0001, + "loss": 0.2927, + "step": 185 + }, + { + "epoch": 1.1851851851851851, + "grad_norm": 0.032958984375, + "learning_rate": 0.0001, + "loss": 0.2813, + "step": 190 + }, + { + "epoch": 1.2163742690058479, + "grad_norm": 0.0380859375, + "learning_rate": 0.0001, + "loss": 0.2982, + "step": 195 + }, + { + "epoch": 1.2475633528265107, + "grad_norm": 0.0576171875, + "learning_rate": 0.0001, + "loss": 0.3076, + "step": 200 + }, + { + "epoch": 1.2787524366471734, + "grad_norm": 0.095703125, + "learning_rate": 0.0001, + "loss": 0.2739, + "step": 205 + }, + { + "epoch": 1.3099415204678362, + "grad_norm": 0.03515625, + "learning_rate": 0.0001, + "loss": 0.2761, + "step": 210 + }, + { + "epoch": 1.341130604288499, + "grad_norm": 0.032958984375, + "learning_rate": 0.0001, + "loss": 0.2882, + "step": 215 + }, + { + "epoch": 1.3723196881091617, + "grad_norm": 0.034423828125, + "learning_rate": 0.0001, + "loss": 0.2885, + "step": 220 + }, + { + "epoch": 1.4035087719298245, + "grad_norm": 0.034912109375, + "learning_rate": 0.0001, + "loss": 0.2881, + "step": 225 + }, + { + "epoch": 1.4346978557504872, + "grad_norm": 0.03466796875, + "learning_rate": 0.0001, + "loss": 0.2745, + "step": 230 + }, + { + "epoch": 1.46588693957115, + "grad_norm": 0.042236328125, + "learning_rate": 0.0001, + "loss": 0.301, + "step": 235 + }, + { + "epoch": 1.4970760233918128, + "grad_norm": 0.0673828125, + "learning_rate": 0.0001, + "loss": 0.3076, + "step": 240 + }, + { + "epoch": 1.5282651072124755, + "grad_norm": 0.04345703125, + "learning_rate": 0.0001, + "loss": 0.2945, + "step": 245 + }, + { + "epoch": 1.5594541910331383, + "grad_norm": 0.032958984375, + "learning_rate": 0.0001, + "loss": 0.2577, + "step": 250 + }, + { + "epoch": 1.5594541910331383, + "step": 250, + "total_flos": 4.824681746497536e+17, + "train_loss": 0.32678396368026735, + "train_runtime": 14418.506, + "train_samples_per_second": 1.11, + "train_steps_per_second": 0.017 + } + ], + "logging_steps": 5, + "max_steps": 250, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 90, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 4.824681746497536e+17, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/codellama/c/codegen/codegen_c_srcml/all_results.json b/codellama/c/codegen/codegen_c_srcml/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..5c13066310556e5f10bf8cd7b0a69aa0a4e5ec08 --- /dev/null +++ b/codellama/c/codegen/codegen_c_srcml/all_results.json @@ -0,0 +1,8 @@ +{ + "epoch": 1.5594541910331383, + "total_flos": 4.824681746497536e+17, + "train_loss": 0.31994184494018557, + "train_runtime": 16530.0404, + "train_samples_per_second": 0.968, + "train_steps_per_second": 0.015 +} \ No newline at end of file diff --git a/codellama/c/codegen/codegen_c_srcml/checkpoint-180/README.md b/codellama/c/codegen/codegen_c_srcml/checkpoint-180/README.md new file mode 100644 index 0000000000000000000000000000000000000000..1c480e52daf505882c9f03a4e75fe30ea8c22c2c --- /dev/null +++ b/codellama/c/codegen/codegen_c_srcml/checkpoint-180/README.md @@ -0,0 +1,202 @@ +--- +base_model: CodeLlama-13b-Instruct-hf/ +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.2 \ No newline at end of file diff --git a/codellama/c/codegen/codegen_c_srcml/checkpoint-180/adapter_config.json b/codellama/c/codegen/codegen_c_srcml/checkpoint-180/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..778f43be6afd1d1a469dafeb129160b7207123d6 --- /dev/null +++ b/codellama/c/codegen/codegen_c_srcml/checkpoint-180/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "CodeLlama-13b-Instruct-hf/", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "gate_proj", + "down_proj", + "k_proj", + "o_proj", + "up_proj", + "v_proj", + "q_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/codellama/c/codegen/codegen_c_srcml/checkpoint-180/adapter_model.safetensors b/codellama/c/codegen/codegen_c_srcml/checkpoint-180/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..02ae2430479e9792f3024fc5774b18464aea566a --- /dev/null +++ b/codellama/c/codegen/codegen_c_srcml/checkpoint-180/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8821671fc02d1c5c42f49f2e885db44b794c5c4447ee5657533d12c067029c3 +size 1156480200 diff --git a/codellama/c/codegen/codegen_c_srcml/checkpoint-180/adapter_model/README.md b/codellama/c/codegen/codegen_c_srcml/checkpoint-180/adapter_model/README.md new file mode 100644 index 0000000000000000000000000000000000000000..1c480e52daf505882c9f03a4e75fe30ea8c22c2c --- /dev/null +++ b/codellama/c/codegen/codegen_c_srcml/checkpoint-180/adapter_model/README.md @@ -0,0 +1,202 @@ +--- +base_model: CodeLlama-13b-Instruct-hf/ +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.2 \ No newline at end of file diff --git a/codellama/c/codegen/codegen_c_srcml/checkpoint-180/adapter_model/adapter_config.json b/codellama/c/codegen/codegen_c_srcml/checkpoint-180/adapter_model/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..778f43be6afd1d1a469dafeb129160b7207123d6 --- /dev/null +++ b/codellama/c/codegen/codegen_c_srcml/checkpoint-180/adapter_model/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "CodeLlama-13b-Instruct-hf/", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "gate_proj", + "down_proj", + "k_proj", + "o_proj", + "up_proj", + "v_proj", + "q_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/codellama/c/codegen/codegen_c_srcml/checkpoint-180/adapter_model/adapter_model.safetensors b/codellama/c/codegen/codegen_c_srcml/checkpoint-180/adapter_model/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..02ae2430479e9792f3024fc5774b18464aea566a --- /dev/null +++ b/codellama/c/codegen/codegen_c_srcml/checkpoint-180/adapter_model/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8821671fc02d1c5c42f49f2e885db44b794c5c4447ee5657533d12c067029c3 +size 1156480200 diff --git a/codellama/c/codegen/codegen_c_srcml/checkpoint-180/added_tokens.json b/codellama/c/codegen/codegen_c_srcml/checkpoint-180/added_tokens.json new file mode 100644 index 0000000000000000000000000000000000000000..1cbbe5179eb8b5cc46632bbbc00eb51c68847074 --- /dev/null +++ b/codellama/c/codegen/codegen_c_srcml/checkpoint-180/added_tokens.json @@ -0,0 +1,3 @@ +{ + "[PAD]": 32016 +} diff --git a/codellama/c/codegen/codegen_c_srcml/checkpoint-180/optimizer.pt b/codellama/c/codegen/codegen_c_srcml/checkpoint-180/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..6d49dc06848d6d0b496eff9c1ec7c7ffb1b7cc5f --- /dev/null +++ b/codellama/c/codegen/codegen_c_srcml/checkpoint-180/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57e3c39fadc0a223ea3b21abbfa750e71836c1afe10bd38f13348efb302fc1f6 +size 2003126962 diff --git a/codellama/c/codegen/codegen_c_srcml/checkpoint-180/rng_state.pth b/codellama/c/codegen/codegen_c_srcml/checkpoint-180/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..d2d1ca450f51a73b9938ae4d77eda4f9e4e83adb --- /dev/null +++ b/codellama/c/codegen/codegen_c_srcml/checkpoint-180/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3834587c092125bd201661e37e816cdbd55b1a136077c4e0b1d7944daa54445 +size 14244 diff --git a/codellama/c/codegen/codegen_c_srcml/checkpoint-180/scheduler.pt b/codellama/c/codegen/codegen_c_srcml/checkpoint-180/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..cd32f24b55247712dc306a7f48b1e67f9136b26b --- /dev/null +++ b/codellama/c/codegen/codegen_c_srcml/checkpoint-180/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:244453cd6aad26ed6e8f9d969778193b9354089d8336fe58bfb91c089a53bf6f +size 1064 diff --git a/codellama/c/codegen/codegen_c_srcml/checkpoint-180/special_tokens_map.json b/codellama/c/codegen/codegen_c_srcml/checkpoint-180/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..330bb0c14209dcd402b155e7d58c6c2b5210d40d --- /dev/null +++ b/codellama/c/codegen/codegen_c_srcml/checkpoint-180/special_tokens_map.json @@ -0,0 +1,36 @@ +{ + "additional_special_tokens": [ + "▁ ", + "▁", + "▁ ", + "▁ " + ], + "bos_token": { + "content": " ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "[PAD]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/codellama/c/codegen/codegen_c_srcml/checkpoint-180/tokenizer.model b/codellama/c/codegen/codegen_c_srcml/checkpoint-180/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..f6722e8b170230ebdd4c0f5f2ce03f219be536d4 --- /dev/null +++ b/codellama/c/codegen/codegen_c_srcml/checkpoint-180/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45ccb9c8b6b561889acea59191d66986d314e7cbd6a78abc6e49b139ca91c1e6 +size 500058 diff --git a/codellama/c/codegen/codegen_c_srcml/checkpoint-180/tokenizer_config.json b/codellama/c/codegen/codegen_c_srcml/checkpoint-180/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8f4094d204e2be0ed7b6bfa83d20cff28326258d --- /dev/null +++ b/codellama/c/codegen/codegen_c_srcml/checkpoint-180/tokenizer_config.json @@ -0,0 +1,94 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": " ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": " ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32007": { + "content": "▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32008": { + "content": "▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32009": { + "content": "▁ ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32010": { + "content": "▁ ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32016": { + "content": "[PAD]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [ + "▁ ", + "▁", + "▁ ", + "▁ " + ], + "bos_token": " ", + "chat_template": "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if loop.index0 == 0 and system_message != false %}{% set content = '<", + "eot_token": "▁>\\n' + system_message + '\\n< >\\n\\n' + message['content'] %}{% else %}{% set content = message['content'] %}{% endif %}{% if message['role'] == 'user' %}{{ bos_token + '[INST] ' + content | trim + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ ' ' + content | trim + ' ' + eos_token }}{% endif %}{% endfor %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "fill_token": " ", + "legacy": null, + "middle_token": "▁ ", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "[PAD]", + "padding_side": "right", + "prefix_token": "▁ ", + "sp_model_kwargs": {}, + "suffix_first": false, + "suffix_token": "▁", + "tokenizer_class": "CodeLlamaTokenizer", + "unk_token": " ", + "use_default_system_prompt": false +} diff --git a/codellama/c/codegen/codegen_c_srcml/checkpoint-180/trainer_state.json b/codellama/c/codegen/codegen_c_srcml/checkpoint-180/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..a122b4ede448897332877874f71a5ef11adacc52 --- /dev/null +++ b/codellama/c/codegen/codegen_c_srcml/checkpoint-180/trainer_state.json @@ -0,0 +1,285 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.1228070175438596, + "eval_steps": 500, + "global_step": 180, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.031189083820662766, + "grad_norm": 0.08056640625, + "learning_rate": 0.0001, + "loss": 0.6599, + "step": 5 + }, + { + "epoch": 0.06237816764132553, + "grad_norm": 0.049560546875, + "learning_rate": 0.0001, + "loss": 0.4191, + "step": 10 + }, + { + "epoch": 0.0935672514619883, + "grad_norm": 0.046875, + "learning_rate": 0.0001, + "loss": 0.3937, + "step": 15 + }, + { + "epoch": 0.12475633528265107, + "grad_norm": 0.056640625, + "learning_rate": 0.0001, + "loss": 0.3818, + "step": 20 + }, + { + "epoch": 0.15594541910331383, + "grad_norm": 0.030029296875, + "learning_rate": 0.0001, + "loss": 0.3685, + "step": 25 + }, + { + "epoch": 0.1871345029239766, + "grad_norm": 0.031005859375, + "learning_rate": 0.0001, + "loss": 0.361, + "step": 30 + }, + { + "epoch": 0.21832358674463936, + "grad_norm": 0.03564453125, + "learning_rate": 0.0001, + "loss": 0.3753, + "step": 35 + }, + { + "epoch": 0.24951267056530213, + "grad_norm": 0.1162109375, + "learning_rate": 0.0001, + "loss": 0.4059, + "step": 40 + }, + { + "epoch": 0.2807017543859649, + "grad_norm": 0.045166015625, + "learning_rate": 0.0001, + "loss": 0.3342, + "step": 45 + }, + { + "epoch": 0.31189083820662766, + "grad_norm": 0.027587890625, + "learning_rate": 0.0001, + "loss": 0.3155, + "step": 50 + }, + { + "epoch": 0.34307992202729043, + "grad_norm": 0.0284423828125, + "learning_rate": 0.0001, + "loss": 0.3219, + "step": 55 + }, + { + "epoch": 0.3742690058479532, + "grad_norm": 0.0255126953125, + "learning_rate": 0.0001, + "loss": 0.3279, + "step": 60 + }, + { + "epoch": 0.40545808966861596, + "grad_norm": 0.0267333984375, + "learning_rate": 0.0001, + "loss": 0.3145, + "step": 65 + }, + { + "epoch": 0.43664717348927873, + "grad_norm": 0.027099609375, + "learning_rate": 0.0001, + "loss": 0.3277, + "step": 70 + }, + { + "epoch": 0.4678362573099415, + "grad_norm": 0.03173828125, + "learning_rate": 0.0001, + "loss": 0.3354, + "step": 75 + }, + { + "epoch": 0.49902534113060426, + "grad_norm": 0.06982421875, + "learning_rate": 0.0001, + "loss": 0.3578, + "step": 80 + }, + { + "epoch": 0.530214424951267, + "grad_norm": 0.03759765625, + "learning_rate": 0.0001, + "loss": 0.317, + "step": 85 + }, + { + "epoch": 0.5614035087719298, + "grad_norm": 0.02734375, + "learning_rate": 0.0001, + "loss": 0.2833, + "step": 90 + }, + { + "epoch": 0.5925925925925926, + "grad_norm": 0.029052734375, + "learning_rate": 0.0001, + "loss": 0.2981, + "step": 95 + }, + { + "epoch": 0.6237816764132553, + "grad_norm": 0.0281982421875, + "learning_rate": 0.0001, + "loss": 0.3059, + "step": 100 + }, + { + "epoch": 0.6549707602339181, + "grad_norm": 0.0277099609375, + "learning_rate": 0.0001, + "loss": 0.2979, + "step": 105 + }, + { + "epoch": 0.6861598440545809, + "grad_norm": 0.0289306640625, + "learning_rate": 0.0001, + "loss": 0.3014, + "step": 110 + }, + { + "epoch": 0.7173489278752436, + "grad_norm": 0.033447265625, + "learning_rate": 0.0001, + "loss": 0.322, + "step": 115 + }, + { + "epoch": 0.7485380116959064, + "grad_norm": 0.08056640625, + "learning_rate": 0.0001, + "loss": 0.3465, + "step": 120 + }, + { + "epoch": 0.7797270955165692, + "grad_norm": 0.04296875, + "learning_rate": 0.0001, + "loss": 0.2876, + "step": 125 + }, + { + "epoch": 0.8109161793372319, + "grad_norm": 0.0291748046875, + "learning_rate": 0.0001, + "loss": 0.2804, + "step": 130 + }, + { + "epoch": 0.8421052631578947, + "grad_norm": 0.0264892578125, + "learning_rate": 0.0001, + "loss": 0.2998, + "step": 135 + }, + { + "epoch": 0.8732943469785575, + "grad_norm": 0.0269775390625, + "learning_rate": 0.0001, + "loss": 0.2981, + "step": 140 + }, + { + "epoch": 0.9044834307992202, + "grad_norm": 0.0302734375, + "learning_rate": 0.0001, + "loss": 0.3018, + "step": 145 + }, + { + "epoch": 0.935672514619883, + "grad_norm": 0.03271484375, + "learning_rate": 0.0001, + "loss": 0.2936, + "step": 150 + }, + { + "epoch": 0.9668615984405458, + "grad_norm": 0.0341796875, + "learning_rate": 0.0001, + "loss": 0.3017, + "step": 155 + }, + { + "epoch": 0.9980506822612085, + "grad_norm": 0.07763671875, + "learning_rate": 0.0001, + "loss": 0.35, + "step": 160 + }, + { + "epoch": 1.0292397660818713, + "grad_norm": 0.034423828125, + "learning_rate": 0.0001, + "loss": 0.2864, + "step": 165 + }, + { + "epoch": 1.060428849902534, + "grad_norm": 0.03515625, + "learning_rate": 0.0001, + "loss": 0.2628, + "step": 170 + }, + { + "epoch": 1.0916179337231968, + "grad_norm": 0.0322265625, + "learning_rate": 0.0001, + "loss": 0.2783, + "step": 175 + }, + { + "epoch": 1.1228070175438596, + "grad_norm": 0.031494140625, + "learning_rate": 0.0001, + "loss": 0.2912, + "step": 180 + } + ], + "logging_steps": 5, + "max_steps": 250, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 90, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 3.5196202394271744e+17, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/codellama/c/codegen/codegen_c_srcml/checkpoint-180/training_args.bin b/codellama/c/codegen/codegen_c_srcml/checkpoint-180/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..a7821e913d6ae10d45b53e723f526cff5f5fa882 --- /dev/null +++ b/codellama/c/codegen/codegen_c_srcml/checkpoint-180/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d8cefd12ccf98d630301cfc0e34899e503758b920913c79bb40dea210d1f4b8 +size 7416 diff --git a/codellama/c/codegen/codegen_c_srcml/checkpoint-240/README.md b/codellama/c/codegen/codegen_c_srcml/checkpoint-240/README.md new file mode 100644 index 0000000000000000000000000000000000000000..1c480e52daf505882c9f03a4e75fe30ea8c22c2c --- /dev/null +++ b/codellama/c/codegen/codegen_c_srcml/checkpoint-240/README.md @@ -0,0 +1,202 @@ +--- +base_model: CodeLlama-13b-Instruct-hf/ +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.2 \ No newline at end of file diff --git a/codellama/c/codegen/codegen_c_srcml/checkpoint-240/adapter_config.json b/codellama/c/codegen/codegen_c_srcml/checkpoint-240/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..dce5fe81a253e8a4d30b85a67b13d16d6b41e8b2 --- /dev/null +++ b/codellama/c/codegen/codegen_c_srcml/checkpoint-240/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "CodeLlama-13b-Instruct-hf/", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "q_proj", + "k_proj", + "gate_proj", + "down_proj", + "v_proj", + "o_proj", + "up_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/codellama/c/codegen/codegen_c_srcml/checkpoint-240/adapter_model.safetensors b/codellama/c/codegen/codegen_c_srcml/checkpoint-240/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b21cfd6895c5ad5701c6e1cc4a5c08071e395826 --- /dev/null +++ b/codellama/c/codegen/codegen_c_srcml/checkpoint-240/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9cba8f006301f8aba0c522a1ef079ad7235ea329c916e5a53cb63a56704f1efa +size 1156480200 diff --git a/codellama/c/codegen/codegen_c_srcml/checkpoint-240/adapter_model/README.md b/codellama/c/codegen/codegen_c_srcml/checkpoint-240/adapter_model/README.md new file mode 100644 index 0000000000000000000000000000000000000000..1c480e52daf505882c9f03a4e75fe30ea8c22c2c --- /dev/null +++ b/codellama/c/codegen/codegen_c_srcml/checkpoint-240/adapter_model/README.md @@ -0,0 +1,202 @@ +--- +base_model: CodeLlama-13b-Instruct-hf/ +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.2 \ No newline at end of file diff --git a/codellama/c/codegen/codegen_c_srcml/checkpoint-240/adapter_model/adapter_config.json b/codellama/c/codegen/codegen_c_srcml/checkpoint-240/adapter_model/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..dce5fe81a253e8a4d30b85a67b13d16d6b41e8b2 --- /dev/null +++ b/codellama/c/codegen/codegen_c_srcml/checkpoint-240/adapter_model/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "CodeLlama-13b-Instruct-hf/", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "q_proj", + "k_proj", + "gate_proj", + "down_proj", + "v_proj", + "o_proj", + "up_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/codellama/c/codegen/codegen_c_srcml/checkpoint-240/adapter_model/adapter_model.safetensors b/codellama/c/codegen/codegen_c_srcml/checkpoint-240/adapter_model/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b21cfd6895c5ad5701c6e1cc4a5c08071e395826 --- /dev/null +++ b/codellama/c/codegen/codegen_c_srcml/checkpoint-240/adapter_model/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9cba8f006301f8aba0c522a1ef079ad7235ea329c916e5a53cb63a56704f1efa +size 1156480200 diff --git a/codellama/c/codegen/codegen_c_srcml/checkpoint-240/added_tokens.json b/codellama/c/codegen/codegen_c_srcml/checkpoint-240/added_tokens.json new file mode 100644 index 0000000000000000000000000000000000000000..1cbbe5179eb8b5cc46632bbbc00eb51c68847074 --- /dev/null +++ b/codellama/c/codegen/codegen_c_srcml/checkpoint-240/added_tokens.json @@ -0,0 +1,3 @@ +{ + "[PAD]": 32016 +} diff --git a/codellama/c/codegen/codegen_c_srcml/checkpoint-240/optimizer.pt b/codellama/c/codegen/codegen_c_srcml/checkpoint-240/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..446c60de99c4d32b59b3fac16528e68eef570943 --- /dev/null +++ b/codellama/c/codegen/codegen_c_srcml/checkpoint-240/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:984929519fe898604e167f8e9b064c8c0d84e9f1d25c8fa06b0705e0e485feb8 +size 2003126962 diff --git a/codellama/c/codegen/codegen_c_srcml/checkpoint-240/rng_state.pth b/codellama/c/codegen/codegen_c_srcml/checkpoint-240/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..674f19c19f626ee3e158871efe1295acbb56cc23 --- /dev/null +++ b/codellama/c/codegen/codegen_c_srcml/checkpoint-240/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b6eed83fa00e1e202c49ba20841681e1a50f93c304519ccbd954520d4bb86bd +size 14244 diff --git a/codellama/c/codegen/codegen_c_srcml/checkpoint-240/scheduler.pt b/codellama/c/codegen/codegen_c_srcml/checkpoint-240/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..fb5d1834f266efa95e807bbf42a5ef055d59cb79 --- /dev/null +++ b/codellama/c/codegen/codegen_c_srcml/checkpoint-240/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2836d002611f504ff01dacdfc97bbce280b28a36695ac641b819947f616e7533 +size 1064 diff --git a/codellama/c/codegen/codegen_c_srcml/checkpoint-240/special_tokens_map.json b/codellama/c/codegen/codegen_c_srcml/checkpoint-240/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..330bb0c14209dcd402b155e7d58c6c2b5210d40d --- /dev/null +++ b/codellama/c/codegen/codegen_c_srcml/checkpoint-240/special_tokens_map.json @@ -0,0 +1,36 @@ +{ + "additional_special_tokens": [ + "▁ ", + "▁", + "▁ ", + "▁ " + ], + "bos_token": { + "content": " ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "[PAD]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/codellama/c/codegen/codegen_c_srcml/checkpoint-240/tokenizer.model b/codellama/c/codegen/codegen_c_srcml/checkpoint-240/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..f6722e8b170230ebdd4c0f5f2ce03f219be536d4 --- /dev/null +++ b/codellama/c/codegen/codegen_c_srcml/checkpoint-240/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45ccb9c8b6b561889acea59191d66986d314e7cbd6a78abc6e49b139ca91c1e6 +size 500058 diff --git a/codellama/c/codegen/codegen_c_srcml/checkpoint-240/tokenizer_config.json b/codellama/c/codegen/codegen_c_srcml/checkpoint-240/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8f4094d204e2be0ed7b6bfa83d20cff28326258d --- /dev/null +++ b/codellama/c/codegen/codegen_c_srcml/checkpoint-240/tokenizer_config.json @@ -0,0 +1,94 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": " ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": " ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32007": { + "content": "▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32008": { + "content": "▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32009": { + "content": "▁ ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32010": { + "content": "▁ ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32016": { + "content": "[PAD]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [ + "▁ ", + "▁", + "▁ ", + "▁ " + ], + "bos_token": " ", + "chat_template": "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if loop.index0 == 0 and system_message != false %}{% set content = '<", + "eot_token": "▁>\\n' + system_message + '\\n< >\\n\\n' + message['content'] %}{% else %}{% set content = message['content'] %}{% endif %}{% if message['role'] == 'user' %}{{ bos_token + '[INST] ' + content | trim + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ ' ' + content | trim + ' ' + eos_token }}{% endif %}{% endfor %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "fill_token": " ", + "legacy": null, + "middle_token": "▁ ", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "[PAD]", + "padding_side": "right", + "prefix_token": "▁ ", + "sp_model_kwargs": {}, + "suffix_first": false, + "suffix_token": "▁", + "tokenizer_class": "CodeLlamaTokenizer", + "unk_token": " ", + "use_default_system_prompt": false +} diff --git a/codellama/c/codegen/codegen_c_srcml/checkpoint-240/trainer_state.json b/codellama/c/codegen/codegen_c_srcml/checkpoint-240/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..24c016047ba7370b1abe2977072d7bda92190e48 --- /dev/null +++ b/codellama/c/codegen/codegen_c_srcml/checkpoint-240/trainer_state.json @@ -0,0 +1,369 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.536, + "eval_steps": 500, + "global_step": 240, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.032, + "grad_norm": 0.047607421875, + "learning_rate": 0.0001, + "loss": 0.3836, + "step": 5 + }, + { + "epoch": 0.064, + "grad_norm": 0.04541015625, + "learning_rate": 0.0001, + "loss": 0.2449, + "step": 10 + }, + { + "epoch": 0.096, + "grad_norm": 0.056396484375, + "learning_rate": 0.0001, + "loss": 0.1513, + "step": 15 + }, + { + "epoch": 0.128, + "grad_norm": 0.03857421875, + "learning_rate": 0.0001, + "loss": 0.0705, + "step": 20 + }, + { + "epoch": 0.16, + "grad_norm": 0.0286865234375, + "learning_rate": 0.0001, + "loss": 0.0488, + "step": 25 + }, + { + "epoch": 0.192, + "grad_norm": 0.03173828125, + "learning_rate": 0.0001, + "loss": 0.0391, + "step": 30 + }, + { + "epoch": 0.224, + "grad_norm": 0.054931640625, + "learning_rate": 0.0001, + "loss": 0.0278, + "step": 35 + }, + { + "epoch": 0.256, + "grad_norm": 0.0888671875, + "learning_rate": 0.0001, + "loss": 0.1414, + "step": 40 + }, + { + "epoch": 0.288, + "grad_norm": 0.015625, + "learning_rate": 0.0001, + "loss": 0.0371, + "step": 45 + }, + { + "epoch": 0.32, + "grad_norm": 0.0257568359375, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 50 + }, + { + "epoch": 0.352, + "grad_norm": 0.02197265625, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 55 + }, + { + "epoch": 0.384, + "grad_norm": 0.020751953125, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 60 + }, + { + "epoch": 0.416, + "grad_norm": 0.0164794921875, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 65 + }, + { + "epoch": 0.448, + "grad_norm": 0.0120849609375, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 70 + }, + { + "epoch": 0.48, + "grad_norm": 0.0269775390625, + "learning_rate": 0.0001, + "loss": 0.0168, + "step": 75 + }, + { + "epoch": 0.512, + "grad_norm": 0.05078125, + "learning_rate": 0.0001, + "loss": 0.0572, + "step": 80 + }, + { + "epoch": 0.544, + "grad_norm": 0.031982421875, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 85 + }, + { + "epoch": 0.576, + "grad_norm": 0.0196533203125, + "learning_rate": 0.0001, + "loss": 0.0077, + "step": 90 + }, + { + "epoch": 0.608, + "grad_norm": 0.0238037109375, + "learning_rate": 0.0001, + "loss": 0.0054, + "step": 95 + }, + { + "epoch": 0.64, + "grad_norm": 0.0108642578125, + "learning_rate": 0.0001, + "loss": 0.0043, + "step": 100 + }, + { + "epoch": 0.672, + "grad_norm": 0.0091552734375, + "learning_rate": 0.0001, + "loss": 0.004, + "step": 105 + }, + { + "epoch": 0.704, + "grad_norm": 0.01336669921875, + "learning_rate": 0.0001, + "loss": 0.0043, + "step": 110 + }, + { + "epoch": 0.736, + "grad_norm": 0.033203125, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 115 + }, + { + "epoch": 0.768, + "grad_norm": 0.0169677734375, + "learning_rate": 0.0001, + "loss": 0.0173, + "step": 120 + }, + { + "epoch": 0.8, + "grad_norm": 0.00909423828125, + "learning_rate": 0.0001, + "loss": 0.0031, + "step": 125 + }, + { + "epoch": 0.832, + "grad_norm": 0.01171875, + "learning_rate": 0.0001, + "loss": 0.0038, + "step": 130 + }, + { + "epoch": 0.864, + "grad_norm": 0.00946044921875, + "learning_rate": 0.0001, + "loss": 0.0036, + "step": 135 + }, + { + "epoch": 0.896, + "grad_norm": 0.014892578125, + "learning_rate": 0.0001, + "loss": 0.0047, + "step": 140 + }, + { + "epoch": 0.928, + "grad_norm": 0.01239013671875, + "learning_rate": 0.0001, + "loss": 0.006, + "step": 145 + }, + { + "epoch": 0.96, + "grad_norm": 0.00982666015625, + "learning_rate": 0.0001, + "loss": 0.0032, + "step": 150 + }, + { + "epoch": 0.992, + "grad_norm": 0.01031494140625, + "learning_rate": 0.0001, + "loss": 0.0037, + "step": 155 + }, + { + "epoch": 1.024, + "grad_norm": 0.006927490234375, + "learning_rate": 0.0001, + "loss": 0.0036, + "step": 160 + }, + { + "epoch": 1.056, + "grad_norm": 0.0084228515625, + "learning_rate": 0.0001, + "loss": 0.0017, + "step": 165 + }, + { + "epoch": 1.088, + "grad_norm": 0.005584716796875, + "learning_rate": 0.0001, + "loss": 0.0018, + "step": 170 + }, + { + "epoch": 1.12, + "grad_norm": 0.006683349609375, + "learning_rate": 0.0001, + "loss": 0.0017, + "step": 175 + }, + { + "epoch": 1.152, + "grad_norm": 0.004486083984375, + "learning_rate": 0.0001, + "loss": 0.0016, + "step": 180 + }, + { + "epoch": 1.184, + "grad_norm": 0.0087890625, + "learning_rate": 0.0001, + "loss": 0.0026, + "step": 185 + }, + { + "epoch": 1.216, + "grad_norm": 0.0062255859375, + "learning_rate": 0.0001, + "loss": 0.0015, + "step": 190 + }, + { + "epoch": 1.248, + "grad_norm": 0.0128173828125, + "learning_rate": 0.0001, + "loss": 0.0026, + "step": 195 + }, + { + "epoch": 1.28, + "grad_norm": 0.006683349609375, + "learning_rate": 0.0001, + "loss": 0.0039, + "step": 200 + }, + { + "epoch": 1.312, + "grad_norm": 0.00787353515625, + "learning_rate": 0.0001, + "loss": 0.0019, + "step": 205 + }, + { + "epoch": 1.3439999999999999, + "grad_norm": 0.0096435546875, + "learning_rate": 0.0001, + "loss": 0.0011, + "step": 210 + }, + { + "epoch": 1.376, + "grad_norm": 0.0096435546875, + "learning_rate": 0.0001, + "loss": 0.0016, + "step": 215 + }, + { + "epoch": 1.408, + "grad_norm": 0.005859375, + "learning_rate": 0.0001, + "loss": 0.0014, + "step": 220 + }, + { + "epoch": 1.44, + "grad_norm": 0.00848388671875, + "learning_rate": 0.0001, + "loss": 0.0014, + "step": 225 + }, + { + "epoch": 1.472, + "grad_norm": 0.015625, + "learning_rate": 0.0001, + "loss": 0.002, + "step": 230 + }, + { + "epoch": 1.504, + "grad_norm": 0.03857421875, + "learning_rate": 0.0001, + "loss": 0.0067, + "step": 235 + }, + { + "epoch": 1.536, + "grad_norm": 0.00811767578125, + "learning_rate": 0.0001, + "loss": 0.0062, + "step": 240 + } + ], + "logging_steps": 5, + "max_steps": 240, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 90, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 9.355246833433805e+17, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/codellama/c/codegen/codegen_c_srcml/checkpoint-240/training_args.bin b/codellama/c/codegen/codegen_c_srcml/checkpoint-240/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..df13c117729724974d789a96e6f8a63da0c72317 --- /dev/null +++ b/codellama/c/codegen/codegen_c_srcml/checkpoint-240/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0483c94ee92ada9498c8787c875c05f0c76bfe2f8a8e0386406848bdda4e9fc8 +size 7416 diff --git a/codellama/c/codegen/codegen_c_srcml/checkpoint-250/README.md b/codellama/c/codegen/codegen_c_srcml/checkpoint-250/README.md new file mode 100644 index 0000000000000000000000000000000000000000..1c480e52daf505882c9f03a4e75fe30ea8c22c2c --- /dev/null +++ b/codellama/c/codegen/codegen_c_srcml/checkpoint-250/README.md @@ -0,0 +1,202 @@ +--- +base_model: CodeLlama-13b-Instruct-hf/ +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.2 \ No newline at end of file diff --git a/codellama/c/codegen/codegen_c_srcml/checkpoint-250/adapter_config.json b/codellama/c/codegen/codegen_c_srcml/checkpoint-250/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..778f43be6afd1d1a469dafeb129160b7207123d6 --- /dev/null +++ b/codellama/c/codegen/codegen_c_srcml/checkpoint-250/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "CodeLlama-13b-Instruct-hf/", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "gate_proj", + "down_proj", + "k_proj", + "o_proj", + "up_proj", + "v_proj", + "q_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/codellama/c/codegen/codegen_c_srcml/checkpoint-250/adapter_model.safetensors b/codellama/c/codegen/codegen_c_srcml/checkpoint-250/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..472ca93e8650912b68bef758b61ab895c1ea5952 --- /dev/null +++ b/codellama/c/codegen/codegen_c_srcml/checkpoint-250/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb47aa0c69a4346a403498f8f7efba5284f70ecdcbb7a8153c01607c91dd8cc5 +size 1156480200 diff --git a/codellama/c/codegen/codegen_c_srcml/checkpoint-250/adapter_model/README.md b/codellama/c/codegen/codegen_c_srcml/checkpoint-250/adapter_model/README.md new file mode 100644 index 0000000000000000000000000000000000000000..1c480e52daf505882c9f03a4e75fe30ea8c22c2c --- /dev/null +++ b/codellama/c/codegen/codegen_c_srcml/checkpoint-250/adapter_model/README.md @@ -0,0 +1,202 @@ +--- +base_model: CodeLlama-13b-Instruct-hf/ +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.2 \ No newline at end of file diff --git a/codellama/c/codegen/codegen_c_srcml/checkpoint-250/adapter_model/adapter_config.json b/codellama/c/codegen/codegen_c_srcml/checkpoint-250/adapter_model/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..778f43be6afd1d1a469dafeb129160b7207123d6 --- /dev/null +++ b/codellama/c/codegen/codegen_c_srcml/checkpoint-250/adapter_model/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "CodeLlama-13b-Instruct-hf/", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "gate_proj", + "down_proj", + "k_proj", + "o_proj", + "up_proj", + "v_proj", + "q_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/codellama/c/codegen/codegen_c_srcml/checkpoint-250/adapter_model/adapter_model.safetensors b/codellama/c/codegen/codegen_c_srcml/checkpoint-250/adapter_model/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..472ca93e8650912b68bef758b61ab895c1ea5952 --- /dev/null +++ b/codellama/c/codegen/codegen_c_srcml/checkpoint-250/adapter_model/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb47aa0c69a4346a403498f8f7efba5284f70ecdcbb7a8153c01607c91dd8cc5 +size 1156480200 diff --git a/codellama/c/codegen/codegen_c_srcml/checkpoint-250/added_tokens.json b/codellama/c/codegen/codegen_c_srcml/checkpoint-250/added_tokens.json new file mode 100644 index 0000000000000000000000000000000000000000..1cbbe5179eb8b5cc46632bbbc00eb51c68847074 --- /dev/null +++ b/codellama/c/codegen/codegen_c_srcml/checkpoint-250/added_tokens.json @@ -0,0 +1,3 @@ +{ + "[PAD]": 32016 +} diff --git a/codellama/c/codegen/codegen_c_srcml/checkpoint-250/optimizer.pt b/codellama/c/codegen/codegen_c_srcml/checkpoint-250/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..db09c826ec9b1407d8cb2a6c88f9b76f3489a96c --- /dev/null +++ b/codellama/c/codegen/codegen_c_srcml/checkpoint-250/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1bfe21d7c5fd3174c1850ede73764f0a3efede0bb4c934dfe10bb1a9aea329f2 +size 2003126962 diff --git a/codellama/c/codegen/codegen_c_srcml/checkpoint-250/rng_state.pth b/codellama/c/codegen/codegen_c_srcml/checkpoint-250/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..d2d1ca450f51a73b9938ae4d77eda4f9e4e83adb --- /dev/null +++ b/codellama/c/codegen/codegen_c_srcml/checkpoint-250/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3834587c092125bd201661e37e816cdbd55b1a136077c4e0b1d7944daa54445 +size 14244 diff --git a/codellama/c/codegen/codegen_c_srcml/checkpoint-250/scheduler.pt b/codellama/c/codegen/codegen_c_srcml/checkpoint-250/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..643c118cf4c0a4a8f0d0d4818981421321bfc74c --- /dev/null +++ b/codellama/c/codegen/codegen_c_srcml/checkpoint-250/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e30df1bee25ad98e1c721d888a184c00d77649c8e5c8c3b1e8a4c16f9fe7f7ef +size 1064 diff --git a/codellama/c/codegen/codegen_c_srcml/checkpoint-250/special_tokens_map.json b/codellama/c/codegen/codegen_c_srcml/checkpoint-250/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..330bb0c14209dcd402b155e7d58c6c2b5210d40d --- /dev/null +++ b/codellama/c/codegen/codegen_c_srcml/checkpoint-250/special_tokens_map.json @@ -0,0 +1,36 @@ +{ + "additional_special_tokens": [ + "▁ ", + "▁", + "▁ ", + "▁ " + ], + "bos_token": { + "content": " ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "[PAD]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/codellama/c/codegen/codegen_c_srcml/checkpoint-250/tokenizer.model b/codellama/c/codegen/codegen_c_srcml/checkpoint-250/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..f6722e8b170230ebdd4c0f5f2ce03f219be536d4 --- /dev/null +++ b/codellama/c/codegen/codegen_c_srcml/checkpoint-250/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45ccb9c8b6b561889acea59191d66986d314e7cbd6a78abc6e49b139ca91c1e6 +size 500058 diff --git a/codellama/c/codegen/codegen_c_srcml/checkpoint-250/tokenizer_config.json b/codellama/c/codegen/codegen_c_srcml/checkpoint-250/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8f4094d204e2be0ed7b6bfa83d20cff28326258d --- /dev/null +++ b/codellama/c/codegen/codegen_c_srcml/checkpoint-250/tokenizer_config.json @@ -0,0 +1,94 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": " ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": " ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32007": { + "content": "▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32008": { + "content": "▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32009": { + "content": "▁ ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32010": { + "content": "▁ ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32016": { + "content": "[PAD]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [ + "▁ ", + "▁", + "▁ ", + "▁ " + ], + "bos_token": " ", + "chat_template": "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if loop.index0 == 0 and system_message != false %}{% set content = '<", + "eot_token": "▁>\\n' + system_message + '\\n< >\\n\\n' + message['content'] %}{% else %}{% set content = message['content'] %}{% endif %}{% if message['role'] == 'user' %}{{ bos_token + '[INST] ' + content | trim + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ ' ' + content | trim + ' ' + eos_token }}{% endif %}{% endfor %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "fill_token": " ", + "legacy": null, + "middle_token": "▁ ", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "[PAD]", + "padding_side": "right", + "prefix_token": "▁ ", + "sp_model_kwargs": {}, + "suffix_first": false, + "suffix_token": "▁", + "tokenizer_class": "CodeLlamaTokenizer", + "unk_token": " ", + "use_default_system_prompt": false +} diff --git a/codellama/c/codegen/codegen_c_srcml/checkpoint-250/trainer_state.json b/codellama/c/codegen/codegen_c_srcml/checkpoint-250/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..396607297f9a12047f1473dc53d8b25c15016012 --- /dev/null +++ b/codellama/c/codegen/codegen_c_srcml/checkpoint-250/trainer_state.json @@ -0,0 +1,383 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.5594541910331383, + "eval_steps": 500, + "global_step": 250, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.031189083820662766, + "grad_norm": 0.08056640625, + "learning_rate": 0.0001, + "loss": 0.6599, + "step": 5 + }, + { + "epoch": 0.06237816764132553, + "grad_norm": 0.049560546875, + "learning_rate": 0.0001, + "loss": 0.4191, + "step": 10 + }, + { + "epoch": 0.0935672514619883, + "grad_norm": 0.046875, + "learning_rate": 0.0001, + "loss": 0.3937, + "step": 15 + }, + { + "epoch": 0.12475633528265107, + "grad_norm": 0.056640625, + "learning_rate": 0.0001, + "loss": 0.3818, + "step": 20 + }, + { + "epoch": 0.15594541910331383, + "grad_norm": 0.030029296875, + "learning_rate": 0.0001, + "loss": 0.3685, + "step": 25 + }, + { + "epoch": 0.1871345029239766, + "grad_norm": 0.031005859375, + "learning_rate": 0.0001, + "loss": 0.361, + "step": 30 + }, + { + "epoch": 0.21832358674463936, + "grad_norm": 0.03564453125, + "learning_rate": 0.0001, + "loss": 0.3753, + "step": 35 + }, + { + "epoch": 0.24951267056530213, + "grad_norm": 0.1162109375, + "learning_rate": 0.0001, + "loss": 0.4059, + "step": 40 + }, + { + "epoch": 0.2807017543859649, + "grad_norm": 0.045166015625, + "learning_rate": 0.0001, + "loss": 0.3342, + "step": 45 + }, + { + "epoch": 0.31189083820662766, + "grad_norm": 0.027587890625, + "learning_rate": 0.0001, + "loss": 0.3155, + "step": 50 + }, + { + "epoch": 0.34307992202729043, + "grad_norm": 0.0284423828125, + "learning_rate": 0.0001, + "loss": 0.3219, + "step": 55 + }, + { + "epoch": 0.3742690058479532, + "grad_norm": 0.0255126953125, + "learning_rate": 0.0001, + "loss": 0.3279, + "step": 60 + }, + { + "epoch": 0.40545808966861596, + "grad_norm": 0.0267333984375, + "learning_rate": 0.0001, + "loss": 0.3145, + "step": 65 + }, + { + "epoch": 0.43664717348927873, + "grad_norm": 0.027099609375, + "learning_rate": 0.0001, + "loss": 0.3277, + "step": 70 + }, + { + "epoch": 0.4678362573099415, + "grad_norm": 0.03173828125, + "learning_rate": 0.0001, + "loss": 0.3354, + "step": 75 + }, + { + "epoch": 0.49902534113060426, + "grad_norm": 0.06982421875, + "learning_rate": 0.0001, + "loss": 0.3578, + "step": 80 + }, + { + "epoch": 0.530214424951267, + "grad_norm": 0.03759765625, + "learning_rate": 0.0001, + "loss": 0.317, + "step": 85 + }, + { + "epoch": 0.5614035087719298, + "grad_norm": 0.02734375, + "learning_rate": 0.0001, + "loss": 0.2833, + "step": 90 + }, + { + "epoch": 0.5925925925925926, + "grad_norm": 0.029052734375, + "learning_rate": 0.0001, + "loss": 0.2981, + "step": 95 + }, + { + "epoch": 0.6237816764132553, + "grad_norm": 0.0281982421875, + "learning_rate": 0.0001, + "loss": 0.3059, + "step": 100 + }, + { + "epoch": 0.6549707602339181, + "grad_norm": 0.0277099609375, + "learning_rate": 0.0001, + "loss": 0.2979, + "step": 105 + }, + { + "epoch": 0.6861598440545809, + "grad_norm": 0.0289306640625, + "learning_rate": 0.0001, + "loss": 0.3014, + "step": 110 + }, + { + "epoch": 0.7173489278752436, + "grad_norm": 0.033447265625, + "learning_rate": 0.0001, + "loss": 0.322, + "step": 115 + }, + { + "epoch": 0.7485380116959064, + "grad_norm": 0.08056640625, + "learning_rate": 0.0001, + "loss": 0.3465, + "step": 120 + }, + { + "epoch": 0.7797270955165692, + "grad_norm": 0.04296875, + "learning_rate": 0.0001, + "loss": 0.2876, + "step": 125 + }, + { + "epoch": 0.8109161793372319, + "grad_norm": 0.0291748046875, + "learning_rate": 0.0001, + "loss": 0.2804, + "step": 130 + }, + { + "epoch": 0.8421052631578947, + "grad_norm": 0.0264892578125, + "learning_rate": 0.0001, + "loss": 0.2998, + "step": 135 + }, + { + "epoch": 0.8732943469785575, + "grad_norm": 0.0269775390625, + "learning_rate": 0.0001, + "loss": 0.2981, + "step": 140 + }, + { + "epoch": 0.9044834307992202, + "grad_norm": 0.0302734375, + "learning_rate": 0.0001, + "loss": 0.3018, + "step": 145 + }, + { + "epoch": 0.935672514619883, + "grad_norm": 0.03271484375, + "learning_rate": 0.0001, + "loss": 0.2936, + "step": 150 + }, + { + "epoch": 0.9668615984405458, + "grad_norm": 0.0341796875, + "learning_rate": 0.0001, + "loss": 0.3017, + "step": 155 + }, + { + "epoch": 0.9980506822612085, + "grad_norm": 0.07763671875, + "learning_rate": 0.0001, + "loss": 0.35, + "step": 160 + }, + { + "epoch": 1.0292397660818713, + "grad_norm": 0.034423828125, + "learning_rate": 0.0001, + "loss": 0.2864, + "step": 165 + }, + { + "epoch": 1.060428849902534, + "grad_norm": 0.03515625, + "learning_rate": 0.0001, + "loss": 0.2628, + "step": 170 + }, + { + "epoch": 1.0916179337231968, + "grad_norm": 0.0322265625, + "learning_rate": 0.0001, + "loss": 0.2783, + "step": 175 + }, + { + "epoch": 1.1228070175438596, + "grad_norm": 0.031494140625, + "learning_rate": 0.0001, + "loss": 0.2912, + "step": 180 + }, + { + "epoch": 1.1539961013645224, + "grad_norm": 0.031005859375, + "learning_rate": 0.0001, + "loss": 0.2902, + "step": 185 + }, + { + "epoch": 1.1851851851851851, + "grad_norm": 0.035400390625, + "learning_rate": 0.0001, + "loss": 0.2783, + "step": 190 + }, + { + "epoch": 1.2163742690058479, + "grad_norm": 0.126953125, + "learning_rate": 0.0001, + "loss": 0.295, + "step": 195 + }, + { + "epoch": 1.2475633528265107, + "grad_norm": 0.057373046875, + "learning_rate": 0.0001, + "loss": 0.3035, + "step": 200 + }, + { + "epoch": 1.2787524366471734, + "grad_norm": 0.036376953125, + "learning_rate": 0.0001, + "loss": 0.2732, + "step": 205 + }, + { + "epoch": 1.3099415204678362, + "grad_norm": 0.040771484375, + "learning_rate": 0.0001, + "loss": 0.2738, + "step": 210 + }, + { + "epoch": 1.341130604288499, + "grad_norm": 0.03466796875, + "learning_rate": 0.0001, + "loss": 0.2868, + "step": 215 + }, + { + "epoch": 1.3723196881091617, + "grad_norm": 0.03662109375, + "learning_rate": 0.0001, + "loss": 0.2863, + "step": 220 + }, + { + "epoch": 1.4035087719298245, + "grad_norm": 0.036865234375, + "learning_rate": 0.0001, + "loss": 0.2858, + "step": 225 + }, + { + "epoch": 1.4346978557504872, + "grad_norm": 0.035400390625, + "learning_rate": 0.0001, + "loss": 0.2717, + "step": 230 + }, + { + "epoch": 1.46588693957115, + "grad_norm": 0.043212890625, + "learning_rate": 0.0001, + "loss": 0.2987, + "step": 235 + }, + { + "epoch": 1.4970760233918128, + "grad_norm": 0.0673828125, + "learning_rate": 0.0001, + "loss": 0.3034, + "step": 240 + }, + { + "epoch": 1.5282651072124755, + "grad_norm": 0.044677734375, + "learning_rate": 0.0001, + "loss": 0.2915, + "step": 245 + }, + { + "epoch": 1.5594541910331383, + "grad_norm": 0.033447265625, + "learning_rate": 0.0001, + "loss": 0.2551, + "step": 250 + } + ], + "logging_steps": 5, + "max_steps": 250, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 90, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 4.824681746497536e+17, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/codellama/c/codegen/codegen_c_srcml/checkpoint-250/training_args.bin b/codellama/c/codegen/codegen_c_srcml/checkpoint-250/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..a7821e913d6ae10d45b53e723f526cff5f5fa882 --- /dev/null +++ b/codellama/c/codegen/codegen_c_srcml/checkpoint-250/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d8cefd12ccf98d630301cfc0e34899e503758b920913c79bb40dea210d1f4b8 +size 7416 diff --git a/codellama/c/codegen/codegen_c_srcml/checkpoint-90/README.md b/codellama/c/codegen/codegen_c_srcml/checkpoint-90/README.md new file mode 100644 index 0000000000000000000000000000000000000000..1c480e52daf505882c9f03a4e75fe30ea8c22c2c --- /dev/null +++ b/codellama/c/codegen/codegen_c_srcml/checkpoint-90/README.md @@ -0,0 +1,202 @@ +--- +base_model: CodeLlama-13b-Instruct-hf/ +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.2 \ No newline at end of file diff --git a/codellama/c/codegen/codegen_c_srcml/checkpoint-90/adapter_config.json b/codellama/c/codegen/codegen_c_srcml/checkpoint-90/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..778f43be6afd1d1a469dafeb129160b7207123d6 --- /dev/null +++ b/codellama/c/codegen/codegen_c_srcml/checkpoint-90/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "CodeLlama-13b-Instruct-hf/", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "gate_proj", + "down_proj", + "k_proj", + "o_proj", + "up_proj", + "v_proj", + "q_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/codellama/c/codegen/codegen_c_srcml/checkpoint-90/adapter_model.safetensors b/codellama/c/codegen/codegen_c_srcml/checkpoint-90/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c9c4530d002562f882f7f6329cb1976539f35214 --- /dev/null +++ b/codellama/c/codegen/codegen_c_srcml/checkpoint-90/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2ceaaccb70800cb5a132da987a04ba48977afa2504b32f05c06d014e5b73c89 +size 1156480200 diff --git a/codellama/c/codegen/codegen_c_srcml/checkpoint-90/adapter_model/README.md b/codellama/c/codegen/codegen_c_srcml/checkpoint-90/adapter_model/README.md new file mode 100644 index 0000000000000000000000000000000000000000..1c480e52daf505882c9f03a4e75fe30ea8c22c2c --- /dev/null +++ b/codellama/c/codegen/codegen_c_srcml/checkpoint-90/adapter_model/README.md @@ -0,0 +1,202 @@ +--- +base_model: CodeLlama-13b-Instruct-hf/ +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.2 \ No newline at end of file diff --git a/codellama/c/codegen/codegen_c_srcml/checkpoint-90/adapter_model/adapter_config.json b/codellama/c/codegen/codegen_c_srcml/checkpoint-90/adapter_model/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..778f43be6afd1d1a469dafeb129160b7207123d6 --- /dev/null +++ b/codellama/c/codegen/codegen_c_srcml/checkpoint-90/adapter_model/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "CodeLlama-13b-Instruct-hf/", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "gate_proj", + "down_proj", + "k_proj", + "o_proj", + "up_proj", + "v_proj", + "q_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/codellama/c/codegen/codegen_c_srcml/checkpoint-90/adapter_model/adapter_model.safetensors b/codellama/c/codegen/codegen_c_srcml/checkpoint-90/adapter_model/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c9c4530d002562f882f7f6329cb1976539f35214 --- /dev/null +++ b/codellama/c/codegen/codegen_c_srcml/checkpoint-90/adapter_model/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2ceaaccb70800cb5a132da987a04ba48977afa2504b32f05c06d014e5b73c89 +size 1156480200 diff --git a/codellama/c/codegen/codegen_c_srcml/checkpoint-90/added_tokens.json b/codellama/c/codegen/codegen_c_srcml/checkpoint-90/added_tokens.json new file mode 100644 index 0000000000000000000000000000000000000000..1cbbe5179eb8b5cc46632bbbc00eb51c68847074 --- /dev/null +++ b/codellama/c/codegen/codegen_c_srcml/checkpoint-90/added_tokens.json @@ -0,0 +1,3 @@ +{ + "[PAD]": 32016 +} diff --git a/codellama/c/codegen/codegen_c_srcml/checkpoint-90/optimizer.pt b/codellama/c/codegen/codegen_c_srcml/checkpoint-90/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..00f381df7279dff4837f2e6fa90fd133d7f19a1e --- /dev/null +++ b/codellama/c/codegen/codegen_c_srcml/checkpoint-90/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:efd34e16d6d9bec26f7e368c7ef2cd63204e961df954ead2921e01a0bdf5679f +size 2003126962 diff --git a/codellama/c/codegen/codegen_c_srcml/checkpoint-90/rng_state.pth b/codellama/c/codegen/codegen_c_srcml/checkpoint-90/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..2577309f2ed45a48589e0c125c309b844dd4b8ee --- /dev/null +++ b/codellama/c/codegen/codegen_c_srcml/checkpoint-90/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cdf9f3deae6bacd3a8b21adc150d1212440814d7a865155cb7c3ed9641eba3f7 +size 14244 diff --git a/codellama/c/codegen/codegen_c_srcml/checkpoint-90/scheduler.pt b/codellama/c/codegen/codegen_c_srcml/checkpoint-90/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..a5088c269cc64d90dc46ecb1a7fb7927ad6415d8 --- /dev/null +++ b/codellama/c/codegen/codegen_c_srcml/checkpoint-90/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c7d5936e70e72bf0e3651da983818a5b36c8198eb19437975051ad543d68cc9 +size 1064 diff --git a/codellama/c/codegen/codegen_c_srcml/checkpoint-90/special_tokens_map.json b/codellama/c/codegen/codegen_c_srcml/checkpoint-90/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..330bb0c14209dcd402b155e7d58c6c2b5210d40d --- /dev/null +++ b/codellama/c/codegen/codegen_c_srcml/checkpoint-90/special_tokens_map.json @@ -0,0 +1,36 @@ +{ + "additional_special_tokens": [ + "▁ ", + "▁", + "▁ ", + "▁ " + ], + "bos_token": { + "content": " ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "[PAD]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/codellama/c/codegen/codegen_c_srcml/checkpoint-90/tokenizer.model b/codellama/c/codegen/codegen_c_srcml/checkpoint-90/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..f6722e8b170230ebdd4c0f5f2ce03f219be536d4 --- /dev/null +++ b/codellama/c/codegen/codegen_c_srcml/checkpoint-90/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45ccb9c8b6b561889acea59191d66986d314e7cbd6a78abc6e49b139ca91c1e6 +size 500058 diff --git a/codellama/c/codegen/codegen_c_srcml/checkpoint-90/tokenizer_config.json b/codellama/c/codegen/codegen_c_srcml/checkpoint-90/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8f4094d204e2be0ed7b6bfa83d20cff28326258d --- /dev/null +++ b/codellama/c/codegen/codegen_c_srcml/checkpoint-90/tokenizer_config.json @@ -0,0 +1,94 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": " ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": " ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32007": { + "content": "▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32008": { + "content": "▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32009": { + "content": "▁ ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32010": { + "content": "▁ ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32016": { + "content": "[PAD]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [ + "▁ ", + "▁", + "▁ ", + "▁ " + ], + "bos_token": " ", + "chat_template": "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if loop.index0 == 0 and system_message != false %}{% set content = '<", + "eot_token": "▁>\\n' + system_message + '\\n< >\\n\\n' + message['content'] %}{% else %}{% set content = message['content'] %}{% endif %}{% if message['role'] == 'user' %}{{ bos_token + '[INST] ' + content | trim + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ ' ' + content | trim + ' ' + eos_token }}{% endif %}{% endfor %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "fill_token": " ", + "legacy": null, + "middle_token": "▁ ", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "[PAD]", + "padding_side": "right", + "prefix_token": "▁ ", + "sp_model_kwargs": {}, + "suffix_first": false, + "suffix_token": "▁", + "tokenizer_class": "CodeLlamaTokenizer", + "unk_token": " ", + "use_default_system_prompt": false +} diff --git a/codellama/c/codegen/codegen_c_srcml/checkpoint-90/trainer_state.json b/codellama/c/codegen/codegen_c_srcml/checkpoint-90/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..9ac62844f08c5914c30ad523939f9e2855277c27 --- /dev/null +++ b/codellama/c/codegen/codegen_c_srcml/checkpoint-90/trainer_state.json @@ -0,0 +1,159 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.5614035087719298, + "eval_steps": 500, + "global_step": 90, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.031189083820662766, + "grad_norm": 0.08056640625, + "learning_rate": 0.0001, + "loss": 0.6599, + "step": 5 + }, + { + "epoch": 0.06237816764132553, + "grad_norm": 0.049560546875, + "learning_rate": 0.0001, + "loss": 0.4191, + "step": 10 + }, + { + "epoch": 0.0935672514619883, + "grad_norm": 0.046875, + "learning_rate": 0.0001, + "loss": 0.3937, + "step": 15 + }, + { + "epoch": 0.12475633528265107, + "grad_norm": 0.056640625, + "learning_rate": 0.0001, + "loss": 0.3818, + "step": 20 + }, + { + "epoch": 0.15594541910331383, + "grad_norm": 0.030029296875, + "learning_rate": 0.0001, + "loss": 0.3685, + "step": 25 + }, + { + "epoch": 0.1871345029239766, + "grad_norm": 0.031005859375, + "learning_rate": 0.0001, + "loss": 0.361, + "step": 30 + }, + { + "epoch": 0.21832358674463936, + "grad_norm": 0.03564453125, + "learning_rate": 0.0001, + "loss": 0.3753, + "step": 35 + }, + { + "epoch": 0.24951267056530213, + "grad_norm": 0.1162109375, + "learning_rate": 0.0001, + "loss": 0.4059, + "step": 40 + }, + { + "epoch": 0.2807017543859649, + "grad_norm": 0.045166015625, + "learning_rate": 0.0001, + "loss": 0.3342, + "step": 45 + }, + { + "epoch": 0.31189083820662766, + "grad_norm": 0.027587890625, + "learning_rate": 0.0001, + "loss": 0.3155, + "step": 50 + }, + { + "epoch": 0.34307992202729043, + "grad_norm": 0.0284423828125, + "learning_rate": 0.0001, + "loss": 0.3219, + "step": 55 + }, + { + "epoch": 0.3742690058479532, + "grad_norm": 0.0255126953125, + "learning_rate": 0.0001, + "loss": 0.3279, + "step": 60 + }, + { + "epoch": 0.40545808966861596, + "grad_norm": 0.0267333984375, + "learning_rate": 0.0001, + "loss": 0.3145, + "step": 65 + }, + { + "epoch": 0.43664717348927873, + "grad_norm": 0.027099609375, + "learning_rate": 0.0001, + "loss": 0.3277, + "step": 70 + }, + { + "epoch": 0.4678362573099415, + "grad_norm": 0.03173828125, + "learning_rate": 0.0001, + "loss": 0.3354, + "step": 75 + }, + { + "epoch": 0.49902534113060426, + "grad_norm": 0.06982421875, + "learning_rate": 0.0001, + "loss": 0.3578, + "step": 80 + }, + { + "epoch": 0.530214424951267, + "grad_norm": 0.03759765625, + "learning_rate": 0.0001, + "loss": 0.317, + "step": 85 + }, + { + "epoch": 0.5614035087719298, + "grad_norm": 0.02734375, + "learning_rate": 0.0001, + "loss": 0.2833, + "step": 90 + } + ], + "logging_steps": 5, + "max_steps": 250, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 90, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.797935130464256e+17, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/codellama/c/codegen/codegen_c_srcml/checkpoint-90/training_args.bin b/codellama/c/codegen/codegen_c_srcml/checkpoint-90/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..a7821e913d6ae10d45b53e723f526cff5f5fa882 --- /dev/null +++ b/codellama/c/codegen/codegen_c_srcml/checkpoint-90/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d8cefd12ccf98d630301cfc0e34899e503758b920913c79bb40dea210d1f4b8 +size 7416 diff --git a/codellama/c/codegen/codegen_c_srcml/completed b/codellama/c/codegen/codegen_c_srcml/completed new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/codellama/c/codegen/codegen_c_srcml/metrics.json b/codellama/c/codegen/codegen_c_srcml/metrics.json new file mode 100644 index 0000000000000000000000000000000000000000..cf98a3315464b2b516bdee402b2183a74ed9a362 --- /dev/null +++ b/codellama/c/codegen/codegen_c_srcml/metrics.json @@ -0,0 +1 @@ +{"run_name": "codegen_c_srcml", "train_runtime": 16530.0404, "train_samples_per_second": 0.968, "train_steps_per_second": 0.015, "total_flos": 4.824681746497536e+17, "train_loss": 0.31994184494018557, "epoch": 1.5594541910331383} \ No newline at end of file diff --git a/codellama/c/codegen/codegen_c_srcml/train_results.json b/codellama/c/codegen/codegen_c_srcml/train_results.json new file mode 100644 index 0000000000000000000000000000000000000000..5c13066310556e5f10bf8cd7b0a69aa0a4e5ec08 --- /dev/null +++ b/codellama/c/codegen/codegen_c_srcml/train_results.json @@ -0,0 +1,8 @@ +{ + "epoch": 1.5594541910331383, + "total_flos": 4.824681746497536e+17, + "train_loss": 0.31994184494018557, + "train_runtime": 16530.0404, + "train_samples_per_second": 0.968, + "train_steps_per_second": 0.015 +} \ No newline at end of file diff --git a/codellama/c/codegen/codegen_c_srcml/trainer_state.json b/codellama/c/codegen/codegen_c_srcml/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..6bd80426a8d061c0cea69e4f504a890bc7325c35 --- /dev/null +++ b/codellama/c/codegen/codegen_c_srcml/trainer_state.json @@ -0,0 +1,392 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.5594541910331383, + "eval_steps": 500, + "global_step": 250, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.031189083820662766, + "grad_norm": 0.08056640625, + "learning_rate": 0.0001, + "loss": 0.6599, + "step": 5 + }, + { + "epoch": 0.06237816764132553, + "grad_norm": 0.049560546875, + "learning_rate": 0.0001, + "loss": 0.4191, + "step": 10 + }, + { + "epoch": 0.0935672514619883, + "grad_norm": 0.046875, + "learning_rate": 0.0001, + "loss": 0.3937, + "step": 15 + }, + { + "epoch": 0.12475633528265107, + "grad_norm": 0.056640625, + "learning_rate": 0.0001, + "loss": 0.3818, + "step": 20 + }, + { + "epoch": 0.15594541910331383, + "grad_norm": 0.030029296875, + "learning_rate": 0.0001, + "loss": 0.3685, + "step": 25 + }, + { + "epoch": 0.1871345029239766, + "grad_norm": 0.031005859375, + "learning_rate": 0.0001, + "loss": 0.361, + "step": 30 + }, + { + "epoch": 0.21832358674463936, + "grad_norm": 0.03564453125, + "learning_rate": 0.0001, + "loss": 0.3753, + "step": 35 + }, + { + "epoch": 0.24951267056530213, + "grad_norm": 0.1162109375, + "learning_rate": 0.0001, + "loss": 0.4059, + "step": 40 + }, + { + "epoch": 0.2807017543859649, + "grad_norm": 0.045166015625, + "learning_rate": 0.0001, + "loss": 0.3342, + "step": 45 + }, + { + "epoch": 0.31189083820662766, + "grad_norm": 0.027587890625, + "learning_rate": 0.0001, + "loss": 0.3155, + "step": 50 + }, + { + "epoch": 0.34307992202729043, + "grad_norm": 0.0284423828125, + "learning_rate": 0.0001, + "loss": 0.3219, + "step": 55 + }, + { + "epoch": 0.3742690058479532, + "grad_norm": 0.0255126953125, + "learning_rate": 0.0001, + "loss": 0.3279, + "step": 60 + }, + { + "epoch": 0.40545808966861596, + "grad_norm": 0.0267333984375, + "learning_rate": 0.0001, + "loss": 0.3145, + "step": 65 + }, + { + "epoch": 0.43664717348927873, + "grad_norm": 0.027099609375, + "learning_rate": 0.0001, + "loss": 0.3277, + "step": 70 + }, + { + "epoch": 0.4678362573099415, + "grad_norm": 0.03173828125, + "learning_rate": 0.0001, + "loss": 0.3354, + "step": 75 + }, + { + "epoch": 0.49902534113060426, + "grad_norm": 0.06982421875, + "learning_rate": 0.0001, + "loss": 0.3578, + "step": 80 + }, + { + "epoch": 0.530214424951267, + "grad_norm": 0.03759765625, + "learning_rate": 0.0001, + "loss": 0.317, + "step": 85 + }, + { + "epoch": 0.5614035087719298, + "grad_norm": 0.02734375, + "learning_rate": 0.0001, + "loss": 0.2833, + "step": 90 + }, + { + "epoch": 0.5925925925925926, + "grad_norm": 0.029052734375, + "learning_rate": 0.0001, + "loss": 0.2981, + "step": 95 + }, + { + "epoch": 0.6237816764132553, + "grad_norm": 0.0281982421875, + "learning_rate": 0.0001, + "loss": 0.3059, + "step": 100 + }, + { + "epoch": 0.6549707602339181, + "grad_norm": 0.0277099609375, + "learning_rate": 0.0001, + "loss": 0.2979, + "step": 105 + }, + { + "epoch": 0.6861598440545809, + "grad_norm": 0.0289306640625, + "learning_rate": 0.0001, + "loss": 0.3014, + "step": 110 + }, + { + "epoch": 0.7173489278752436, + "grad_norm": 0.033447265625, + "learning_rate": 0.0001, + "loss": 0.322, + "step": 115 + }, + { + "epoch": 0.7485380116959064, + "grad_norm": 0.08056640625, + "learning_rate": 0.0001, + "loss": 0.3465, + "step": 120 + }, + { + "epoch": 0.7797270955165692, + "grad_norm": 0.04296875, + "learning_rate": 0.0001, + "loss": 0.2876, + "step": 125 + }, + { + "epoch": 0.8109161793372319, + "grad_norm": 0.0291748046875, + "learning_rate": 0.0001, + "loss": 0.2804, + "step": 130 + }, + { + "epoch": 0.8421052631578947, + "grad_norm": 0.0264892578125, + "learning_rate": 0.0001, + "loss": 0.2998, + "step": 135 + }, + { + "epoch": 0.8732943469785575, + "grad_norm": 0.0269775390625, + "learning_rate": 0.0001, + "loss": 0.2981, + "step": 140 + }, + { + "epoch": 0.9044834307992202, + "grad_norm": 0.0302734375, + "learning_rate": 0.0001, + "loss": 0.3018, + "step": 145 + }, + { + "epoch": 0.935672514619883, + "grad_norm": 0.03271484375, + "learning_rate": 0.0001, + "loss": 0.2936, + "step": 150 + }, + { + "epoch": 0.9668615984405458, + "grad_norm": 0.0341796875, + "learning_rate": 0.0001, + "loss": 0.3017, + "step": 155 + }, + { + "epoch": 0.9980506822612085, + "grad_norm": 0.07763671875, + "learning_rate": 0.0001, + "loss": 0.35, + "step": 160 + }, + { + "epoch": 1.0292397660818713, + "grad_norm": 0.034423828125, + "learning_rate": 0.0001, + "loss": 0.2864, + "step": 165 + }, + { + "epoch": 1.060428849902534, + "grad_norm": 0.03515625, + "learning_rate": 0.0001, + "loss": 0.2628, + "step": 170 + }, + { + "epoch": 1.0916179337231968, + "grad_norm": 0.0322265625, + "learning_rate": 0.0001, + "loss": 0.2783, + "step": 175 + }, + { + "epoch": 1.1228070175438596, + "grad_norm": 0.031494140625, + "learning_rate": 0.0001, + "loss": 0.2912, + "step": 180 + }, + { + "epoch": 1.1539961013645224, + "grad_norm": 0.031005859375, + "learning_rate": 0.0001, + "loss": 0.2902, + "step": 185 + }, + { + "epoch": 1.1851851851851851, + "grad_norm": 0.035400390625, + "learning_rate": 0.0001, + "loss": 0.2783, + "step": 190 + }, + { + "epoch": 1.2163742690058479, + "grad_norm": 0.126953125, + "learning_rate": 0.0001, + "loss": 0.295, + "step": 195 + }, + { + "epoch": 1.2475633528265107, + "grad_norm": 0.057373046875, + "learning_rate": 0.0001, + "loss": 0.3035, + "step": 200 + }, + { + "epoch": 1.2787524366471734, + "grad_norm": 0.036376953125, + "learning_rate": 0.0001, + "loss": 0.2732, + "step": 205 + }, + { + "epoch": 1.3099415204678362, + "grad_norm": 0.040771484375, + "learning_rate": 0.0001, + "loss": 0.2738, + "step": 210 + }, + { + "epoch": 1.341130604288499, + "grad_norm": 0.03466796875, + "learning_rate": 0.0001, + "loss": 0.2868, + "step": 215 + }, + { + "epoch": 1.3723196881091617, + "grad_norm": 0.03662109375, + "learning_rate": 0.0001, + "loss": 0.2863, + "step": 220 + }, + { + "epoch": 1.4035087719298245, + "grad_norm": 0.036865234375, + "learning_rate": 0.0001, + "loss": 0.2858, + "step": 225 + }, + { + "epoch": 1.4346978557504872, + "grad_norm": 0.035400390625, + "learning_rate": 0.0001, + "loss": 0.2717, + "step": 230 + }, + { + "epoch": 1.46588693957115, + "grad_norm": 0.043212890625, + "learning_rate": 0.0001, + "loss": 0.2987, + "step": 235 + }, + { + "epoch": 1.4970760233918128, + "grad_norm": 0.0673828125, + "learning_rate": 0.0001, + "loss": 0.3034, + "step": 240 + }, + { + "epoch": 1.5282651072124755, + "grad_norm": 0.044677734375, + "learning_rate": 0.0001, + "loss": 0.2915, + "step": 245 + }, + { + "epoch": 1.5594541910331383, + "grad_norm": 0.033447265625, + "learning_rate": 0.0001, + "loss": 0.2551, + "step": 250 + }, + { + "epoch": 1.5594541910331383, + "step": 250, + "total_flos": 4.824681746497536e+17, + "train_loss": 0.31994184494018557, + "train_runtime": 16530.0404, + "train_samples_per_second": 0.968, + "train_steps_per_second": 0.015 + } + ], + "logging_steps": 5, + "max_steps": 250, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 90, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 4.824681746497536e+17, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/codellama/c/codesum/codesum_c_base/all_results.json b/codellama/c/codesum/codesum_c_base/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..5e4529b89bebaa513bcf3fb8f8cae51b419fec4f --- /dev/null +++ b/codellama/c/codesum/codesum_c_base/all_results.json @@ -0,0 +1,8 @@ +{ + "epoch": 0.5037783375314862, + "total_flos": 4.334613097500672e+17, + "train_loss": 1.6184998904334174, + "train_runtime": 20544.5242, + "train_samples_per_second": 1.402, + "train_steps_per_second": 0.022 +} \ No newline at end of file diff --git a/codellama/c/codesum/codesum_c_base/checkpoint-450/README.md b/codellama/c/codesum/codesum_c_base/checkpoint-450/README.md new file mode 100644 index 0000000000000000000000000000000000000000..1c480e52daf505882c9f03a4e75fe30ea8c22c2c --- /dev/null +++ b/codellama/c/codesum/codesum_c_base/checkpoint-450/README.md @@ -0,0 +1,202 @@ +--- +base_model: CodeLlama-13b-Instruct-hf/ +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.2 \ No newline at end of file diff --git a/codellama/c/codesum/codesum_c_base/checkpoint-450/adapter_config.json b/codellama/c/codesum/codesum_c_base/checkpoint-450/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..9b5801818a155c4030185a51a0a1d7fc30a34885 --- /dev/null +++ b/codellama/c/codesum/codesum_c_base/checkpoint-450/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "CodeLlama-13b-Instruct-hf/", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "o_proj", + "gate_proj", + "down_proj", + "k_proj", + "q_proj", + "up_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/codellama/c/codesum/codesum_c_base/checkpoint-450/adapter_model.safetensors b/codellama/c/codesum/codesum_c_base/checkpoint-450/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..187175f4a9fc19dc32c77cf603d7354c0af698e5 --- /dev/null +++ b/codellama/c/codesum/codesum_c_base/checkpoint-450/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c77432acdb0b36bac0a78eb5d2893c4cb7d8c45c0f01d814fd95aa44133bda1 +size 1156480200 diff --git a/codellama/c/codesum/codesum_c_base/checkpoint-450/adapter_model/README.md b/codellama/c/codesum/codesum_c_base/checkpoint-450/adapter_model/README.md new file mode 100644 index 0000000000000000000000000000000000000000..1c480e52daf505882c9f03a4e75fe30ea8c22c2c --- /dev/null +++ b/codellama/c/codesum/codesum_c_base/checkpoint-450/adapter_model/README.md @@ -0,0 +1,202 @@ +--- +base_model: CodeLlama-13b-Instruct-hf/ +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.2 \ No newline at end of file diff --git a/codellama/c/codesum/codesum_c_base/checkpoint-450/adapter_model/adapter_config.json b/codellama/c/codesum/codesum_c_base/checkpoint-450/adapter_model/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..9b5801818a155c4030185a51a0a1d7fc30a34885 --- /dev/null +++ b/codellama/c/codesum/codesum_c_base/checkpoint-450/adapter_model/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "CodeLlama-13b-Instruct-hf/", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "o_proj", + "gate_proj", + "down_proj", + "k_proj", + "q_proj", + "up_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/codellama/c/codesum/codesum_c_base/checkpoint-450/adapter_model/adapter_model.safetensors b/codellama/c/codesum/codesum_c_base/checkpoint-450/adapter_model/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..187175f4a9fc19dc32c77cf603d7354c0af698e5 --- /dev/null +++ b/codellama/c/codesum/codesum_c_base/checkpoint-450/adapter_model/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c77432acdb0b36bac0a78eb5d2893c4cb7d8c45c0f01d814fd95aa44133bda1 +size 1156480200 diff --git a/codellama/c/codesum/codesum_c_base/checkpoint-450/added_tokens.json b/codellama/c/codesum/codesum_c_base/checkpoint-450/added_tokens.json new file mode 100644 index 0000000000000000000000000000000000000000..1cbbe5179eb8b5cc46632bbbc00eb51c68847074 --- /dev/null +++ b/codellama/c/codesum/codesum_c_base/checkpoint-450/added_tokens.json @@ -0,0 +1,3 @@ +{ + "[PAD]": 32016 +} diff --git a/codellama/c/codesum/codesum_c_base/checkpoint-450/optimizer.pt b/codellama/c/codesum/codesum_c_base/checkpoint-450/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..2fd60fa0849528178dd6814b54355a8409904ab7 --- /dev/null +++ b/codellama/c/codesum/codesum_c_base/checkpoint-450/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15139e33e0eaa245b8c48a2264c0500d1d2c35e4205a28553a656573811bd7d9 +size 2003127538 diff --git a/codellama/c/codesum/codesum_c_base/checkpoint-450/rng_state.pth b/codellama/c/codesum/codesum_c_base/checkpoint-450/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..75607438305a6cd872edd07e5a21a914f698ce0b --- /dev/null +++ b/codellama/c/codesum/codesum_c_base/checkpoint-450/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9724b479bcde75696d93ccd1f92c294317abd162382cc656d5dcbb0500c63f6a +size 14244 diff --git a/codellama/c/codesum/codesum_c_base/checkpoint-450/scheduler.pt b/codellama/c/codesum/codesum_c_base/checkpoint-450/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..c5b95bc48aced6514998ca04f85182a6f50b3ae5 --- /dev/null +++ b/codellama/c/codesum/codesum_c_base/checkpoint-450/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94c8e40d3e998ff2e64b4e5b87135c84483399e6a8b1fe73e89c05c4855cb1f5 +size 1064 diff --git a/codellama/c/codesum/codesum_c_base/checkpoint-450/special_tokens_map.json b/codellama/c/codesum/codesum_c_base/checkpoint-450/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..330bb0c14209dcd402b155e7d58c6c2b5210d40d --- /dev/null +++ b/codellama/c/codesum/codesum_c_base/checkpoint-450/special_tokens_map.json @@ -0,0 +1,36 @@ +{ + "additional_special_tokens": [ + "▁ ", + "▁", + "▁ ", + "▁ " + ], + "bos_token": { + "content": " ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "[PAD]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/codellama/c/codesum/codesum_c_base/checkpoint-450/tokenizer.model b/codellama/c/codesum/codesum_c_base/checkpoint-450/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..f6722e8b170230ebdd4c0f5f2ce03f219be536d4 --- /dev/null +++ b/codellama/c/codesum/codesum_c_base/checkpoint-450/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45ccb9c8b6b561889acea59191d66986d314e7cbd6a78abc6e49b139ca91c1e6 +size 500058 diff --git a/codellama/c/codesum/codesum_c_base/checkpoint-450/tokenizer_config.json b/codellama/c/codesum/codesum_c_base/checkpoint-450/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8f4094d204e2be0ed7b6bfa83d20cff28326258d --- /dev/null +++ b/codellama/c/codesum/codesum_c_base/checkpoint-450/tokenizer_config.json @@ -0,0 +1,94 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": " ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": " ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32007": { + "content": "▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32008": { + "content": "▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32009": { + "content": "▁ ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32010": { + "content": "▁ ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32016": { + "content": "[PAD]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [ + "▁ ", + "▁", + "▁ ", + "▁ " + ], + "bos_token": " ", + "chat_template": "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if loop.index0 == 0 and system_message != false %}{% set content = '<", + "eot_token": "▁>\\n' + system_message + '\\n< >\\n\\n' + message['content'] %}{% else %}{% set content = message['content'] %}{% endif %}{% if message['role'] == 'user' %}{{ bos_token + '[INST] ' + content | trim + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ ' ' + content | trim + ' ' + eos_token }}{% endif %}{% endfor %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "fill_token": " ", + "legacy": null, + "middle_token": "▁ ", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "[PAD]", + "padding_side": "right", + "prefix_token": "▁ ", + "sp_model_kwargs": {}, + "suffix_first": false, + "suffix_token": "▁", + "tokenizer_class": "CodeLlamaTokenizer", + "unk_token": " ", + "use_default_system_prompt": false +} diff --git a/codellama/c/codesum/codesum_c_base/checkpoint-450/trainer_state.json b/codellama/c/codesum/codesum_c_base/checkpoint-450/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..3779d4b04463f598ecb99652148cad8338961e78 --- /dev/null +++ b/codellama/c/codesum/codesum_c_base/checkpoint-450/trainer_state.json @@ -0,0 +1,663 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.5037783375314862, + "eval_steps": 500, + "global_step": 450, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.00559753708368318, + "grad_norm": 0.8359375, + "learning_rate": 0.0001, + "loss": 5.5483, + "step": 5 + }, + { + "epoch": 0.01119507416736636, + "grad_norm": 0.55078125, + "learning_rate": 0.0001, + "loss": 4.1635, + "step": 10 + }, + { + "epoch": 0.016792611251049538, + "grad_norm": 0.54296875, + "learning_rate": 0.0001, + "loss": 3.2229, + "step": 15 + }, + { + "epoch": 0.02239014833473272, + "grad_norm": 0.376953125, + "learning_rate": 0.0001, + "loss": 2.8432, + "step": 20 + }, + { + "epoch": 0.027987685418415897, + "grad_norm": 0.462890625, + "learning_rate": 0.0001, + "loss": 2.6705, + "step": 25 + }, + { + "epoch": 0.033585222502099076, + "grad_norm": 0.5234375, + "learning_rate": 0.0001, + "loss": 2.3014, + "step": 30 + }, + { + "epoch": 0.039182759585782254, + "grad_norm": 0.318359375, + "learning_rate": 0.0001, + "loss": 1.8735, + "step": 35 + }, + { + "epoch": 0.04478029666946544, + "grad_norm": 0.2578125, + "learning_rate": 0.0001, + "loss": 1.6909, + "step": 40 + }, + { + "epoch": 0.05037783375314862, + "grad_norm": 0.2158203125, + "learning_rate": 0.0001, + "loss": 1.6512, + "step": 45 + }, + { + "epoch": 0.055975370836831795, + "grad_norm": 0.28515625, + "learning_rate": 0.0001, + "loss": 1.5669, + "step": 50 + }, + { + "epoch": 0.06157290792051497, + "grad_norm": 0.2333984375, + "learning_rate": 0.0001, + "loss": 1.5499, + "step": 55 + }, + { + "epoch": 0.06717044500419815, + "grad_norm": 0.1943359375, + "learning_rate": 0.0001, + "loss": 1.557, + "step": 60 + }, + { + "epoch": 0.07276798208788134, + "grad_norm": 0.1748046875, + "learning_rate": 0.0001, + "loss": 1.5974, + "step": 65 + }, + { + "epoch": 0.07836551917156451, + "grad_norm": 0.173828125, + "learning_rate": 0.0001, + "loss": 1.6001, + "step": 70 + }, + { + "epoch": 0.08396305625524769, + "grad_norm": 0.171875, + "learning_rate": 0.0001, + "loss": 1.6237, + "step": 75 + }, + { + "epoch": 0.08956059333893088, + "grad_norm": 0.2578125, + "learning_rate": 0.0001, + "loss": 1.5338, + "step": 80 + }, + { + "epoch": 0.09515813042261405, + "grad_norm": 0.2177734375, + "learning_rate": 0.0001, + "loss": 1.53, + "step": 85 + }, + { + "epoch": 0.10075566750629723, + "grad_norm": 0.265625, + "learning_rate": 0.0001, + "loss": 1.5943, + "step": 90 + }, + { + "epoch": 0.1063532045899804, + "grad_norm": 0.240234375, + "learning_rate": 0.0001, + "loss": 1.5289, + "step": 95 + }, + { + "epoch": 0.11195074167366359, + "grad_norm": 0.283203125, + "learning_rate": 0.0001, + "loss": 1.5258, + "step": 100 + }, + { + "epoch": 0.11754827875734676, + "grad_norm": 0.1708984375, + "learning_rate": 0.0001, + "loss": 1.5399, + "step": 105 + }, + { + "epoch": 0.12314581584102995, + "grad_norm": 0.2001953125, + "learning_rate": 0.0001, + "loss": 1.5737, + "step": 110 + }, + { + "epoch": 0.12874335292471312, + "grad_norm": 0.1630859375, + "learning_rate": 0.0001, + "loss": 1.5441, + "step": 115 + }, + { + "epoch": 0.1343408900083963, + "grad_norm": 0.19140625, + "learning_rate": 0.0001, + "loss": 1.4422, + "step": 120 + }, + { + "epoch": 0.1399384270920795, + "grad_norm": 0.203125, + "learning_rate": 0.0001, + "loss": 1.452, + "step": 125 + }, + { + "epoch": 0.14553596417576267, + "grad_norm": 0.2353515625, + "learning_rate": 0.0001, + "loss": 1.4814, + "step": 130 + }, + { + "epoch": 0.15113350125944586, + "grad_norm": 0.265625, + "learning_rate": 0.0001, + "loss": 1.5935, + "step": 135 + }, + { + "epoch": 0.15673103834312901, + "grad_norm": 0.1884765625, + "learning_rate": 0.0001, + "loss": 1.444, + "step": 140 + }, + { + "epoch": 0.1623285754268122, + "grad_norm": 0.18359375, + "learning_rate": 0.0001, + "loss": 1.5664, + "step": 145 + }, + { + "epoch": 0.16792611251049538, + "grad_norm": 0.28515625, + "learning_rate": 0.0001, + "loss": 1.3374, + "step": 150 + }, + { + "epoch": 0.17352364959417857, + "grad_norm": 0.380859375, + "learning_rate": 0.0001, + "loss": 1.551, + "step": 155 + }, + { + "epoch": 0.17912118667786175, + "grad_norm": 0.2890625, + "learning_rate": 0.0001, + "loss": 1.5344, + "step": 160 + }, + { + "epoch": 0.1847187237615449, + "grad_norm": 0.216796875, + "learning_rate": 0.0001, + "loss": 1.5554, + "step": 165 + }, + { + "epoch": 0.1903162608452281, + "grad_norm": 0.1806640625, + "learning_rate": 0.0001, + "loss": 1.5077, + "step": 170 + }, + { + "epoch": 0.19591379792891128, + "grad_norm": 0.18359375, + "learning_rate": 0.0001, + "loss": 1.5209, + "step": 175 + }, + { + "epoch": 0.20151133501259447, + "grad_norm": 0.2001953125, + "learning_rate": 0.0001, + "loss": 1.4717, + "step": 180 + }, + { + "epoch": 0.20710887209627762, + "grad_norm": 0.19921875, + "learning_rate": 0.0001, + "loss": 1.5203, + "step": 185 + }, + { + "epoch": 0.2127064091799608, + "grad_norm": 0.2373046875, + "learning_rate": 0.0001, + "loss": 1.4326, + "step": 190 + }, + { + "epoch": 0.218303946263644, + "grad_norm": 0.203125, + "learning_rate": 0.0001, + "loss": 1.4371, + "step": 195 + }, + { + "epoch": 0.22390148334732718, + "grad_norm": 0.265625, + "learning_rate": 0.0001, + "loss": 1.3296, + "step": 200 + }, + { + "epoch": 0.22949902043101036, + "grad_norm": 0.5234375, + "learning_rate": 0.0001, + "loss": 1.4735, + "step": 205 + }, + { + "epoch": 0.23509655751469352, + "grad_norm": 0.20703125, + "learning_rate": 0.0001, + "loss": 1.5329, + "step": 210 + }, + { + "epoch": 0.2406940945983767, + "grad_norm": 0.2255859375, + "learning_rate": 0.0001, + "loss": 1.4673, + "step": 215 + }, + { + "epoch": 0.2462916316820599, + "grad_norm": 0.203125, + "learning_rate": 0.0001, + "loss": 1.5117, + "step": 220 + }, + { + "epoch": 0.2518891687657431, + "grad_norm": 0.1953125, + "learning_rate": 0.0001, + "loss": 1.5133, + "step": 225 + }, + { + "epoch": 0.25748670584942623, + "grad_norm": 0.267578125, + "learning_rate": 0.0001, + "loss": 1.5302, + "step": 230 + }, + { + "epoch": 0.26308424293310945, + "grad_norm": 0.1904296875, + "learning_rate": 0.0001, + "loss": 1.52, + "step": 235 + }, + { + "epoch": 0.2686817800167926, + "grad_norm": 0.1875, + "learning_rate": 0.0001, + "loss": 1.52, + "step": 240 + }, + { + "epoch": 0.2742793171004758, + "grad_norm": 0.193359375, + "learning_rate": 0.0001, + "loss": 1.4227, + "step": 245 + }, + { + "epoch": 0.279876854184159, + "grad_norm": 0.2890625, + "learning_rate": 0.0001, + "loss": 1.3521, + "step": 250 + }, + { + "epoch": 0.28547439126784213, + "grad_norm": 0.2080078125, + "learning_rate": 0.0001, + "loss": 1.4912, + "step": 255 + }, + { + "epoch": 0.29107192835152534, + "grad_norm": 0.23046875, + "learning_rate": 0.0001, + "loss": 1.5325, + "step": 260 + }, + { + "epoch": 0.2966694654352085, + "grad_norm": 0.296875, + "learning_rate": 0.0001, + "loss": 1.5691, + "step": 265 + }, + { + "epoch": 0.3022670025188917, + "grad_norm": 0.1943359375, + "learning_rate": 0.0001, + "loss": 1.4568, + "step": 270 + }, + { + "epoch": 0.30786453960257487, + "grad_norm": 0.240234375, + "learning_rate": 0.0001, + "loss": 1.5762, + "step": 275 + }, + { + "epoch": 0.31346207668625803, + "grad_norm": 0.255859375, + "learning_rate": 0.0001, + "loss": 1.4846, + "step": 280 + }, + { + "epoch": 0.31905961376994124, + "grad_norm": 0.2041015625, + "learning_rate": 0.0001, + "loss": 1.4436, + "step": 285 + }, + { + "epoch": 0.3246571508536244, + "grad_norm": 0.1875, + "learning_rate": 0.0001, + "loss": 1.5297, + "step": 290 + }, + { + "epoch": 0.3302546879373076, + "grad_norm": 0.216796875, + "learning_rate": 0.0001, + "loss": 1.4201, + "step": 295 + }, + { + "epoch": 0.33585222502099077, + "grad_norm": 0.353515625, + "learning_rate": 0.0001, + "loss": 1.3166, + "step": 300 + }, + { + "epoch": 0.3414497621046739, + "grad_norm": 0.357421875, + "learning_rate": 0.0001, + "loss": 1.4785, + "step": 305 + }, + { + "epoch": 0.34704729918835714, + "grad_norm": 0.30859375, + "learning_rate": 0.0001, + "loss": 1.5302, + "step": 310 + }, + { + "epoch": 0.3526448362720403, + "grad_norm": 0.201171875, + "learning_rate": 0.0001, + "loss": 1.4568, + "step": 315 + }, + { + "epoch": 0.3582423733557235, + "grad_norm": 0.2275390625, + "learning_rate": 0.0001, + "loss": 1.4256, + "step": 320 + }, + { + "epoch": 0.36383991043940667, + "grad_norm": 0.34375, + "learning_rate": 0.0001, + "loss": 1.4523, + "step": 325 + }, + { + "epoch": 0.3694374475230898, + "grad_norm": 0.1787109375, + "learning_rate": 0.0001, + "loss": 1.4466, + "step": 330 + }, + { + "epoch": 0.37503498460677304, + "grad_norm": 0.1982421875, + "learning_rate": 0.0001, + "loss": 1.4772, + "step": 335 + }, + { + "epoch": 0.3806325216904562, + "grad_norm": 0.2197265625, + "learning_rate": 0.0001, + "loss": 1.4707, + "step": 340 + }, + { + "epoch": 0.38623005877413935, + "grad_norm": 0.2158203125, + "learning_rate": 0.0001, + "loss": 1.3964, + "step": 345 + }, + { + "epoch": 0.39182759585782256, + "grad_norm": 0.255859375, + "learning_rate": 0.0001, + "loss": 1.3211, + "step": 350 + }, + { + "epoch": 0.3974251329415057, + "grad_norm": 0.3046875, + "learning_rate": 0.0001, + "loss": 1.3659, + "step": 355 + }, + { + "epoch": 0.40302267002518893, + "grad_norm": 0.279296875, + "learning_rate": 0.0001, + "loss": 1.5411, + "step": 360 + }, + { + "epoch": 0.4086202071088721, + "grad_norm": 0.2431640625, + "learning_rate": 0.0001, + "loss": 1.4501, + "step": 365 + }, + { + "epoch": 0.41421774419255525, + "grad_norm": 0.2021484375, + "learning_rate": 0.0001, + "loss": 1.5281, + "step": 370 + }, + { + "epoch": 0.41981528127623846, + "grad_norm": 0.234375, + "learning_rate": 0.0001, + "loss": 1.458, + "step": 375 + }, + { + "epoch": 0.4254128183599216, + "grad_norm": 0.201171875, + "learning_rate": 0.0001, + "loss": 1.3954, + "step": 380 + }, + { + "epoch": 0.43101035544360483, + "grad_norm": 0.1904296875, + "learning_rate": 0.0001, + "loss": 1.394, + "step": 385 + }, + { + "epoch": 0.436607892527288, + "grad_norm": 0.2041015625, + "learning_rate": 0.0001, + "loss": 1.4369, + "step": 390 + }, + { + "epoch": 0.44220542961097115, + "grad_norm": 0.2177734375, + "learning_rate": 0.0001, + "loss": 1.3687, + "step": 395 + }, + { + "epoch": 0.44780296669465436, + "grad_norm": 0.29296875, + "learning_rate": 0.0001, + "loss": 1.2293, + "step": 400 + }, + { + "epoch": 0.4534005037783375, + "grad_norm": 0.287109375, + "learning_rate": 0.0001, + "loss": 1.4791, + "step": 405 + }, + { + "epoch": 0.45899804086202073, + "grad_norm": 0.5546875, + "learning_rate": 0.0001, + "loss": 1.464, + "step": 410 + }, + { + "epoch": 0.4645955779457039, + "grad_norm": 0.251953125, + "learning_rate": 0.0001, + "loss": 1.4523, + "step": 415 + }, + { + "epoch": 0.47019311502938704, + "grad_norm": 0.2373046875, + "learning_rate": 0.0001, + "loss": 1.4332, + "step": 420 + }, + { + "epoch": 0.47579065211307026, + "grad_norm": 0.19921875, + "learning_rate": 0.0001, + "loss": 1.4185, + "step": 425 + }, + { + "epoch": 0.4813881891967534, + "grad_norm": 0.322265625, + "learning_rate": 0.0001, + "loss": 1.5112, + "step": 430 + }, + { + "epoch": 0.4869857262804366, + "grad_norm": 0.2236328125, + "learning_rate": 0.0001, + "loss": 1.5503, + "step": 435 + }, + { + "epoch": 0.4925832633641198, + "grad_norm": 0.2421875, + "learning_rate": 0.0001, + "loss": 1.3926, + "step": 440 + }, + { + "epoch": 0.49818080044780294, + "grad_norm": 0.232421875, + "learning_rate": 0.0001, + "loss": 1.3562, + "step": 445 + }, + { + "epoch": 0.5037783375314862, + "grad_norm": 0.3046875, + "learning_rate": 0.0001, + "loss": 1.3121, + "step": 450 + } + ], + "logging_steps": 5, + "max_steps": 450, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 90, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 4.334613097500672e+17, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/codellama/c/codesum/codesum_c_base/checkpoint-450/training_args.bin b/codellama/c/codesum/codesum_c_base/checkpoint-450/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..d856f7bc2f062ea0d97a68c998fdedf9e29f0f6d --- /dev/null +++ b/codellama/c/codesum/codesum_c_base/checkpoint-450/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7bb1e20d4233432c62d310ac981c80c4e2fbfd390a9fd0cbf4ac191ee13b6c72 +size 7416 diff --git a/codellama/c/codesum/codesum_c_base/completed b/codellama/c/codesum/codesum_c_base/completed new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/codellama/c/codesum/codesum_c_base/metrics.json b/codellama/c/codesum/codesum_c_base/metrics.json new file mode 100644 index 0000000000000000000000000000000000000000..281d355a7130c5088a39b0b9552f97932549276e --- /dev/null +++ b/codellama/c/codesum/codesum_c_base/metrics.json @@ -0,0 +1 @@ +{"run_name": "codesum_c_base", "train_runtime": 20544.5242, "train_samples_per_second": 1.402, "train_steps_per_second": 0.022, "total_flos": 4.334613097500672e+17, "train_loss": 1.6184998904334174, "epoch": 0.5037783375314862} \ No newline at end of file diff --git a/codellama/c/codesum/codesum_c_base/train_results.json b/codellama/c/codesum/codesum_c_base/train_results.json new file mode 100644 index 0000000000000000000000000000000000000000..5e4529b89bebaa513bcf3fb8f8cae51b419fec4f --- /dev/null +++ b/codellama/c/codesum/codesum_c_base/train_results.json @@ -0,0 +1,8 @@ +{ + "epoch": 0.5037783375314862, + "total_flos": 4.334613097500672e+17, + "train_loss": 1.6184998904334174, + "train_runtime": 20544.5242, + "train_samples_per_second": 1.402, + "train_steps_per_second": 0.022 +} \ No newline at end of file diff --git a/codellama/c/codesum/codesum_c_base/trainer_state.json b/codellama/c/codesum/codesum_c_base/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..4c03f19b77df449cfafc2df2463a0d85dc394f84 --- /dev/null +++ b/codellama/c/codesum/codesum_c_base/trainer_state.json @@ -0,0 +1,672 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.5037783375314862, + "eval_steps": 500, + "global_step": 450, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.00559753708368318, + "grad_norm": 0.8359375, + "learning_rate": 0.0001, + "loss": 5.5483, + "step": 5 + }, + { + "epoch": 0.01119507416736636, + "grad_norm": 0.55078125, + "learning_rate": 0.0001, + "loss": 4.1635, + "step": 10 + }, + { + "epoch": 0.016792611251049538, + "grad_norm": 0.54296875, + "learning_rate": 0.0001, + "loss": 3.2229, + "step": 15 + }, + { + "epoch": 0.02239014833473272, + "grad_norm": 0.376953125, + "learning_rate": 0.0001, + "loss": 2.8432, + "step": 20 + }, + { + "epoch": 0.027987685418415897, + "grad_norm": 0.462890625, + "learning_rate": 0.0001, + "loss": 2.6705, + "step": 25 + }, + { + "epoch": 0.033585222502099076, + "grad_norm": 0.5234375, + "learning_rate": 0.0001, + "loss": 2.3014, + "step": 30 + }, + { + "epoch": 0.039182759585782254, + "grad_norm": 0.318359375, + "learning_rate": 0.0001, + "loss": 1.8735, + "step": 35 + }, + { + "epoch": 0.04478029666946544, + "grad_norm": 0.2578125, + "learning_rate": 0.0001, + "loss": 1.6909, + "step": 40 + }, + { + "epoch": 0.05037783375314862, + "grad_norm": 0.2158203125, + "learning_rate": 0.0001, + "loss": 1.6512, + "step": 45 + }, + { + "epoch": 0.055975370836831795, + "grad_norm": 0.28515625, + "learning_rate": 0.0001, + "loss": 1.5669, + "step": 50 + }, + { + "epoch": 0.06157290792051497, + "grad_norm": 0.2333984375, + "learning_rate": 0.0001, + "loss": 1.5499, + "step": 55 + }, + { + "epoch": 0.06717044500419815, + "grad_norm": 0.1943359375, + "learning_rate": 0.0001, + "loss": 1.557, + "step": 60 + }, + { + "epoch": 0.07276798208788134, + "grad_norm": 0.1748046875, + "learning_rate": 0.0001, + "loss": 1.5974, + "step": 65 + }, + { + "epoch": 0.07836551917156451, + "grad_norm": 0.173828125, + "learning_rate": 0.0001, + "loss": 1.6001, + "step": 70 + }, + { + "epoch": 0.08396305625524769, + "grad_norm": 0.171875, + "learning_rate": 0.0001, + "loss": 1.6237, + "step": 75 + }, + { + "epoch": 0.08956059333893088, + "grad_norm": 0.2578125, + "learning_rate": 0.0001, + "loss": 1.5338, + "step": 80 + }, + { + "epoch": 0.09515813042261405, + "grad_norm": 0.2177734375, + "learning_rate": 0.0001, + "loss": 1.53, + "step": 85 + }, + { + "epoch": 0.10075566750629723, + "grad_norm": 0.265625, + "learning_rate": 0.0001, + "loss": 1.5943, + "step": 90 + }, + { + "epoch": 0.1063532045899804, + "grad_norm": 0.240234375, + "learning_rate": 0.0001, + "loss": 1.5289, + "step": 95 + }, + { + "epoch": 0.11195074167366359, + "grad_norm": 0.283203125, + "learning_rate": 0.0001, + "loss": 1.5258, + "step": 100 + }, + { + "epoch": 0.11754827875734676, + "grad_norm": 0.1708984375, + "learning_rate": 0.0001, + "loss": 1.5399, + "step": 105 + }, + { + "epoch": 0.12314581584102995, + "grad_norm": 0.2001953125, + "learning_rate": 0.0001, + "loss": 1.5737, + "step": 110 + }, + { + "epoch": 0.12874335292471312, + "grad_norm": 0.1630859375, + "learning_rate": 0.0001, + "loss": 1.5441, + "step": 115 + }, + { + "epoch": 0.1343408900083963, + "grad_norm": 0.19140625, + "learning_rate": 0.0001, + "loss": 1.4422, + "step": 120 + }, + { + "epoch": 0.1399384270920795, + "grad_norm": 0.203125, + "learning_rate": 0.0001, + "loss": 1.452, + "step": 125 + }, + { + "epoch": 0.14553596417576267, + "grad_norm": 0.2353515625, + "learning_rate": 0.0001, + "loss": 1.4814, + "step": 130 + }, + { + "epoch": 0.15113350125944586, + "grad_norm": 0.265625, + "learning_rate": 0.0001, + "loss": 1.5935, + "step": 135 + }, + { + "epoch": 0.15673103834312901, + "grad_norm": 0.1884765625, + "learning_rate": 0.0001, + "loss": 1.444, + "step": 140 + }, + { + "epoch": 0.1623285754268122, + "grad_norm": 0.18359375, + "learning_rate": 0.0001, + "loss": 1.5664, + "step": 145 + }, + { + "epoch": 0.16792611251049538, + "grad_norm": 0.28515625, + "learning_rate": 0.0001, + "loss": 1.3374, + "step": 150 + }, + { + "epoch": 0.17352364959417857, + "grad_norm": 0.380859375, + "learning_rate": 0.0001, + "loss": 1.551, + "step": 155 + }, + { + "epoch": 0.17912118667786175, + "grad_norm": 0.2890625, + "learning_rate": 0.0001, + "loss": 1.5344, + "step": 160 + }, + { + "epoch": 0.1847187237615449, + "grad_norm": 0.216796875, + "learning_rate": 0.0001, + "loss": 1.5554, + "step": 165 + }, + { + "epoch": 0.1903162608452281, + "grad_norm": 0.1806640625, + "learning_rate": 0.0001, + "loss": 1.5077, + "step": 170 + }, + { + "epoch": 0.19591379792891128, + "grad_norm": 0.18359375, + "learning_rate": 0.0001, + "loss": 1.5209, + "step": 175 + }, + { + "epoch": 0.20151133501259447, + "grad_norm": 0.2001953125, + "learning_rate": 0.0001, + "loss": 1.4717, + "step": 180 + }, + { + "epoch": 0.20710887209627762, + "grad_norm": 0.19921875, + "learning_rate": 0.0001, + "loss": 1.5203, + "step": 185 + }, + { + "epoch": 0.2127064091799608, + "grad_norm": 0.2373046875, + "learning_rate": 0.0001, + "loss": 1.4326, + "step": 190 + }, + { + "epoch": 0.218303946263644, + "grad_norm": 0.203125, + "learning_rate": 0.0001, + "loss": 1.4371, + "step": 195 + }, + { + "epoch": 0.22390148334732718, + "grad_norm": 0.265625, + "learning_rate": 0.0001, + "loss": 1.3296, + "step": 200 + }, + { + "epoch": 0.22949902043101036, + "grad_norm": 0.5234375, + "learning_rate": 0.0001, + "loss": 1.4735, + "step": 205 + }, + { + "epoch": 0.23509655751469352, + "grad_norm": 0.20703125, + "learning_rate": 0.0001, + "loss": 1.5329, + "step": 210 + }, + { + "epoch": 0.2406940945983767, + "grad_norm": 0.2255859375, + "learning_rate": 0.0001, + "loss": 1.4673, + "step": 215 + }, + { + "epoch": 0.2462916316820599, + "grad_norm": 0.203125, + "learning_rate": 0.0001, + "loss": 1.5117, + "step": 220 + }, + { + "epoch": 0.2518891687657431, + "grad_norm": 0.1953125, + "learning_rate": 0.0001, + "loss": 1.5133, + "step": 225 + }, + { + "epoch": 0.25748670584942623, + "grad_norm": 0.267578125, + "learning_rate": 0.0001, + "loss": 1.5302, + "step": 230 + }, + { + "epoch": 0.26308424293310945, + "grad_norm": 0.1904296875, + "learning_rate": 0.0001, + "loss": 1.52, + "step": 235 + }, + { + "epoch": 0.2686817800167926, + "grad_norm": 0.1875, + "learning_rate": 0.0001, + "loss": 1.52, + "step": 240 + }, + { + "epoch": 0.2742793171004758, + "grad_norm": 0.193359375, + "learning_rate": 0.0001, + "loss": 1.4227, + "step": 245 + }, + { + "epoch": 0.279876854184159, + "grad_norm": 0.2890625, + "learning_rate": 0.0001, + "loss": 1.3521, + "step": 250 + }, + { + "epoch": 0.28547439126784213, + "grad_norm": 0.2080078125, + "learning_rate": 0.0001, + "loss": 1.4912, + "step": 255 + }, + { + "epoch": 0.29107192835152534, + "grad_norm": 0.23046875, + "learning_rate": 0.0001, + "loss": 1.5325, + "step": 260 + }, + { + "epoch": 0.2966694654352085, + "grad_norm": 0.296875, + "learning_rate": 0.0001, + "loss": 1.5691, + "step": 265 + }, + { + "epoch": 0.3022670025188917, + "grad_norm": 0.1943359375, + "learning_rate": 0.0001, + "loss": 1.4568, + "step": 270 + }, + { + "epoch": 0.30786453960257487, + "grad_norm": 0.240234375, + "learning_rate": 0.0001, + "loss": 1.5762, + "step": 275 + }, + { + "epoch": 0.31346207668625803, + "grad_norm": 0.255859375, + "learning_rate": 0.0001, + "loss": 1.4846, + "step": 280 + }, + { + "epoch": 0.31905961376994124, + "grad_norm": 0.2041015625, + "learning_rate": 0.0001, + "loss": 1.4436, + "step": 285 + }, + { + "epoch": 0.3246571508536244, + "grad_norm": 0.1875, + "learning_rate": 0.0001, + "loss": 1.5297, + "step": 290 + }, + { + "epoch": 0.3302546879373076, + "grad_norm": 0.216796875, + "learning_rate": 0.0001, + "loss": 1.4201, + "step": 295 + }, + { + "epoch": 0.33585222502099077, + "grad_norm": 0.353515625, + "learning_rate": 0.0001, + "loss": 1.3166, + "step": 300 + }, + { + "epoch": 0.3414497621046739, + "grad_norm": 0.357421875, + "learning_rate": 0.0001, + "loss": 1.4785, + "step": 305 + }, + { + "epoch": 0.34704729918835714, + "grad_norm": 0.30859375, + "learning_rate": 0.0001, + "loss": 1.5302, + "step": 310 + }, + { + "epoch": 0.3526448362720403, + "grad_norm": 0.201171875, + "learning_rate": 0.0001, + "loss": 1.4568, + "step": 315 + }, + { + "epoch": 0.3582423733557235, + "grad_norm": 0.2275390625, + "learning_rate": 0.0001, + "loss": 1.4256, + "step": 320 + }, + { + "epoch": 0.36383991043940667, + "grad_norm": 0.34375, + "learning_rate": 0.0001, + "loss": 1.4523, + "step": 325 + }, + { + "epoch": 0.3694374475230898, + "grad_norm": 0.1787109375, + "learning_rate": 0.0001, + "loss": 1.4466, + "step": 330 + }, + { + "epoch": 0.37503498460677304, + "grad_norm": 0.1982421875, + "learning_rate": 0.0001, + "loss": 1.4772, + "step": 335 + }, + { + "epoch": 0.3806325216904562, + "grad_norm": 0.2197265625, + "learning_rate": 0.0001, + "loss": 1.4707, + "step": 340 + }, + { + "epoch": 0.38623005877413935, + "grad_norm": 0.2158203125, + "learning_rate": 0.0001, + "loss": 1.3964, + "step": 345 + }, + { + "epoch": 0.39182759585782256, + "grad_norm": 0.255859375, + "learning_rate": 0.0001, + "loss": 1.3211, + "step": 350 + }, + { + "epoch": 0.3974251329415057, + "grad_norm": 0.3046875, + "learning_rate": 0.0001, + "loss": 1.3659, + "step": 355 + }, + { + "epoch": 0.40302267002518893, + "grad_norm": 0.279296875, + "learning_rate": 0.0001, + "loss": 1.5411, + "step": 360 + }, + { + "epoch": 0.4086202071088721, + "grad_norm": 0.2431640625, + "learning_rate": 0.0001, + "loss": 1.4501, + "step": 365 + }, + { + "epoch": 0.41421774419255525, + "grad_norm": 0.2021484375, + "learning_rate": 0.0001, + "loss": 1.5281, + "step": 370 + }, + { + "epoch": 0.41981528127623846, + "grad_norm": 0.234375, + "learning_rate": 0.0001, + "loss": 1.458, + "step": 375 + }, + { + "epoch": 0.4254128183599216, + "grad_norm": 0.201171875, + "learning_rate": 0.0001, + "loss": 1.3954, + "step": 380 + }, + { + "epoch": 0.43101035544360483, + "grad_norm": 0.1904296875, + "learning_rate": 0.0001, + "loss": 1.394, + "step": 385 + }, + { + "epoch": 0.436607892527288, + "grad_norm": 0.2041015625, + "learning_rate": 0.0001, + "loss": 1.4369, + "step": 390 + }, + { + "epoch": 0.44220542961097115, + "grad_norm": 0.2177734375, + "learning_rate": 0.0001, + "loss": 1.3687, + "step": 395 + }, + { + "epoch": 0.44780296669465436, + "grad_norm": 0.29296875, + "learning_rate": 0.0001, + "loss": 1.2293, + "step": 400 + }, + { + "epoch": 0.4534005037783375, + "grad_norm": 0.287109375, + "learning_rate": 0.0001, + "loss": 1.4791, + "step": 405 + }, + { + "epoch": 0.45899804086202073, + "grad_norm": 0.5546875, + "learning_rate": 0.0001, + "loss": 1.464, + "step": 410 + }, + { + "epoch": 0.4645955779457039, + "grad_norm": 0.251953125, + "learning_rate": 0.0001, + "loss": 1.4523, + "step": 415 + }, + { + "epoch": 0.47019311502938704, + "grad_norm": 0.2373046875, + "learning_rate": 0.0001, + "loss": 1.4332, + "step": 420 + }, + { + "epoch": 0.47579065211307026, + "grad_norm": 0.19921875, + "learning_rate": 0.0001, + "loss": 1.4185, + "step": 425 + }, + { + "epoch": 0.4813881891967534, + "grad_norm": 0.322265625, + "learning_rate": 0.0001, + "loss": 1.5112, + "step": 430 + }, + { + "epoch": 0.4869857262804366, + "grad_norm": 0.2236328125, + "learning_rate": 0.0001, + "loss": 1.5503, + "step": 435 + }, + { + "epoch": 0.4925832633641198, + "grad_norm": 0.2421875, + "learning_rate": 0.0001, + "loss": 1.3926, + "step": 440 + }, + { + "epoch": 0.49818080044780294, + "grad_norm": 0.232421875, + "learning_rate": 0.0001, + "loss": 1.3562, + "step": 445 + }, + { + "epoch": 0.5037783375314862, + "grad_norm": 0.3046875, + "learning_rate": 0.0001, + "loss": 1.3121, + "step": 450 + }, + { + "epoch": 0.5037783375314862, + "step": 450, + "total_flos": 4.334613097500672e+17, + "train_loss": 1.6184998904334174, + "train_runtime": 20544.5242, + "train_samples_per_second": 1.402, + "train_steps_per_second": 0.022 + } + ], + "logging_steps": 5, + "max_steps": 450, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 90, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 4.334613097500672e+17, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/codellama/c/codesum/codesum_c_callgraph/all_results.json b/codellama/c/codesum/codesum_c_callgraph/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..65e253df1edc9d1337727a70280f457c93ce648f --- /dev/null +++ b/codellama/c/codesum/codesum_c_callgraph/all_results.json @@ -0,0 +1,8 @@ +{ + "epoch": 0.5037783375314862, + "total_flos": 4.334613097500672e+17, + "train_loss": 1.541747768190172, + "train_runtime": 17912.5916, + "train_samples_per_second": 1.608, + "train_steps_per_second": 0.025 +} \ No newline at end of file diff --git a/codellama/c/codesum/codesum_c_callgraph/checkpoint-180/README.md b/codellama/c/codesum/codesum_c_callgraph/checkpoint-180/README.md new file mode 100644 index 0000000000000000000000000000000000000000..1c480e52daf505882c9f03a4e75fe30ea8c22c2c --- /dev/null +++ b/codellama/c/codesum/codesum_c_callgraph/checkpoint-180/README.md @@ -0,0 +1,202 @@ +--- +base_model: CodeLlama-13b-Instruct-hf/ +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.2 \ No newline at end of file diff --git a/codellama/c/codesum/codesum_c_callgraph/checkpoint-180/adapter_config.json b/codellama/c/codesum/codesum_c_callgraph/checkpoint-180/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..a120c2f7228bd25407c0120a8b6f8c00806f84bb --- /dev/null +++ b/codellama/c/codesum/codesum_c_callgraph/checkpoint-180/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "CodeLlama-13b-Instruct-hf/", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "gate_proj", + "k_proj", + "down_proj", + "q_proj", + "up_proj", + "o_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/codellama/c/codesum/codesum_c_callgraph/checkpoint-180/adapter_model.safetensors b/codellama/c/codesum/codesum_c_callgraph/checkpoint-180/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f0c118ea950fca7759b93a2e8794d10b4d3e9463 --- /dev/null +++ b/codellama/c/codesum/codesum_c_callgraph/checkpoint-180/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9a43001fef7b56249465c8c01fea1ae1f33fd2bea74a8e730d0bb6852dfec48 +size 1156480200 diff --git a/codellama/c/codesum/codesum_c_callgraph/checkpoint-180/adapter_model/README.md b/codellama/c/codesum/codesum_c_callgraph/checkpoint-180/adapter_model/README.md new file mode 100644 index 0000000000000000000000000000000000000000..1c480e52daf505882c9f03a4e75fe30ea8c22c2c --- /dev/null +++ b/codellama/c/codesum/codesum_c_callgraph/checkpoint-180/adapter_model/README.md @@ -0,0 +1,202 @@ +--- +base_model: CodeLlama-13b-Instruct-hf/ +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.2 \ No newline at end of file diff --git a/codellama/c/codesum/codesum_c_callgraph/checkpoint-180/adapter_model/adapter_config.json b/codellama/c/codesum/codesum_c_callgraph/checkpoint-180/adapter_model/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..a120c2f7228bd25407c0120a8b6f8c00806f84bb --- /dev/null +++ b/codellama/c/codesum/codesum_c_callgraph/checkpoint-180/adapter_model/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "CodeLlama-13b-Instruct-hf/", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "gate_proj", + "k_proj", + "down_proj", + "q_proj", + "up_proj", + "o_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/codellama/c/codesum/codesum_c_callgraph/checkpoint-180/adapter_model/adapter_model.safetensors b/codellama/c/codesum/codesum_c_callgraph/checkpoint-180/adapter_model/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f0c118ea950fca7759b93a2e8794d10b4d3e9463 --- /dev/null +++ b/codellama/c/codesum/codesum_c_callgraph/checkpoint-180/adapter_model/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9a43001fef7b56249465c8c01fea1ae1f33fd2bea74a8e730d0bb6852dfec48 +size 1156480200 diff --git a/codellama/c/codesum/codesum_c_callgraph/checkpoint-180/added_tokens.json b/codellama/c/codesum/codesum_c_callgraph/checkpoint-180/added_tokens.json new file mode 100644 index 0000000000000000000000000000000000000000..1cbbe5179eb8b5cc46632bbbc00eb51c68847074 --- /dev/null +++ b/codellama/c/codesum/codesum_c_callgraph/checkpoint-180/added_tokens.json @@ -0,0 +1,3 @@ +{ + "[PAD]": 32016 +} diff --git a/codellama/c/codesum/codesum_c_callgraph/checkpoint-180/optimizer.pt b/codellama/c/codesum/codesum_c_callgraph/checkpoint-180/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..75a5be612adf54eb3c8503525c7661c2e2ed3c31 --- /dev/null +++ b/codellama/c/codesum/codesum_c_callgraph/checkpoint-180/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:858286d17de56071411d3b43d492784c45ceab000e3534026ec22cc6748aff2a +size 2003126962 diff --git a/codellama/c/codesum/codesum_c_callgraph/checkpoint-180/rng_state.pth b/codellama/c/codesum/codesum_c_callgraph/checkpoint-180/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..75607438305a6cd872edd07e5a21a914f698ce0b --- /dev/null +++ b/codellama/c/codesum/codesum_c_callgraph/checkpoint-180/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9724b479bcde75696d93ccd1f92c294317abd162382cc656d5dcbb0500c63f6a +size 14244 diff --git a/codellama/c/codesum/codesum_c_callgraph/checkpoint-180/scheduler.pt b/codellama/c/codesum/codesum_c_callgraph/checkpoint-180/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..cd32f24b55247712dc306a7f48b1e67f9136b26b --- /dev/null +++ b/codellama/c/codesum/codesum_c_callgraph/checkpoint-180/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:244453cd6aad26ed6e8f9d969778193b9354089d8336fe58bfb91c089a53bf6f +size 1064 diff --git a/codellama/c/codesum/codesum_c_callgraph/checkpoint-180/special_tokens_map.json b/codellama/c/codesum/codesum_c_callgraph/checkpoint-180/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..330bb0c14209dcd402b155e7d58c6c2b5210d40d --- /dev/null +++ b/codellama/c/codesum/codesum_c_callgraph/checkpoint-180/special_tokens_map.json @@ -0,0 +1,36 @@ +{ + "additional_special_tokens": [ + "▁ ", + "▁", + "▁ ", + "▁ " + ], + "bos_token": { + "content": " ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "[PAD]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/codellama/c/codesum/codesum_c_callgraph/checkpoint-180/tokenizer.model b/codellama/c/codesum/codesum_c_callgraph/checkpoint-180/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..f6722e8b170230ebdd4c0f5f2ce03f219be536d4 --- /dev/null +++ b/codellama/c/codesum/codesum_c_callgraph/checkpoint-180/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45ccb9c8b6b561889acea59191d66986d314e7cbd6a78abc6e49b139ca91c1e6 +size 500058 diff --git a/codellama/c/codesum/codesum_c_callgraph/checkpoint-180/tokenizer_config.json b/codellama/c/codesum/codesum_c_callgraph/checkpoint-180/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8f4094d204e2be0ed7b6bfa83d20cff28326258d --- /dev/null +++ b/codellama/c/codesum/codesum_c_callgraph/checkpoint-180/tokenizer_config.json @@ -0,0 +1,94 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": " ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": " ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32007": { + "content": "▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32008": { + "content": "▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32009": { + "content": "▁ ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32010": { + "content": "▁ ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32016": { + "content": "[PAD]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [ + "▁ ", + "▁", + "▁ ", + "▁ " + ], + "bos_token": " ", + "chat_template": "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if loop.index0 == 0 and system_message != false %}{% set content = '<", + "eot_token": "▁>\\n' + system_message + '\\n< >\\n\\n' + message['content'] %}{% else %}{% set content = message['content'] %}{% endif %}{% if message['role'] == 'user' %}{{ bos_token + '[INST] ' + content | trim + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ ' ' + content | trim + ' ' + eos_token }}{% endif %}{% endfor %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "fill_token": " ", + "legacy": null, + "middle_token": "▁ ", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "[PAD]", + "padding_side": "right", + "prefix_token": "▁ ", + "sp_model_kwargs": {}, + "suffix_first": false, + "suffix_token": "▁", + "tokenizer_class": "CodeLlamaTokenizer", + "unk_token": " ", + "use_default_system_prompt": false +} diff --git a/codellama/c/codesum/codesum_c_callgraph/checkpoint-180/trainer_state.json b/codellama/c/codesum/codesum_c_callgraph/checkpoint-180/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..65656437835dde777e696ee4b086dc7c9807e4e3 --- /dev/null +++ b/codellama/c/codesum/codesum_c_callgraph/checkpoint-180/trainer_state.json @@ -0,0 +1,285 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.20151133501259447, + "eval_steps": 500, + "global_step": 180, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.00559753708368318, + "grad_norm": 1.2421875, + "learning_rate": 0.0001, + "loss": 4.5193, + "step": 5 + }, + { + "epoch": 0.01119507416736636, + "grad_norm": 0.68359375, + "learning_rate": 0.0001, + "loss": 2.8387, + "step": 10 + }, + { + "epoch": 0.016792611251049538, + "grad_norm": 0.484375, + "learning_rate": 0.0001, + "loss": 2.1966, + "step": 15 + }, + { + "epoch": 0.02239014833473272, + "grad_norm": 0.51953125, + "learning_rate": 0.0001, + "loss": 2.0024, + "step": 20 + }, + { + "epoch": 0.027987685418415897, + "grad_norm": 0.296875, + "learning_rate": 0.0001, + "loss": 1.7735, + "step": 25 + }, + { + "epoch": 0.033585222502099076, + "grad_norm": 0.263671875, + "learning_rate": 0.0001, + "loss": 1.6781, + "step": 30 + }, + { + "epoch": 0.039182759585782254, + "grad_norm": 0.291015625, + "learning_rate": 0.0001, + "loss": 1.6619, + "step": 35 + }, + { + "epoch": 0.04478029666946544, + "grad_norm": 0.3125, + "learning_rate": 0.0001, + "loss": 1.6361, + "step": 40 + }, + { + "epoch": 0.05037783375314862, + "grad_norm": 0.298828125, + "learning_rate": 0.0001, + "loss": 1.6153, + "step": 45 + }, + { + "epoch": 0.055975370836831795, + "grad_norm": 0.306640625, + "learning_rate": 0.0001, + "loss": 1.5201, + "step": 50 + }, + { + "epoch": 0.06157290792051497, + "grad_norm": 0.296875, + "learning_rate": 0.0001, + "loss": 1.5211, + "step": 55 + }, + { + "epoch": 0.06717044500419815, + "grad_norm": 0.2392578125, + "learning_rate": 0.0001, + "loss": 1.5359, + "step": 60 + }, + { + "epoch": 0.07276798208788134, + "grad_norm": 0.251953125, + "learning_rate": 0.0001, + "loss": 1.5686, + "step": 65 + }, + { + "epoch": 0.07836551917156451, + "grad_norm": 0.255859375, + "learning_rate": 0.0001, + "loss": 1.5732, + "step": 70 + }, + { + "epoch": 0.08396305625524769, + "grad_norm": 0.244140625, + "learning_rate": 0.0001, + "loss": 1.5958, + "step": 75 + }, + { + "epoch": 0.08956059333893088, + "grad_norm": 0.25390625, + "learning_rate": 0.0001, + "loss": 1.5006, + "step": 80 + }, + { + "epoch": 0.09515813042261405, + "grad_norm": 0.2890625, + "learning_rate": 0.0001, + "loss": 1.5051, + "step": 85 + }, + { + "epoch": 0.10075566750629723, + "grad_norm": 0.314453125, + "learning_rate": 0.0001, + "loss": 1.5649, + "step": 90 + }, + { + "epoch": 0.1063532045899804, + "grad_norm": 0.263671875, + "learning_rate": 0.0001, + "loss": 1.5, + "step": 95 + }, + { + "epoch": 0.11195074167366359, + "grad_norm": 0.310546875, + "learning_rate": 0.0001, + "loss": 1.4951, + "step": 100 + }, + { + "epoch": 0.11754827875734676, + "grad_norm": 0.2412109375, + "learning_rate": 0.0001, + "loss": 1.5258, + "step": 105 + }, + { + "epoch": 0.12314581584102995, + "grad_norm": 0.2236328125, + "learning_rate": 0.0001, + "loss": 1.556, + "step": 110 + }, + { + "epoch": 0.12874335292471312, + "grad_norm": 0.1982421875, + "learning_rate": 0.0001, + "loss": 1.5306, + "step": 115 + }, + { + "epoch": 0.1343408900083963, + "grad_norm": 0.203125, + "learning_rate": 0.0001, + "loss": 1.4242, + "step": 120 + }, + { + "epoch": 0.1399384270920795, + "grad_norm": 0.2216796875, + "learning_rate": 0.0001, + "loss": 1.4403, + "step": 125 + }, + { + "epoch": 0.14553596417576267, + "grad_norm": 0.2265625, + "learning_rate": 0.0001, + "loss": 1.4604, + "step": 130 + }, + { + "epoch": 0.15113350125944586, + "grad_norm": 0.2275390625, + "learning_rate": 0.0001, + "loss": 1.5809, + "step": 135 + }, + { + "epoch": 0.15673103834312901, + "grad_norm": 0.2421875, + "learning_rate": 0.0001, + "loss": 1.4282, + "step": 140 + }, + { + "epoch": 0.1623285754268122, + "grad_norm": 0.228515625, + "learning_rate": 0.0001, + "loss": 1.5452, + "step": 145 + }, + { + "epoch": 0.16792611251049538, + "grad_norm": 0.359375, + "learning_rate": 0.0001, + "loss": 1.3127, + "step": 150 + }, + { + "epoch": 0.17352364959417857, + "grad_norm": 0.46875, + "learning_rate": 0.0001, + "loss": 1.5287, + "step": 155 + }, + { + "epoch": 0.17912118667786175, + "grad_norm": 0.28515625, + "learning_rate": 0.0001, + "loss": 1.5197, + "step": 160 + }, + { + "epoch": 0.1847187237615449, + "grad_norm": 0.201171875, + "learning_rate": 0.0001, + "loss": 1.5512, + "step": 165 + }, + { + "epoch": 0.1903162608452281, + "grad_norm": 0.2490234375, + "learning_rate": 0.0001, + "loss": 1.4973, + "step": 170 + }, + { + "epoch": 0.19591379792891128, + "grad_norm": 0.220703125, + "learning_rate": 0.0001, + "loss": 1.503, + "step": 175 + }, + { + "epoch": 0.20151133501259447, + "grad_norm": 0.2080078125, + "learning_rate": 0.0001, + "loss": 1.4571, + "step": 180 + } + ], + "logging_steps": 5, + "max_steps": 450, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 90, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.8132901640749056e+17, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/codellama/c/codesum/codesum_c_callgraph/checkpoint-180/training_args.bin b/codellama/c/codesum/codesum_c_callgraph/checkpoint-180/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..281d44350bc11cf3e7040f5deeb911bb026435c4 --- /dev/null +++ b/codellama/c/codesum/codesum_c_callgraph/checkpoint-180/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de79dec5e578f68fe50f2703c05abcde10ccb18697ac5a3edfec63f4ac7e3b83 +size 7416 diff --git a/codellama/c/codesum/codesum_c_callgraph/checkpoint-270/README.md b/codellama/c/codesum/codesum_c_callgraph/checkpoint-270/README.md new file mode 100644 index 0000000000000000000000000000000000000000..1c480e52daf505882c9f03a4e75fe30ea8c22c2c --- /dev/null +++ b/codellama/c/codesum/codesum_c_callgraph/checkpoint-270/README.md @@ -0,0 +1,202 @@ +--- +base_model: CodeLlama-13b-Instruct-hf/ +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.2 \ No newline at end of file diff --git a/codellama/c/codesum/codesum_c_callgraph/checkpoint-270/adapter_config.json b/codellama/c/codesum/codesum_c_callgraph/checkpoint-270/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..a120c2f7228bd25407c0120a8b6f8c00806f84bb --- /dev/null +++ b/codellama/c/codesum/codesum_c_callgraph/checkpoint-270/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "CodeLlama-13b-Instruct-hf/", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "gate_proj", + "k_proj", + "down_proj", + "q_proj", + "up_proj", + "o_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/codellama/c/codesum/codesum_c_callgraph/checkpoint-270/adapter_model.safetensors b/codellama/c/codesum/codesum_c_callgraph/checkpoint-270/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e8826c0132e1193bd6676511c2e1b32469fd57e6 --- /dev/null +++ b/codellama/c/codesum/codesum_c_callgraph/checkpoint-270/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d1988e012b2b7cc222cedf6dab39da4e9b578da6e51b7520f77d89450ebb473 +size 1156480200 diff --git a/codellama/c/codesum/codesum_c_callgraph/checkpoint-270/adapter_model/README.md b/codellama/c/codesum/codesum_c_callgraph/checkpoint-270/adapter_model/README.md new file mode 100644 index 0000000000000000000000000000000000000000..1c480e52daf505882c9f03a4e75fe30ea8c22c2c --- /dev/null +++ b/codellama/c/codesum/codesum_c_callgraph/checkpoint-270/adapter_model/README.md @@ -0,0 +1,202 @@ +--- +base_model: CodeLlama-13b-Instruct-hf/ +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.2 \ No newline at end of file diff --git a/codellama/c/codesum/codesum_c_callgraph/checkpoint-270/adapter_model/adapter_config.json b/codellama/c/codesum/codesum_c_callgraph/checkpoint-270/adapter_model/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..a120c2f7228bd25407c0120a8b6f8c00806f84bb --- /dev/null +++ b/codellama/c/codesum/codesum_c_callgraph/checkpoint-270/adapter_model/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "CodeLlama-13b-Instruct-hf/", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "gate_proj", + "k_proj", + "down_proj", + "q_proj", + "up_proj", + "o_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/codellama/c/codesum/codesum_c_callgraph/checkpoint-270/adapter_model/adapter_model.safetensors b/codellama/c/codesum/codesum_c_callgraph/checkpoint-270/adapter_model/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e8826c0132e1193bd6676511c2e1b32469fd57e6 --- /dev/null +++ b/codellama/c/codesum/codesum_c_callgraph/checkpoint-270/adapter_model/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d1988e012b2b7cc222cedf6dab39da4e9b578da6e51b7520f77d89450ebb473 +size 1156480200 diff --git a/codellama/c/codesum/codesum_c_callgraph/checkpoint-270/added_tokens.json b/codellama/c/codesum/codesum_c_callgraph/checkpoint-270/added_tokens.json new file mode 100644 index 0000000000000000000000000000000000000000..1cbbe5179eb8b5cc46632bbbc00eb51c68847074 --- /dev/null +++ b/codellama/c/codesum/codesum_c_callgraph/checkpoint-270/added_tokens.json @@ -0,0 +1,3 @@ +{ + "[PAD]": 32016 +} diff --git a/codellama/c/codesum/codesum_c_callgraph/checkpoint-270/optimizer.pt b/codellama/c/codesum/codesum_c_callgraph/checkpoint-270/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..7a395313f8b82d20533fdd2c9a8b29fbcd2b6222 --- /dev/null +++ b/codellama/c/codesum/codesum_c_callgraph/checkpoint-270/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2cc2dc1d3ed5866419e8e5da484bcd310cb0bad4c5060c2674492002b4648cda +size 2003127538 diff --git a/codellama/c/codesum/codesum_c_callgraph/checkpoint-270/rng_state.pth b/codellama/c/codesum/codesum_c_callgraph/checkpoint-270/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..75607438305a6cd872edd07e5a21a914f698ce0b --- /dev/null +++ b/codellama/c/codesum/codesum_c_callgraph/checkpoint-270/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9724b479bcde75696d93ccd1f92c294317abd162382cc656d5dcbb0500c63f6a +size 14244 diff --git a/codellama/c/codesum/codesum_c_callgraph/checkpoint-270/scheduler.pt b/codellama/c/codesum/codesum_c_callgraph/checkpoint-270/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..0bd2ccff76c23e0931b9dce2a864bfea7c8c94fa --- /dev/null +++ b/codellama/c/codesum/codesum_c_callgraph/checkpoint-270/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:554ac98e1be0a401146c440f8f17f6178e4bc14ffbb34e0ab6f4bc1b19a709d0 +size 1064 diff --git a/codellama/c/codesum/codesum_c_callgraph/checkpoint-270/special_tokens_map.json b/codellama/c/codesum/codesum_c_callgraph/checkpoint-270/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..330bb0c14209dcd402b155e7d58c6c2b5210d40d --- /dev/null +++ b/codellama/c/codesum/codesum_c_callgraph/checkpoint-270/special_tokens_map.json @@ -0,0 +1,36 @@ +{ + "additional_special_tokens": [ + "▁ ", + "▁", + "▁ ", + "▁ " + ], + "bos_token": { + "content": " ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "[PAD]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/codellama/c/codesum/codesum_c_callgraph/checkpoint-270/tokenizer.model b/codellama/c/codesum/codesum_c_callgraph/checkpoint-270/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..f6722e8b170230ebdd4c0f5f2ce03f219be536d4 --- /dev/null +++ b/codellama/c/codesum/codesum_c_callgraph/checkpoint-270/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45ccb9c8b6b561889acea59191d66986d314e7cbd6a78abc6e49b139ca91c1e6 +size 500058 diff --git a/codellama/c/codesum/codesum_c_callgraph/checkpoint-270/tokenizer_config.json b/codellama/c/codesum/codesum_c_callgraph/checkpoint-270/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8f4094d204e2be0ed7b6bfa83d20cff28326258d --- /dev/null +++ b/codellama/c/codesum/codesum_c_callgraph/checkpoint-270/tokenizer_config.json @@ -0,0 +1,94 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": " ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": " ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32007": { + "content": "▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32008": { + "content": "▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32009": { + "content": "▁ ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32010": { + "content": "▁ ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32016": { + "content": "[PAD]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [ + "▁ ", + "▁", + "▁ ", + "▁ " + ], + "bos_token": " ", + "chat_template": "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if loop.index0 == 0 and system_message != false %}{% set content = '<", + "eot_token": "▁>\\n' + system_message + '\\n< >\\n\\n' + message['content'] %}{% else %}{% set content = message['content'] %}{% endif %}{% if message['role'] == 'user' %}{{ bos_token + '[INST] ' + content | trim + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ ' ' + content | trim + ' ' + eos_token }}{% endif %}{% endfor %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "fill_token": " ", + "legacy": null, + "middle_token": "▁ ", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "[PAD]", + "padding_side": "right", + "prefix_token": "▁ ", + "sp_model_kwargs": {}, + "suffix_first": false, + "suffix_token": "▁", + "tokenizer_class": "CodeLlamaTokenizer", + "unk_token": " ", + "use_default_system_prompt": false +} diff --git a/codellama/c/codesum/codesum_c_callgraph/checkpoint-270/trainer_state.json b/codellama/c/codesum/codesum_c_callgraph/checkpoint-270/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..d90410bae9d2c72888fb198f783e25edbbacdaba --- /dev/null +++ b/codellama/c/codesum/codesum_c_callgraph/checkpoint-270/trainer_state.json @@ -0,0 +1,411 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.3022670025188917, + "eval_steps": 500, + "global_step": 270, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.00559753708368318, + "grad_norm": 1.2421875, + "learning_rate": 0.0001, + "loss": 4.5193, + "step": 5 + }, + { + "epoch": 0.01119507416736636, + "grad_norm": 0.68359375, + "learning_rate": 0.0001, + "loss": 2.8387, + "step": 10 + }, + { + "epoch": 0.016792611251049538, + "grad_norm": 0.484375, + "learning_rate": 0.0001, + "loss": 2.1966, + "step": 15 + }, + { + "epoch": 0.02239014833473272, + "grad_norm": 0.51953125, + "learning_rate": 0.0001, + "loss": 2.0024, + "step": 20 + }, + { + "epoch": 0.027987685418415897, + "grad_norm": 0.296875, + "learning_rate": 0.0001, + "loss": 1.7735, + "step": 25 + }, + { + "epoch": 0.033585222502099076, + "grad_norm": 0.263671875, + "learning_rate": 0.0001, + "loss": 1.6781, + "step": 30 + }, + { + "epoch": 0.039182759585782254, + "grad_norm": 0.291015625, + "learning_rate": 0.0001, + "loss": 1.6619, + "step": 35 + }, + { + "epoch": 0.04478029666946544, + "grad_norm": 0.3125, + "learning_rate": 0.0001, + "loss": 1.6361, + "step": 40 + }, + { + "epoch": 0.05037783375314862, + "grad_norm": 0.298828125, + "learning_rate": 0.0001, + "loss": 1.6153, + "step": 45 + }, + { + "epoch": 0.055975370836831795, + "grad_norm": 0.306640625, + "learning_rate": 0.0001, + "loss": 1.5201, + "step": 50 + }, + { + "epoch": 0.06157290792051497, + "grad_norm": 0.296875, + "learning_rate": 0.0001, + "loss": 1.5211, + "step": 55 + }, + { + "epoch": 0.06717044500419815, + "grad_norm": 0.2392578125, + "learning_rate": 0.0001, + "loss": 1.5359, + "step": 60 + }, + { + "epoch": 0.07276798208788134, + "grad_norm": 0.251953125, + "learning_rate": 0.0001, + "loss": 1.5686, + "step": 65 + }, + { + "epoch": 0.07836551917156451, + "grad_norm": 0.255859375, + "learning_rate": 0.0001, + "loss": 1.5732, + "step": 70 + }, + { + "epoch": 0.08396305625524769, + "grad_norm": 0.244140625, + "learning_rate": 0.0001, + "loss": 1.5958, + "step": 75 + }, + { + "epoch": 0.08956059333893088, + "grad_norm": 0.25390625, + "learning_rate": 0.0001, + "loss": 1.5006, + "step": 80 + }, + { + "epoch": 0.09515813042261405, + "grad_norm": 0.2890625, + "learning_rate": 0.0001, + "loss": 1.5051, + "step": 85 + }, + { + "epoch": 0.10075566750629723, + "grad_norm": 0.314453125, + "learning_rate": 0.0001, + "loss": 1.5649, + "step": 90 + }, + { + "epoch": 0.1063532045899804, + "grad_norm": 0.263671875, + "learning_rate": 0.0001, + "loss": 1.5, + "step": 95 + }, + { + "epoch": 0.11195074167366359, + "grad_norm": 0.310546875, + "learning_rate": 0.0001, + "loss": 1.4951, + "step": 100 + }, + { + "epoch": 0.11754827875734676, + "grad_norm": 0.2412109375, + "learning_rate": 0.0001, + "loss": 1.5258, + "step": 105 + }, + { + "epoch": 0.12314581584102995, + "grad_norm": 0.2236328125, + "learning_rate": 0.0001, + "loss": 1.556, + "step": 110 + }, + { + "epoch": 0.12874335292471312, + "grad_norm": 0.1982421875, + "learning_rate": 0.0001, + "loss": 1.5306, + "step": 115 + }, + { + "epoch": 0.1343408900083963, + "grad_norm": 0.203125, + "learning_rate": 0.0001, + "loss": 1.4242, + "step": 120 + }, + { + "epoch": 0.1399384270920795, + "grad_norm": 0.2216796875, + "learning_rate": 0.0001, + "loss": 1.4403, + "step": 125 + }, + { + "epoch": 0.14553596417576267, + "grad_norm": 0.2265625, + "learning_rate": 0.0001, + "loss": 1.4604, + "step": 130 + }, + { + "epoch": 0.15113350125944586, + "grad_norm": 0.2275390625, + "learning_rate": 0.0001, + "loss": 1.5809, + "step": 135 + }, + { + "epoch": 0.15673103834312901, + "grad_norm": 0.2421875, + "learning_rate": 0.0001, + "loss": 1.4282, + "step": 140 + }, + { + "epoch": 0.1623285754268122, + "grad_norm": 0.228515625, + "learning_rate": 0.0001, + "loss": 1.5452, + "step": 145 + }, + { + "epoch": 0.16792611251049538, + "grad_norm": 0.359375, + "learning_rate": 0.0001, + "loss": 1.3127, + "step": 150 + }, + { + "epoch": 0.17352364959417857, + "grad_norm": 0.46875, + "learning_rate": 0.0001, + "loss": 1.5287, + "step": 155 + }, + { + "epoch": 0.17912118667786175, + "grad_norm": 0.28515625, + "learning_rate": 0.0001, + "loss": 1.5197, + "step": 160 + }, + { + "epoch": 0.1847187237615449, + "grad_norm": 0.201171875, + "learning_rate": 0.0001, + "loss": 1.5512, + "step": 165 + }, + { + "epoch": 0.1903162608452281, + "grad_norm": 0.2490234375, + "learning_rate": 0.0001, + "loss": 1.4973, + "step": 170 + }, + { + "epoch": 0.19591379792891128, + "grad_norm": 0.220703125, + "learning_rate": 0.0001, + "loss": 1.503, + "step": 175 + }, + { + "epoch": 0.20151133501259447, + "grad_norm": 0.2080078125, + "learning_rate": 0.0001, + "loss": 1.4571, + "step": 180 + }, + { + "epoch": 0.20710887209627762, + "grad_norm": 0.2119140625, + "learning_rate": 0.0001, + "loss": 1.5066, + "step": 185 + }, + { + "epoch": 0.2127064091799608, + "grad_norm": 0.2470703125, + "learning_rate": 0.0001, + "loss": 1.42, + "step": 190 + }, + { + "epoch": 0.218303946263644, + "grad_norm": 0.2275390625, + "learning_rate": 0.0001, + "loss": 1.4306, + "step": 195 + }, + { + "epoch": 0.22390148334732718, + "grad_norm": 0.357421875, + "learning_rate": 0.0001, + "loss": 1.3198, + "step": 200 + }, + { + "epoch": 0.22949902043101036, + "grad_norm": 0.2109375, + "learning_rate": 0.0001, + "loss": 1.4567, + "step": 205 + }, + { + "epoch": 0.23509655751469352, + "grad_norm": 0.265625, + "learning_rate": 0.0001, + "loss": 1.5331, + "step": 210 + }, + { + "epoch": 0.2406940945983767, + "grad_norm": 0.2392578125, + "learning_rate": 0.0001, + "loss": 1.4561, + "step": 215 + }, + { + "epoch": 0.2462916316820599, + "grad_norm": 0.263671875, + "learning_rate": 0.0001, + "loss": 1.5067, + "step": 220 + }, + { + "epoch": 0.2518891687657431, + "grad_norm": 0.21875, + "learning_rate": 0.0001, + "loss": 1.5058, + "step": 225 + }, + { + "epoch": 0.25748670584942623, + "grad_norm": 0.2333984375, + "learning_rate": 0.0001, + "loss": 1.5166, + "step": 230 + }, + { + "epoch": 0.26308424293310945, + "grad_norm": 0.2060546875, + "learning_rate": 0.0001, + "loss": 1.5097, + "step": 235 + }, + { + "epoch": 0.2686817800167926, + "grad_norm": 0.1982421875, + "learning_rate": 0.0001, + "loss": 1.5102, + "step": 240 + }, + { + "epoch": 0.2742793171004758, + "grad_norm": 0.216796875, + "learning_rate": 0.0001, + "loss": 1.4113, + "step": 245 + }, + { + "epoch": 0.279876854184159, + "grad_norm": 0.279296875, + "learning_rate": 0.0001, + "loss": 1.3356, + "step": 250 + }, + { + "epoch": 0.28547439126784213, + "grad_norm": 0.25, + "learning_rate": 0.0001, + "loss": 1.4804, + "step": 255 + }, + { + "epoch": 0.29107192835152534, + "grad_norm": 0.2236328125, + "learning_rate": 0.0001, + "loss": 1.5235, + "step": 260 + }, + { + "epoch": 0.2966694654352085, + "grad_norm": 0.23828125, + "learning_rate": 0.0001, + "loss": 1.5687, + "step": 265 + }, + { + "epoch": 0.3022670025188917, + "grad_norm": 0.220703125, + "learning_rate": 0.0001, + "loss": 1.4548, + "step": 270 + } + ], + "logging_steps": 5, + "max_steps": 450, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 90, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2.6839108215693312e+17, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/codellama/c/codesum/codesum_c_callgraph/checkpoint-270/training_args.bin b/codellama/c/codesum/codesum_c_callgraph/checkpoint-270/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..281d44350bc11cf3e7040f5deeb911bb026435c4 --- /dev/null +++ b/codellama/c/codesum/codesum_c_callgraph/checkpoint-270/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de79dec5e578f68fe50f2703c05abcde10ccb18697ac5a3edfec63f4ac7e3b83 +size 7416 diff --git a/codellama/c/codesum/codesum_c_callgraph/checkpoint-360/README.md b/codellama/c/codesum/codesum_c_callgraph/checkpoint-360/README.md new file mode 100644 index 0000000000000000000000000000000000000000..1c480e52daf505882c9f03a4e75fe30ea8c22c2c --- /dev/null +++ b/codellama/c/codesum/codesum_c_callgraph/checkpoint-360/README.md @@ -0,0 +1,202 @@ +--- +base_model: CodeLlama-13b-Instruct-hf/ +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.2 \ No newline at end of file diff --git a/codellama/c/codesum/codesum_c_callgraph/checkpoint-360/adapter_config.json b/codellama/c/codesum/codesum_c_callgraph/checkpoint-360/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..a120c2f7228bd25407c0120a8b6f8c00806f84bb --- /dev/null +++ b/codellama/c/codesum/codesum_c_callgraph/checkpoint-360/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "CodeLlama-13b-Instruct-hf/", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "gate_proj", + "k_proj", + "down_proj", + "q_proj", + "up_proj", + "o_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/codellama/c/codesum/codesum_c_callgraph/checkpoint-360/adapter_model.safetensors b/codellama/c/codesum/codesum_c_callgraph/checkpoint-360/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7d4bf39abf80d6b8d95e04a576cc94b54119f099 --- /dev/null +++ b/codellama/c/codesum/codesum_c_callgraph/checkpoint-360/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aefc01e64fb6b80c102cb888f45b817edc2b3102ff40673ac067647422083565 +size 1156480200 diff --git a/codellama/c/codesum/codesum_c_callgraph/checkpoint-360/adapter_model/README.md b/codellama/c/codesum/codesum_c_callgraph/checkpoint-360/adapter_model/README.md new file mode 100644 index 0000000000000000000000000000000000000000..1c480e52daf505882c9f03a4e75fe30ea8c22c2c --- /dev/null +++ b/codellama/c/codesum/codesum_c_callgraph/checkpoint-360/adapter_model/README.md @@ -0,0 +1,202 @@ +--- +base_model: CodeLlama-13b-Instruct-hf/ +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.2 \ No newline at end of file diff --git a/codellama/c/codesum/codesum_c_callgraph/checkpoint-360/adapter_model/adapter_config.json b/codellama/c/codesum/codesum_c_callgraph/checkpoint-360/adapter_model/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..a120c2f7228bd25407c0120a8b6f8c00806f84bb --- /dev/null +++ b/codellama/c/codesum/codesum_c_callgraph/checkpoint-360/adapter_model/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "CodeLlama-13b-Instruct-hf/", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "gate_proj", + "k_proj", + "down_proj", + "q_proj", + "up_proj", + "o_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/codellama/c/codesum/codesum_c_callgraph/checkpoint-360/adapter_model/adapter_model.safetensors b/codellama/c/codesum/codesum_c_callgraph/checkpoint-360/adapter_model/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7d4bf39abf80d6b8d95e04a576cc94b54119f099 --- /dev/null +++ b/codellama/c/codesum/codesum_c_callgraph/checkpoint-360/adapter_model/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aefc01e64fb6b80c102cb888f45b817edc2b3102ff40673ac067647422083565 +size 1156480200 diff --git a/codellama/c/codesum/codesum_c_callgraph/checkpoint-360/added_tokens.json b/codellama/c/codesum/codesum_c_callgraph/checkpoint-360/added_tokens.json new file mode 100644 index 0000000000000000000000000000000000000000..1cbbe5179eb8b5cc46632bbbc00eb51c68847074 --- /dev/null +++ b/codellama/c/codesum/codesum_c_callgraph/checkpoint-360/added_tokens.json @@ -0,0 +1,3 @@ +{ + "[PAD]": 32016 +} diff --git a/codellama/c/codesum/codesum_c_callgraph/checkpoint-360/optimizer.pt b/codellama/c/codesum/codesum_c_callgraph/checkpoint-360/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..6246092cee34dd8ea2eede035a9f9c90559f84df --- /dev/null +++ b/codellama/c/codesum/codesum_c_callgraph/checkpoint-360/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:242114ee7b14157f5301663982929245b6439f4075c4b517bd602bc052476739 +size 2003127538 diff --git a/codellama/c/codesum/codesum_c_callgraph/checkpoint-360/rng_state.pth b/codellama/c/codesum/codesum_c_callgraph/checkpoint-360/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..75607438305a6cd872edd07e5a21a914f698ce0b --- /dev/null +++ b/codellama/c/codesum/codesum_c_callgraph/checkpoint-360/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9724b479bcde75696d93ccd1f92c294317abd162382cc656d5dcbb0500c63f6a +size 14244 diff --git a/codellama/c/codesum/codesum_c_callgraph/checkpoint-360/scheduler.pt b/codellama/c/codesum/codesum_c_callgraph/checkpoint-360/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..ffb7177b487c41d6b9f78f59fcdd9023706925df --- /dev/null +++ b/codellama/c/codesum/codesum_c_callgraph/checkpoint-360/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:baba7c5dff09a1d575a7ff0a27f1158d5dd92adec2a108211e3ca605cfdd03a6 +size 1064 diff --git a/codellama/c/codesum/codesum_c_callgraph/checkpoint-360/special_tokens_map.json b/codellama/c/codesum/codesum_c_callgraph/checkpoint-360/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..330bb0c14209dcd402b155e7d58c6c2b5210d40d --- /dev/null +++ b/codellama/c/codesum/codesum_c_callgraph/checkpoint-360/special_tokens_map.json @@ -0,0 +1,36 @@ +{ + "additional_special_tokens": [ + "▁ ", + "▁", + "▁ ", + "▁ " + ], + "bos_token": { + "content": " ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "[PAD]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/codellama/c/codesum/codesum_c_callgraph/checkpoint-360/tokenizer.model b/codellama/c/codesum/codesum_c_callgraph/checkpoint-360/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..f6722e8b170230ebdd4c0f5f2ce03f219be536d4 --- /dev/null +++ b/codellama/c/codesum/codesum_c_callgraph/checkpoint-360/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45ccb9c8b6b561889acea59191d66986d314e7cbd6a78abc6e49b139ca91c1e6 +size 500058 diff --git a/codellama/c/codesum/codesum_c_callgraph/checkpoint-360/tokenizer_config.json b/codellama/c/codesum/codesum_c_callgraph/checkpoint-360/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8f4094d204e2be0ed7b6bfa83d20cff28326258d --- /dev/null +++ b/codellama/c/codesum/codesum_c_callgraph/checkpoint-360/tokenizer_config.json @@ -0,0 +1,94 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": " ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": " ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32007": { + "content": "▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32008": { + "content": "▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32009": { + "content": "▁ ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32010": { + "content": "▁ ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32016": { + "content": "[PAD]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [ + "▁ ", + "▁", + "▁ ", + "▁ " + ], + "bos_token": " ", + "chat_template": "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if loop.index0 == 0 and system_message != false %}{% set content = '<", + "eot_token": "▁>\\n' + system_message + '\\n< >\\n\\n' + message['content'] %}{% else %}{% set content = message['content'] %}{% endif %}{% if message['role'] == 'user' %}{{ bos_token + '[INST] ' + content | trim + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ ' ' + content | trim + ' ' + eos_token }}{% endif %}{% endfor %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "fill_token": " ", + "legacy": null, + "middle_token": "▁ ", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "[PAD]", + "padding_side": "right", + "prefix_token": "▁ ", + "sp_model_kwargs": {}, + "suffix_first": false, + "suffix_token": "▁", + "tokenizer_class": "CodeLlamaTokenizer", + "unk_token": " ", + "use_default_system_prompt": false +} diff --git a/codellama/c/codesum/codesum_c_callgraph/checkpoint-360/trainer_state.json b/codellama/c/codesum/codesum_c_callgraph/checkpoint-360/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..1271e902a2f5b0d8b9a18f8f85d55e2d1c942d5a --- /dev/null +++ b/codellama/c/codesum/codesum_c_callgraph/checkpoint-360/trainer_state.json @@ -0,0 +1,537 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.40302267002518893, + "eval_steps": 500, + "global_step": 360, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.00559753708368318, + "grad_norm": 1.2421875, + "learning_rate": 0.0001, + "loss": 4.5193, + "step": 5 + }, + { + "epoch": 0.01119507416736636, + "grad_norm": 0.68359375, + "learning_rate": 0.0001, + "loss": 2.8387, + "step": 10 + }, + { + "epoch": 0.016792611251049538, + "grad_norm": 0.484375, + "learning_rate": 0.0001, + "loss": 2.1966, + "step": 15 + }, + { + "epoch": 0.02239014833473272, + "grad_norm": 0.51953125, + "learning_rate": 0.0001, + "loss": 2.0024, + "step": 20 + }, + { + "epoch": 0.027987685418415897, + "grad_norm": 0.296875, + "learning_rate": 0.0001, + "loss": 1.7735, + "step": 25 + }, + { + "epoch": 0.033585222502099076, + "grad_norm": 0.263671875, + "learning_rate": 0.0001, + "loss": 1.6781, + "step": 30 + }, + { + "epoch": 0.039182759585782254, + "grad_norm": 0.291015625, + "learning_rate": 0.0001, + "loss": 1.6619, + "step": 35 + }, + { + "epoch": 0.04478029666946544, + "grad_norm": 0.3125, + "learning_rate": 0.0001, + "loss": 1.6361, + "step": 40 + }, + { + "epoch": 0.05037783375314862, + "grad_norm": 0.298828125, + "learning_rate": 0.0001, + "loss": 1.6153, + "step": 45 + }, + { + "epoch": 0.055975370836831795, + "grad_norm": 0.306640625, + "learning_rate": 0.0001, + "loss": 1.5201, + "step": 50 + }, + { + "epoch": 0.06157290792051497, + "grad_norm": 0.296875, + "learning_rate": 0.0001, + "loss": 1.5211, + "step": 55 + }, + { + "epoch": 0.06717044500419815, + "grad_norm": 0.2392578125, + "learning_rate": 0.0001, + "loss": 1.5359, + "step": 60 + }, + { + "epoch": 0.07276798208788134, + "grad_norm": 0.251953125, + "learning_rate": 0.0001, + "loss": 1.5686, + "step": 65 + }, + { + "epoch": 0.07836551917156451, + "grad_norm": 0.255859375, + "learning_rate": 0.0001, + "loss": 1.5732, + "step": 70 + }, + { + "epoch": 0.08396305625524769, + "grad_norm": 0.244140625, + "learning_rate": 0.0001, + "loss": 1.5958, + "step": 75 + }, + { + "epoch": 0.08956059333893088, + "grad_norm": 0.25390625, + "learning_rate": 0.0001, + "loss": 1.5006, + "step": 80 + }, + { + "epoch": 0.09515813042261405, + "grad_norm": 0.2890625, + "learning_rate": 0.0001, + "loss": 1.5051, + "step": 85 + }, + { + "epoch": 0.10075566750629723, + "grad_norm": 0.314453125, + "learning_rate": 0.0001, + "loss": 1.5649, + "step": 90 + }, + { + "epoch": 0.1063532045899804, + "grad_norm": 0.263671875, + "learning_rate": 0.0001, + "loss": 1.5, + "step": 95 + }, + { + "epoch": 0.11195074167366359, + "grad_norm": 0.310546875, + "learning_rate": 0.0001, + "loss": 1.4951, + "step": 100 + }, + { + "epoch": 0.11754827875734676, + "grad_norm": 0.2412109375, + "learning_rate": 0.0001, + "loss": 1.5258, + "step": 105 + }, + { + "epoch": 0.12314581584102995, + "grad_norm": 0.2236328125, + "learning_rate": 0.0001, + "loss": 1.556, + "step": 110 + }, + { + "epoch": 0.12874335292471312, + "grad_norm": 0.1982421875, + "learning_rate": 0.0001, + "loss": 1.5306, + "step": 115 + }, + { + "epoch": 0.1343408900083963, + "grad_norm": 0.203125, + "learning_rate": 0.0001, + "loss": 1.4242, + "step": 120 + }, + { + "epoch": 0.1399384270920795, + "grad_norm": 0.2216796875, + "learning_rate": 0.0001, + "loss": 1.4403, + "step": 125 + }, + { + "epoch": 0.14553596417576267, + "grad_norm": 0.2265625, + "learning_rate": 0.0001, + "loss": 1.4604, + "step": 130 + }, + { + "epoch": 0.15113350125944586, + "grad_norm": 0.2275390625, + "learning_rate": 0.0001, + "loss": 1.5809, + "step": 135 + }, + { + "epoch": 0.15673103834312901, + "grad_norm": 0.2421875, + "learning_rate": 0.0001, + "loss": 1.4282, + "step": 140 + }, + { + "epoch": 0.1623285754268122, + "grad_norm": 0.228515625, + "learning_rate": 0.0001, + "loss": 1.5452, + "step": 145 + }, + { + "epoch": 0.16792611251049538, + "grad_norm": 0.359375, + "learning_rate": 0.0001, + "loss": 1.3127, + "step": 150 + }, + { + "epoch": 0.17352364959417857, + "grad_norm": 0.46875, + "learning_rate": 0.0001, + "loss": 1.5287, + "step": 155 + }, + { + "epoch": 0.17912118667786175, + "grad_norm": 0.28515625, + "learning_rate": 0.0001, + "loss": 1.5197, + "step": 160 + }, + { + "epoch": 0.1847187237615449, + "grad_norm": 0.201171875, + "learning_rate": 0.0001, + "loss": 1.5512, + "step": 165 + }, + { + "epoch": 0.1903162608452281, + "grad_norm": 0.2490234375, + "learning_rate": 0.0001, + "loss": 1.4973, + "step": 170 + }, + { + "epoch": 0.19591379792891128, + "grad_norm": 0.220703125, + "learning_rate": 0.0001, + "loss": 1.503, + "step": 175 + }, + { + "epoch": 0.20151133501259447, + "grad_norm": 0.2080078125, + "learning_rate": 0.0001, + "loss": 1.4571, + "step": 180 + }, + { + "epoch": 0.20710887209627762, + "grad_norm": 0.2119140625, + "learning_rate": 0.0001, + "loss": 1.5066, + "step": 185 + }, + { + "epoch": 0.2127064091799608, + "grad_norm": 0.2470703125, + "learning_rate": 0.0001, + "loss": 1.42, + "step": 190 + }, + { + "epoch": 0.218303946263644, + "grad_norm": 0.2275390625, + "learning_rate": 0.0001, + "loss": 1.4306, + "step": 195 + }, + { + "epoch": 0.22390148334732718, + "grad_norm": 0.357421875, + "learning_rate": 0.0001, + "loss": 1.3198, + "step": 200 + }, + { + "epoch": 0.22949902043101036, + "grad_norm": 0.2109375, + "learning_rate": 0.0001, + "loss": 1.4567, + "step": 205 + }, + { + "epoch": 0.23509655751469352, + "grad_norm": 0.265625, + "learning_rate": 0.0001, + "loss": 1.5331, + "step": 210 + }, + { + "epoch": 0.2406940945983767, + "grad_norm": 0.2392578125, + "learning_rate": 0.0001, + "loss": 1.4561, + "step": 215 + }, + { + "epoch": 0.2462916316820599, + "grad_norm": 0.263671875, + "learning_rate": 0.0001, + "loss": 1.5067, + "step": 220 + }, + { + "epoch": 0.2518891687657431, + "grad_norm": 0.21875, + "learning_rate": 0.0001, + "loss": 1.5058, + "step": 225 + }, + { + "epoch": 0.25748670584942623, + "grad_norm": 0.2333984375, + "learning_rate": 0.0001, + "loss": 1.5166, + "step": 230 + }, + { + "epoch": 0.26308424293310945, + "grad_norm": 0.2060546875, + "learning_rate": 0.0001, + "loss": 1.5097, + "step": 235 + }, + { + "epoch": 0.2686817800167926, + "grad_norm": 0.1982421875, + "learning_rate": 0.0001, + "loss": 1.5102, + "step": 240 + }, + { + "epoch": 0.2742793171004758, + "grad_norm": 0.216796875, + "learning_rate": 0.0001, + "loss": 1.4113, + "step": 245 + }, + { + "epoch": 0.279876854184159, + "grad_norm": 0.279296875, + "learning_rate": 0.0001, + "loss": 1.3356, + "step": 250 + }, + { + "epoch": 0.28547439126784213, + "grad_norm": 0.25, + "learning_rate": 0.0001, + "loss": 1.4804, + "step": 255 + }, + { + "epoch": 0.29107192835152534, + "grad_norm": 0.2236328125, + "learning_rate": 0.0001, + "loss": 1.5235, + "step": 260 + }, + { + "epoch": 0.2966694654352085, + "grad_norm": 0.23828125, + "learning_rate": 0.0001, + "loss": 1.5687, + "step": 265 + }, + { + "epoch": 0.3022670025188917, + "grad_norm": 0.220703125, + "learning_rate": 0.0001, + "loss": 1.4548, + "step": 270 + }, + { + "epoch": 0.30786453960257487, + "grad_norm": 0.2041015625, + "learning_rate": 0.0001, + "loss": 1.5667, + "step": 275 + }, + { + "epoch": 0.31346207668625803, + "grad_norm": 0.2060546875, + "learning_rate": 0.0001, + "loss": 1.4787, + "step": 280 + }, + { + "epoch": 0.31905961376994124, + "grad_norm": 0.2216796875, + "learning_rate": 0.0001, + "loss": 1.4374, + "step": 285 + }, + { + "epoch": 0.3246571508536244, + "grad_norm": 0.2138671875, + "learning_rate": 0.0001, + "loss": 1.5262, + "step": 290 + }, + { + "epoch": 0.3302546879373076, + "grad_norm": 0.26953125, + "learning_rate": 0.0001, + "loss": 1.4109, + "step": 295 + }, + { + "epoch": 0.33585222502099077, + "grad_norm": 0.37890625, + "learning_rate": 0.0001, + "loss": 1.307, + "step": 300 + }, + { + "epoch": 0.3414497621046739, + "grad_norm": 0.3671875, + "learning_rate": 0.0001, + "loss": 1.4678, + "step": 305 + }, + { + "epoch": 0.34704729918835714, + "grad_norm": 0.244140625, + "learning_rate": 0.0001, + "loss": 1.5243, + "step": 310 + }, + { + "epoch": 0.3526448362720403, + "grad_norm": 0.23828125, + "learning_rate": 0.0001, + "loss": 1.4596, + "step": 315 + }, + { + "epoch": 0.3582423733557235, + "grad_norm": 0.19921875, + "learning_rate": 0.0001, + "loss": 1.4231, + "step": 320 + }, + { + "epoch": 0.36383991043940667, + "grad_norm": 0.2177734375, + "learning_rate": 0.0001, + "loss": 1.4536, + "step": 325 + }, + { + "epoch": 0.3694374475230898, + "grad_norm": 0.18359375, + "learning_rate": 0.0001, + "loss": 1.4464, + "step": 330 + }, + { + "epoch": 0.37503498460677304, + "grad_norm": 0.2255859375, + "learning_rate": 0.0001, + "loss": 1.4785, + "step": 335 + }, + { + "epoch": 0.3806325216904562, + "grad_norm": 0.2294921875, + "learning_rate": 0.0001, + "loss": 1.4717, + "step": 340 + }, + { + "epoch": 0.38623005877413935, + "grad_norm": 0.2197265625, + "learning_rate": 0.0001, + "loss": 1.3935, + "step": 345 + }, + { + "epoch": 0.39182759585782256, + "grad_norm": 0.2734375, + "learning_rate": 0.0001, + "loss": 1.3039, + "step": 350 + }, + { + "epoch": 0.3974251329415057, + "grad_norm": 0.490234375, + "learning_rate": 0.0001, + "loss": 1.3593, + "step": 355 + }, + { + "epoch": 0.40302267002518893, + "grad_norm": 0.2294921875, + "learning_rate": 0.0001, + "loss": 1.5417, + "step": 360 + } + ], + "logging_steps": 5, + "max_steps": 450, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 90, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 3.532164009460531e+17, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/codellama/c/codesum/codesum_c_callgraph/checkpoint-360/training_args.bin b/codellama/c/codesum/codesum_c_callgraph/checkpoint-360/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..281d44350bc11cf3e7040f5deeb911bb026435c4 --- /dev/null +++ b/codellama/c/codesum/codesum_c_callgraph/checkpoint-360/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de79dec5e578f68fe50f2703c05abcde10ccb18697ac5a3edfec63f4ac7e3b83 +size 7416 diff --git a/codellama/c/codesum/codesum_c_callgraph/checkpoint-450/README.md b/codellama/c/codesum/codesum_c_callgraph/checkpoint-450/README.md new file mode 100644 index 0000000000000000000000000000000000000000..1c480e52daf505882c9f03a4e75fe30ea8c22c2c --- /dev/null +++ b/codellama/c/codesum/codesum_c_callgraph/checkpoint-450/README.md @@ -0,0 +1,202 @@ +--- +base_model: CodeLlama-13b-Instruct-hf/ +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.2 \ No newline at end of file diff --git a/codellama/c/codesum/codesum_c_callgraph/checkpoint-450/adapter_config.json b/codellama/c/codesum/codesum_c_callgraph/checkpoint-450/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..a120c2f7228bd25407c0120a8b6f8c00806f84bb --- /dev/null +++ b/codellama/c/codesum/codesum_c_callgraph/checkpoint-450/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "CodeLlama-13b-Instruct-hf/", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "gate_proj", + "k_proj", + "down_proj", + "q_proj", + "up_proj", + "o_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/codellama/c/codesum/codesum_c_callgraph/checkpoint-450/adapter_model.safetensors b/codellama/c/codesum/codesum_c_callgraph/checkpoint-450/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..335e243fe95383b025f09d64ffad2b7d0c5d06f0 --- /dev/null +++ b/codellama/c/codesum/codesum_c_callgraph/checkpoint-450/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd737274c5b5c85845267001c23404f2f66f4ac2ad483b3c4f882e25a24de13e +size 1156480200 diff --git a/codellama/c/codesum/codesum_c_callgraph/checkpoint-450/adapter_model/README.md b/codellama/c/codesum/codesum_c_callgraph/checkpoint-450/adapter_model/README.md new file mode 100644 index 0000000000000000000000000000000000000000..1c480e52daf505882c9f03a4e75fe30ea8c22c2c --- /dev/null +++ b/codellama/c/codesum/codesum_c_callgraph/checkpoint-450/adapter_model/README.md @@ -0,0 +1,202 @@ +--- +base_model: CodeLlama-13b-Instruct-hf/ +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.2 \ No newline at end of file diff --git a/codellama/c/codesum/codesum_c_callgraph/checkpoint-450/adapter_model/adapter_config.json b/codellama/c/codesum/codesum_c_callgraph/checkpoint-450/adapter_model/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..a120c2f7228bd25407c0120a8b6f8c00806f84bb --- /dev/null +++ b/codellama/c/codesum/codesum_c_callgraph/checkpoint-450/adapter_model/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "CodeLlama-13b-Instruct-hf/", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "gate_proj", + "k_proj", + "down_proj", + "q_proj", + "up_proj", + "o_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/codellama/c/codesum/codesum_c_callgraph/checkpoint-450/adapter_model/adapter_model.safetensors b/codellama/c/codesum/codesum_c_callgraph/checkpoint-450/adapter_model/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..335e243fe95383b025f09d64ffad2b7d0c5d06f0 --- /dev/null +++ b/codellama/c/codesum/codesum_c_callgraph/checkpoint-450/adapter_model/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd737274c5b5c85845267001c23404f2f66f4ac2ad483b3c4f882e25a24de13e +size 1156480200 diff --git a/codellama/c/codesum/codesum_c_callgraph/checkpoint-450/added_tokens.json b/codellama/c/codesum/codesum_c_callgraph/checkpoint-450/added_tokens.json new file mode 100644 index 0000000000000000000000000000000000000000..1cbbe5179eb8b5cc46632bbbc00eb51c68847074 --- /dev/null +++ b/codellama/c/codesum/codesum_c_callgraph/checkpoint-450/added_tokens.json @@ -0,0 +1,3 @@ +{ + "[PAD]": 32016 +} diff --git a/codellama/c/codesum/codesum_c_callgraph/checkpoint-450/optimizer.pt b/codellama/c/codesum/codesum_c_callgraph/checkpoint-450/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..1f2bf52ffcd0dc6a0d9626c32710ca617b1651ec --- /dev/null +++ b/codellama/c/codesum/codesum_c_callgraph/checkpoint-450/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2af20277680390b5fe58b2f08d15a8be7b6d79c8fc6e8e9259e8bc5e253fca7 +size 2003127538 diff --git a/codellama/c/codesum/codesum_c_callgraph/checkpoint-450/rng_state.pth b/codellama/c/codesum/codesum_c_callgraph/checkpoint-450/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..75607438305a6cd872edd07e5a21a914f698ce0b --- /dev/null +++ b/codellama/c/codesum/codesum_c_callgraph/checkpoint-450/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9724b479bcde75696d93ccd1f92c294317abd162382cc656d5dcbb0500c63f6a +size 14244 diff --git a/codellama/c/codesum/codesum_c_callgraph/checkpoint-450/scheduler.pt b/codellama/c/codesum/codesum_c_callgraph/checkpoint-450/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..c5b95bc48aced6514998ca04f85182a6f50b3ae5 --- /dev/null +++ b/codellama/c/codesum/codesum_c_callgraph/checkpoint-450/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94c8e40d3e998ff2e64b4e5b87135c84483399e6a8b1fe73e89c05c4855cb1f5 +size 1064 diff --git a/codellama/c/codesum/codesum_c_callgraph/checkpoint-450/special_tokens_map.json b/codellama/c/codesum/codesum_c_callgraph/checkpoint-450/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..330bb0c14209dcd402b155e7d58c6c2b5210d40d --- /dev/null +++ b/codellama/c/codesum/codesum_c_callgraph/checkpoint-450/special_tokens_map.json @@ -0,0 +1,36 @@ +{ + "additional_special_tokens": [ + "▁ ", + "▁", + "▁ ", + "▁ " + ], + "bos_token": { + "content": " ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "[PAD]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/codellama/c/codesum/codesum_c_callgraph/checkpoint-450/tokenizer.model b/codellama/c/codesum/codesum_c_callgraph/checkpoint-450/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..f6722e8b170230ebdd4c0f5f2ce03f219be536d4 --- /dev/null +++ b/codellama/c/codesum/codesum_c_callgraph/checkpoint-450/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45ccb9c8b6b561889acea59191d66986d314e7cbd6a78abc6e49b139ca91c1e6 +size 500058 diff --git a/codellama/c/codesum/codesum_c_callgraph/checkpoint-450/tokenizer_config.json b/codellama/c/codesum/codesum_c_callgraph/checkpoint-450/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8f4094d204e2be0ed7b6bfa83d20cff28326258d --- /dev/null +++ b/codellama/c/codesum/codesum_c_callgraph/checkpoint-450/tokenizer_config.json @@ -0,0 +1,94 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": " ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": " ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32007": { + "content": "▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32008": { + "content": "▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32009": { + "content": "▁ ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32010": { + "content": "▁ ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32016": { + "content": "[PAD]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [ + "▁ ", + "▁", + "▁ ", + "▁ " + ], + "bos_token": " ", + "chat_template": "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if loop.index0 == 0 and system_message != false %}{% set content = '<