mpekey committed on
Commit 4e254e3 · verified · 1 Parent(s): 7358b92

End of training
README.md ADDED
@@ -0,0 +1,111 @@
+ ---
+ library_name: peft
+ license: bsd-3-clause
+ base_model: hugohrban/progen2-base
+ tags:
+ - generated_from_trainer
+ model-index:
+ - name: Progen2_Kinase_PhosphositeGen
+ results: []
+ ---
+
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
+ should probably proofread and complete it, then remove this comment. -->
+
+ # Progen2_Kinase_PhosphositeGen
+
+ This model is a fine-tuned version of [hugohrban/progen2-base](https://huggingface.co/hugohrban/progen2-base) on an unspecified dataset.
+ It achieves the following results on the evaluation set:
+ - Loss: 2.0025
+ - Perplexity: 7.4078
+
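+ The reported perplexity is just the exponential of the evaluation loss; a quick sanity check:
+
+ ```python
+ import math
+
+ # exp(eval loss) should reproduce the reported perplexity
+ print(math.exp(2.0025))  # ≈ 7.4078
+ ```
+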
+ ## Model description
+
+ More information needed
+
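+ Per the `adapter_config.json` shipped with this repository, this is a LoRA adapter (r=128, lora_alpha=128, dropout 0.1) over the `qkv_proj` and `out_proj` modules of ProGen2-base, trained for causal language modeling. A minimal sketch of the equivalent `peft` configuration (the surrounding training script is not documented here):
+
+ ```python
+ from peft import LoraConfig
+
+ # Values mirror adapter_config.json in this repo.
+ lora_config = LoraConfig(
+     r=128,
+     lora_alpha=128,
+     lora_dropout=0.1,
+     bias="none",
+     target_modules=["qkv_proj", "out_proj"],
+     task_type="CAUSAL_LM",
+ )
+ ```
+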
+ ## Intended uses & limitations
+
+ More information needed
+
+ ## Training and evaluation data
+
+ More information needed
+
+ ## Training procedure
+
+ ### Training hyperparameters
+
+ The following hyperparameters were used during training (a configuration sketch follows the list):
+ - learning_rate: 0.0005
+ - train_batch_size: 8
+ - eval_batch_size: 8
+ - seed: 42
+ - gradient_accumulation_steps: 2
+ - total_train_batch_size: 16
+ - optimizer: adamw_torch with betas=(0.9, 0.999), epsilon=1e-08, and no additional optimizer arguments
+ - lr_scheduler_type: linear
+ - training_steps: 5000
+
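+ A minimal `transformers.TrainingArguments` sketch that reproduces the values above (the output directory and any other script details are assumptions):
+
+ ```python
+ from transformers import TrainingArguments
+
+ training_args = TrainingArguments(
+     output_dir="lora_checkpoints",   # assumed; matches the checkpoint path seen in this commit
+     learning_rate=5e-4,
+     per_device_train_batch_size=8,
+     per_device_eval_batch_size=8,
+     gradient_accumulation_steps=2,   # effective train batch size of 16
+     seed=42,
+     optim="adamw_torch",
+     adam_beta1=0.9,
+     adam_beta2=0.999,
+     adam_epsilon=1e-8,
+     lr_scheduler_type="linear",
+     max_steps=5000,
+ )
+ ```
+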
+ ### Training results
+
+ | Training Loss | Epoch | Step | Validation Loss | Perplexity |
+ |:-------------:|:------:|:----:|:---------------:|:----------:|
+ | 4.9303 | 0.1415 | 100 | 2.1811 | 8.8564 |
+ | 4.2874 | 0.2831 | 200 | 2.1198 | 8.3296 |
+ | 4.1941 | 0.4246 | 300 | 2.0694 | 7.9202 |
+ | 4.0803 | 0.5662 | 400 | 2.0362 | 7.6616 |
+ | 4.0613 | 0.7077 | 500 | 2.0053 | 7.4284 |
+ | 3.9511 | 0.8493 | 600 | 1.9922 | 7.3315 |
+ | 3.9216 | 0.9908 | 700 | 1.9477 | 7.0124 |
+ | 3.5053 | 1.1316 | 800 | 1.9200 | 6.8208 |
+ | 3.4311 | 1.2732 | 900 | 1.9035 | 6.7094 |
+ | 3.4238 | 1.4147 | 1000 | 1.8714 | 6.4974 |
+ | 3.392 | 1.5563 | 1100 | 1.8527 | 6.3772 |
+ | 3.3621 | 1.6978 | 1200 | 1.8317 | 6.2444 |
+ | 3.3577 | 1.8393 | 1300 | 1.8237 | 6.1945 |
+ | 3.3419 | 1.9809 | 1400 | 1.7889 | 5.9826 |
+ | 2.8256 | 2.1217 | 1500 | 1.7977 | 6.0356 |
+ | 2.8061 | 2.2633 | 1600 | 1.7860 | 5.9653 |
+ | 2.7837 | 2.4048 | 1700 | 1.7666 | 5.8507 |
+ | 2.7504 | 2.5464 | 1800 | 1.7428 | 5.7133 |
+ | 2.829 | 2.6879 | 1900 | 1.7288 | 5.6337 |
+ | 2.7567 | 2.8294 | 2000 | 1.7088 | 5.5225 |
+ | 2.7443 | 2.9710 | 2100 | 1.6986 | 5.4664 |
+ | 2.3409 | 3.1118 | 2200 | 1.7382 | 5.6869 |
+ | 2.2568 | 3.2534 | 2300 | 1.7487 | 5.7471 |
+ | 2.2481 | 3.3949 | 2400 | 1.7181 | 5.5740 |
+ | 2.2323 | 3.5364 | 2500 | 1.7058 | 5.5059 |
+ | 2.2654 | 3.6780 | 2600 | 1.7031 | 5.4912 |
+ | 2.2611 | 3.8195 | 2700 | 1.6707 | 5.3157 |
+ | 2.256 | 3.9611 | 2800 | 1.6719 | 5.3222 |
+ | 1.8849 | 4.1019 | 2900 | 1.7899 | 5.9886 |
+ | 1.771 | 4.2435 | 3000 | 1.7697 | 5.8694 |
+ | 1.7992 | 4.3850 | 3100 | 1.7880 | 5.9775 |
+ | 1.838 | 4.5265 | 3200 | 1.7871 | 5.9722 |
+ | 1.8285 | 4.6681 | 3300 | 1.7342 | 5.6644 |
+ | 1.8127 | 4.8096 | 3400 | 1.7196 | 5.5825 |
+ | 1.8353 | 4.9512 | 3500 | 1.7471 | 5.7377 |
+ | 1.5511 | 5.0920 | 3600 | 1.8285 | 6.2248 |
+ | 1.4449 | 5.2335 | 3700 | 1.8683 | 6.4770 |
+ | 1.4631 | 5.3751 | 3800 | 1.8880 | 6.6063 |
+ | 1.4525 | 5.5166 | 3900 | 1.8807 | 6.5581 |
+ | 1.4516 | 5.6582 | 4000 | 1.8723 | 6.5031 |
+ | 1.4423 | 5.7997 | 4100 | 1.8828 | 6.5716 |
+ | 1.4626 | 5.9413 | 4200 | 1.8535 | 6.3824 |
+ | 1.3065 | 6.0821 | 4300 | 1.9369 | 6.9369 |
+ | 1.1889 | 6.2236 | 4400 | 1.9767 | 7.2191 |
+ | 1.1865 | 6.3652 | 4500 | 1.9845 | 7.2752 |
+ | 1.1927 | 6.5067 | 4600 | 2.0029 | 7.4103 |
+ | 1.1937 | 6.6483 | 4700 | 1.9931 | 7.3380 |
+ | 1.1893 | 6.7898 | 4800 | 1.9814 | 7.2532 |
+ | 1.1654 | 6.9314 | 4900 | 1.9931 | 7.3383 |
+ | 1.1036 | 7.0722 | 5000 | 2.0025 | 7.4078 |
+
+
+ ### Framework versions
+
+ - PEFT 0.13.2
+ - Transformers 4.47.1
+ - Pytorch 2.1.0.post301
+ - Datasets 3.0.2
+ - Tokenizers 0.21.0
adapter_config.json ADDED
@@ -0,0 +1,29 @@
+ {
+ "alpha_pattern": {},
+ "auto_mapping": null,
+ "base_model_name_or_path": "hugohrban/progen2-base",
+ "bias": "none",
+ "fan_in_fan_out": false,
+ "inference_mode": true,
+ "init_lora_weights": true,
+ "layer_replication": null,
+ "layers_pattern": null,
+ "layers_to_transform": null,
+ "loftq_config": {},
+ "lora_alpha": 128,
+ "lora_dropout": 0.1,
+ "megatron_config": null,
+ "megatron_core": "megatron.core",
+ "modules_to_save": null,
+ "peft_type": "LORA",
+ "r": 128,
+ "rank_pattern": {},
+ "revision": null,
+ "target_modules": [
+ "out_proj",
+ "qkv_proj"
+ ],
+ "task_type": "CAUSAL_LM",
+ "use_dora": false,
+ "use_rslora": false
+ }
adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:dc764ad0e36644d505b5097c0a0ec75f813161839a4ee95646cfae3ea7024e1a
+ size 127416480
added_tokens.json ADDED
@@ -0,0 +1,3 @@
+ {
+ "<|endoftext|>": 30
+ }
arf/scratch/mpekey/Progen2/lora_checkpoints/README.md ADDED
@@ -0,0 +1,202 @@
+ ---
+ base_model: hugohrban/progen2-base
+ library_name: peft
+ ---
+
+ # Model Card for Model ID
+
+ <!-- Provide a quick summary of what the model is/does. -->
+
+
+
+ ## Model Details
+
+ ### Model Description
+
+ <!-- Provide a longer summary of what this model is. -->
+
+
+
+ - **Developed by:** [More Information Needed]
+ - **Funded by [optional]:** [More Information Needed]
+ - **Shared by [optional]:** [More Information Needed]
+ - **Model type:** [More Information Needed]
+ - **Language(s) (NLP):** [More Information Needed]
+ - **License:** [More Information Needed]
+ - **Finetuned from model [optional]:** [More Information Needed]
+
+ ### Model Sources [optional]
+
+ <!-- Provide the basic links for the model. -->
+
+ - **Repository:** [More Information Needed]
+ - **Paper [optional]:** [More Information Needed]
+ - **Demo [optional]:** [More Information Needed]
+
+ ## Uses
+
+ <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+ ### Direct Use
+
+ <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+ [More Information Needed]
+
+ ### Downstream Use [optional]
+
+ <!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+ [More Information Needed]
+
+ ### Out-of-Scope Use
+
+ <!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+ [More Information Needed]
+
+ ## Bias, Risks, and Limitations
+
+ <!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+ [More Information Needed]
+
+ ### Recommendations
+
+ <!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+ Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+ ## How to Get Started with the Model
+
+ Use the code below to get started with the model.
+
+ [More Information Needed]
+
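+ Pending an official snippet, here is a minimal loading sketch; the adapter repo id, the need for `trust_remote_code`, the generation settings, and the prompt format are assumptions rather than documented choices.
+
+ ```python
+ import torch
+ from transformers import AutoModelForCausalLM, AutoTokenizer
+ from peft import PeftModel
+
+ base_id = "hugohrban/progen2-base"
+ adapter_id = "mpekey/Progen2_Kinase_PhosphositeGen"  # assumed repo id for this adapter
+
+ # The ProGen2 port ships custom modeling code, so trust_remote_code=True is assumed to be required.
+ tokenizer = AutoTokenizer.from_pretrained(adapter_id)
+ base_model = AutoModelForCausalLM.from_pretrained(base_id, trust_remote_code=True)
+ model = PeftModel.from_pretrained(base_model, adapter_id)
+ model.eval()
+
+ # Hypothetical prompt: ProGen2 sequences begin with the "1" (N-terminus) token;
+ # the kinase/phosphosite prompt format used for fine-tuning is not documented in this card.
+ inputs = tokenizer("1MEE", return_tensors="pt")
+ with torch.no_grad():
+     output = model.generate(**inputs, max_new_tokens=20, do_sample=True, top_p=0.95)
+ print(tokenizer.decode(output[0], skip_special_tokens=True))
+ ```
+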
+ ## Training Details
+
+ ### Training Data
+
+ <!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+ [More Information Needed]
+
+ ### Training Procedure
+
+ <!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+ #### Preprocessing [optional]
+
+ [More Information Needed]
+
+
+ #### Training Hyperparameters
+
+ - **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+ #### Speeds, Sizes, Times [optional]
+
+ <!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+ [More Information Needed]
+
+ ## Evaluation
+
+ <!-- This section describes the evaluation protocols and provides the results. -->
+
+ ### Testing Data, Factors & Metrics
+
+ #### Testing Data
+
+ <!-- This should link to a Dataset Card if possible. -->
+
+ [More Information Needed]
+
+ #### Factors
+
+ <!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+ [More Information Needed]
+
+ #### Metrics
+
+ <!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+ [More Information Needed]
+
+ ### Results
+
+ [More Information Needed]
+
+ #### Summary
+
+
+
+ ## Model Examination [optional]
+
+ <!-- Relevant interpretability work for the model goes here -->
+
+ [More Information Needed]
+
+ ## Environmental Impact
+
+ <!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+ Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+ - **Hardware Type:** [More Information Needed]
+ - **Hours used:** [More Information Needed]
+ - **Cloud Provider:** [More Information Needed]
+ - **Compute Region:** [More Information Needed]
+ - **Carbon Emitted:** [More Information Needed]
+
+ ## Technical Specifications [optional]
+
+ ### Model Architecture and Objective
+
+ [More Information Needed]
+
+ ### Compute Infrastructure
+
+ [More Information Needed]
+
+ #### Hardware
+
+ [More Information Needed]
+
+ #### Software
+
+ [More Information Needed]
+
+ ## Citation [optional]
+
+ <!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+ **BibTeX:**
+
+ [More Information Needed]
+
+ **APA:**
+
+ [More Information Needed]
+
+ ## Glossary [optional]
+
+ <!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+ [More Information Needed]
+
+ ## More Information [optional]
+
+ [More Information Needed]
+
+ ## Model Card Authors [optional]
+
+ [More Information Needed]
+
+ ## Model Card Contact
+
+ [More Information Needed]
+ ### Framework versions
+
+ - PEFT 0.13.2
arf/scratch/mpekey/Progen2/lora_checkpoints/adapter_config.json ADDED
@@ -0,0 +1,29 @@
+ {
+ "alpha_pattern": {},
+ "auto_mapping": null,
+ "base_model_name_or_path": "hugohrban/progen2-base",
+ "bias": "none",
+ "fan_in_fan_out": false,
+ "inference_mode": true,
+ "init_lora_weights": true,
+ "layer_replication": null,
+ "layers_pattern": null,
+ "layers_to_transform": null,
+ "loftq_config": {},
+ "lora_alpha": 128,
+ "lora_dropout": 0.1,
+ "megatron_config": null,
+ "megatron_core": "megatron.core",
+ "modules_to_save": null,
+ "peft_type": "LORA",
+ "r": 128,
+ "rank_pattern": {},
+ "revision": null,
+ "target_modules": [
+ "out_proj",
+ "qkv_proj"
+ ],
+ "task_type": "CAUSAL_LM",
+ "use_dora": false,
+ "use_rslora": false
+ }
arf/scratch/mpekey/Progen2/lora_checkpoints/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:dc764ad0e36644d505b5097c0a0ec75f813161839a4ee95646cfae3ea7024e1a
+ size 127416480
merges.txt ADDED
@@ -0,0 +1 @@
+ #version: 0.2
special_tokens_map.json ADDED
@@ -0,0 +1,6 @@
+ {
+ "bos_token": "<|endoftext|>",
+ "eos_token": "<|endoftext|>",
+ "pad_token": "<|pad|>",
+ "unk_token": "<|endoftext|>"
+ }
tokenizer.json ADDED
@@ -0,0 +1,105 @@
+ {
+ "version": "1.0",
+ "truncation": null,
+ "padding": null,
+ "added_tokens": [
+ {
+ "id": 0,
+ "content": "<|pad|>",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 1,
+ "content": "<|bos|>",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 2,
+ "content": "<|eos|>",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 30,
+ "content": "<|endoftext|>",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ }
+ ],
+ "normalizer": null,
+ "pre_tokenizer": {
+ "type": "ByteLevel",
+ "add_prefix_space": false,
+ "trim_offsets": true,
+ "use_regex": true
+ },
+ "post_processor": {
+ "type": "ByteLevel",
+ "add_prefix_space": true,
+ "trim_offsets": true,
+ "use_regex": true
+ },
+ "decoder": {
+ "type": "ByteLevel",
+ "add_prefix_space": true,
+ "trim_offsets": true,
+ "use_regex": true
+ },
+ "model": {
+ "type": "BPE",
+ "dropout": null,
+ "unk_token": null,
+ "continuing_subword_prefix": null,
+ "end_of_word_suffix": null,
+ "fuse_unk": false,
+ "byte_fallback": false,
+ "ignore_merges": false,
+ "vocab": {
+ "<|pad|>": 0,
+ "<|bos|>": 1,
+ "<|eos|>": 2,
+ "1": 3,
+ "2": 4,
+ "A": 5,
+ "B": 6,
+ "C": 7,
+ "D": 8,
+ "E": 9,
+ "F": 10,
+ "G": 11,
+ "H": 12,
+ "I": 13,
+ "K": 14,
+ "L": 15,
+ "M": 16,
+ "N": 17,
+ "O": 18,
+ "P": 19,
+ "Q": 20,
+ "R": 21,
+ "S": 22,
+ "T": 23,
+ "U": 24,
+ "V": 25,
+ "W": 26,
+ "X": 27,
+ "Y": 28,
+ "Z": 29
+ },
+ "merges": []
+ }
+ }
tokenizer_config.json ADDED
@@ -0,0 +1,45 @@
+ {
+ "add_prefix_space": false,
+ "added_tokens_decoder": {
+ "0": {
+ "content": "<|pad|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "1": {
+ "content": "<|bos|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "2": {
+ "content": "<|eos|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "30": {
+ "content": "<|endoftext|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "bos_token": "<|endoftext|>",
+ "clean_up_tokenization_spaces": false,
+ "eos_token": "<|endoftext|>",
+ "extra_special_tokens": {},
+ "model_max_length": 1000000000000000019884624838656,
+ "pad_token": "<|pad|>",
+ "tokenizer_class": "GPT2Tokenizer",
+ "unk_token": "<|endoftext|>"
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3408b402ce47cb1ce453eb9ff4850571a282e7114777fbc4143e9a8dbd8ae6d8
+ size 5432
vocab.json ADDED
@@ -0,0 +1 @@
+ {"<|pad|>":0,"<|bos|>":1,"<|eos|>":2,"1":3,"2":4,"A":5,"B":6,"C":7,"D":8,"E":9,"F":10,"G":11,"H":12,"I":13,"K":14,"L":15,"M":16,"N":17,"O":18,"P":19,"Q":20,"R":21,"S":22,"T":23,"U":24,"V":25,"W":26,"X":27,"Y":28,"Z":29}
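The vocabulary above is character-level: three special tokens, the ProGen2 terminus markers "1" and "2", and one id per amino-acid letter. A short sketch of how a sequence tokenizes, assuming the tokenizer files in this commit load through `AutoTokenizer` and that the repo id is `mpekey/Progen2_Kinase_PhosphositeGen`:

```python
from transformers import AutoTokenizer

# Assumed repo id for the adapter this commit belongs to.
tokenizer = AutoTokenizer.from_pretrained("mpekey/Progen2_Kinase_PhosphositeGen")

ids = tokenizer("1MKT")["input_ids"]
print(ids)  # expected [3, 16, 14, 23] per vocab.json: "1"->3, "M"->16, "K"->14, "T"->23
```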