qwen-OpenAssistant/oasst_top1_2023-08-25

Browse files

Files changed (7) hide show

README.md +106 -0
adapter_config.json +32 -0
adapter_model.bin +3 -0
qwen.tiktoken +0 -0
special_tokens_map.json +4 -0
tokenizer_config.json +14 -0
training_args.bin +3 -0

README.md ADDED Viewed

	@@ -0,0 +1,106 @@

+---
+base_model: Qwen/Qwen-14B
+tags:
+- generated_from_trainer
+model-index:
+- name: OpenAssistant_oasst_top1_2023-08-25
+  results: []
+---
+<!-- This model card has been generated automatically according to the information the Trainer had access to. You
+should probably proofread and complete it, then remove this comment. -->
+# OpenAssistant_oasst_top1_2023-08-25
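+This model is a fine-tuned version of [Qwen/Qwen-14B](https://huggingface.co/Qwen/Qwen-14B) on the [OpenAssistant/oasst_top1_2023-08-25](https://huggingface.co/datasets/OpenAssistant/oasst_top1_2023-08-25) dataset.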
+It achieves the following results on the evaluation set:
+- Loss: 1.6972
+## Model description
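+This repository contains a LoRA adapter (PEFT) for [Qwen/Qwen-14B](https://huggingface.co/Qwen/Qwen-14B), not full model weights. The adapter uses rank `r=8`, `lora_alpha=16`, and `lora_dropout=0.1`, and targets the `c_attn` and `c_proj` modules in layers 30–39 of the base model (see `adapter_config.json`).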
+## Intended uses & limitations
+More information needed
+## Training and evaluation data
+More information needed
+## Training procedure
+### Training hyperparameters
+The following hyperparameters were used during training:
+- learning_rate: 1e-05
+- train_batch_size: 2
+- eval_batch_size: 2
+- seed: 42
+- gradient_accumulation_steps: 8
+- total_train_batch_size: 16
+- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+- lr_scheduler_type: cosine
+- lr_scheduler_warmup_steps: 0.01
+- num_epochs: 1
+### Training results
+| Training Loss | Epoch | Step | Validation Loss |
+|:-------------:|:-----:|:----:|:---------------:|
+| 1.977         | 0.02  | 16   | 1.9487          |
+| 1.7729        | 0.04  | 32   | 1.9455          |
+| 1.8185        | 0.06  | 48   | 1.9395          |
+| 1.8375        | 0.08  | 64   | 1.9311          |
+| 1.8803        | 0.1   | 80   | 1.9205          |
+| 1.754         | 0.12  | 96   | 1.9093          |
+| 1.691         | 0.14  | 112  | 1.8976          |
+| 1.7817        | 0.17  | 128  | 1.8860          |
+| 1.7482        | 0.19  | 144  | 1.8742          |
+| 1.8528        | 0.21  | 160  | 1.8616          |
+| 1.7618        | 0.23  | 176  | 1.8486          |
+| 1.7428        | 0.25  | 192  | 1.8356          |
+| 1.6991        | 0.27  | 208  | 1.8208          |
+| 1.7041        | 0.29  | 224  | 1.8058          |
+| 1.7153        | 0.31  | 240  | 1.7919          |
+| 1.7312        | 0.33  | 256  | 1.7777          |
+| 1.6665        | 0.35  | 272  | 1.7658          |
+| 1.6596        | 0.37  | 288  | 1.7567          |
+| 1.7081        | 0.39  | 304  | 1.7492          |
+| 1.6424        | 0.41  | 320  | 1.7407          |
+| 1.6447        | 0.43  | 336  | 1.7341          |
+| 1.7134        | 0.45  | 352  | 1.7285          |
+| 1.6241        | 0.47  | 368  | 1.7230          |
+| 1.706         | 0.5   | 384  | 1.7193          |
+| 1.7142        | 0.52  | 400  | 1.7156          |
+| 1.6345        | 0.54  | 416  | 1.7122          |
+| 1.6012        | 0.56  | 432  | 1.7097          |
+| 1.6742        | 0.58  | 448  | 1.7080          |
+| 1.6555        | 0.6   | 464  | 1.7073          |
+| 1.6765        | 0.62  | 480  | 1.7047          |
+| 1.5234        | 0.64  | 496  | 1.7034          |
+| 1.5538        | 0.66  | 512  | 1.7025          |
+| 1.669         | 0.68  | 528  | 1.7015          |
+| 1.5509        | 0.7   | 544  | 1.7007          |
+| 1.5485        | 0.72  | 560  | 1.7002          |
+| 1.6374        | 0.74  | 576  | 1.6993          |
+| 1.6434        | 0.76  | 592  | 1.6986          |
+| 1.6832        | 0.78  | 608  | 1.6983          |
+| 1.6734        | 0.8   | 624  | 1.6979          |
+| 1.6463        | 0.83  | 640  | 1.6979          |
+| 1.5761        | 0.85  | 656  | 1.6976          |
+| 1.5689        | 0.87  | 672  | 1.6976          |
+| 1.6393        | 0.89  | 688  | 1.6975          |
+| 1.6735        | 0.91  | 704  | 1.6974          |
+| 1.5709        | 0.93  | 720  | 1.6974          |
+| 1.7068        | 0.95  | 736  | 1.6971          |
+| 1.5955        | 0.97  | 752  | 1.6973          |
+| 1.7114        | 0.99  | 768  | 1.6972          |
+### Framework versions
+- Transformers 4.35.0.dev0
+- Pytorch 2.1.0+cu121
+- Datasets 2.5.2
+- Tokenizers 0.14.0

adapter_config.json ADDED Viewed

	@@ -0,0 +1,32 @@

+{
+  "auto_mapping": null,
+  "base_model_name_or_path": "Qwen/Qwen-14B",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": [
+    30,
+    31,
+    32,
+    33,
+    34,
+    35,
+    36,
+    37,
+    38,
+    39
+  ],
+  "lora_alpha": 16,
+  "lora_dropout": 0.1,
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "revision": null,
+  "target_modules": [
+    "c_attn",
+    "c_proj"
+  ],
+  "task_type": "CAUSAL_LM"
+}

adapter_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:fe57fb43ef81d710ff587e9fde0aa1806b15855f5a72bf2f91781f6e371feca2
+size 15873058

qwen.tiktoken ADDED Viewed

The diff for this file is too large to render. See raw diff

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,4 @@

+{
+  "eos_token": "<|endoftext|>",
+  "pad_token": "<|endoftext|>"
+}

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,14 @@

+{
+  "added_tokens_decoder": {},
+  "additional_special_tokens": [],
+  "auto_map": {
+    "AutoTokenizer": [
+      "Qwen/Qwen-14B--tokenization_qwen.QWenTokenizer",
+      null
+    ]
+  },
+  "clean_up_tokenization_spaces": true,
+  "model_max_length": 8192,
+  "tokenizer_class": "QWenTokenizer",
+  "tokenizer_file": null
+}

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ed85ceb0a1bd3f5902286ef3d1b9d7aca7d02fb43e23a03da780749c7917d535
+size 4600