add model

Browse files

Files changed (3) hide show

README.md +63 -0
config.json +54 -0
pytorch_model.bin +3 -0

README.md ADDED Viewed

	@@ -0,0 +1,63 @@

+---
+model-index:
+- name: test-clm
+---
+<!-- This model card has been generated automatically according to the information the Trainer had access to. You
+should probably proofread and complete it, then remove this comment. -->
+# test-clm
+This model was trained from scratch on an unknown dataset.
+It achieves the following results on the evaluation set:
+- Loss: 2.7783
+## Model description
+More information needed
+## Intended uses & limitations
+More information needed
+## Training and evaluation data
+More information needed
+## Training procedure
+### Training hyperparameters
+The following hyperparameters were used during training:
+- learning_rate: 2e-05
+- train_batch_size: 8
+- eval_batch_size: 8
+- seed: 42
+- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+- lr_scheduler_type: cosine_with_restarts
+- lr_scheduler_warmup_steps: 500
+- num_epochs: 10
+### Training results
+| Training Loss | Epoch | Step   | Validation Loss |
+|:-------------:|:-----:|:------:|:---------------:|
+| 3.8321        | 1.0   | 18412  | 3.8074          |
+| 3.4965        | 2.0   | 36824  | 3.4223          |
+| 3.1927        | 3.0   | 55236  | 3.0815          |
+| 3.046         | 4.0   | 73648  | 2.9270          |
+| 2.9781        | 5.0   | 92060  | 2.8515          |
+| 2.9398        | 6.0   | 110472 | 2.8082          |
+| 2.9293        | 7.0   | 128884 | 2.7904          |
+| 2.9212        | 8.0   | 147296 | 2.7817          |
+| 2.9169        | 9.0   | 165708 | 2.7787          |
+| 2.9197        | 10.0  | 184120 | 2.7783          |
+### Framework versions
+- Transformers 4.6.1
+- Pytorch 1.9.0
+- Datasets 1.2.1
+- Tokenizers 0.10.3

config.json ADDED Viewed

	@@ -0,0 +1,54 @@

+{
+  "architectures": [
+    "ReformerModelWithLMHead"
+  ],
+  "attention_head_size": 64,
+  "attn_layers": [
+    "local",
+    "lsh",
+    "local",
+    "lsh",
+    "local",
+    "lsh"
+  ],
+  "axial_norm_std": 1.0,
+  "axial_pos_embds": true,
+  "axial_pos_embds_dim": [
+    64,
+    192
+  ],
+  "axial_pos_shape": [
+    64,
+    64
+  ],
+  "chunk_size_lm_head": 0,
+  "eos_token_id": 2,
+  "feed_forward_size": 512,
+  "hash_seed": null,
+  "hidden_act": "gelu_new",
+  "hidden_dropout_prob": 0.025,
+  "hidden_size": 256,
+  "initializer_range": 0.02,
+  "is_decoder": true,
+  "layer_norm_eps": 1e-12,
+  "local_attention_probs_dropout_prob": 0.025,
+  "local_attn_chunk_length": 64,
+  "local_num_chunks_after": 0,
+  "local_num_chunks_before": 1,
+  "lsh_attention_probs_dropout_prob": 0.0,
+  "lsh_attn_chunk_length": 64,
+  "lsh_num_chunks_after": 0,
+  "lsh_num_chunks_before": 1,
+  "max_position_embeddings": 4096,
+  "model_type": "reformer",
+  "num_attention_heads": 2,
+  "num_buckets": 128,
+  "num_hashes": 8,
+  "num_hidden_layers": 6,
+  "pad_token_id": 0,
+  "sinusoidal_pos_embds": false,
+  "tie_word_embeddings": false,
+  "transformers_version": "4.6.1",
+  "use_cache": true,
+  "vocab_size": 320
+}

pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8c477830ffc24a2023c9623d499055aadd2d3f6e716b38c0df8b4aab843dc8e8
+size 10179275