mashishka committed on Apr 13, 2024

Commit

eae9f0a

verified ·

1 Parent(s): e529708

Upload 45 files

Browse files

rugpt3small дообучена на корпусе депрессяшек

Files changed (45) hide show

README.md +52 -0
checkpoint-1000/config.json +41 -0
checkpoint-1000/generation_config.json +7 -0
checkpoint-1000/optimizer.pt +3 -0
checkpoint-1000/rng_state.pth +3 -0
checkpoint-1000/scheduler.pt +3 -0
checkpoint-1000/trainer_state.json +35 -0
checkpoint-1000/training_args.bin +3 -0
checkpoint-1500/config.json +41 -0
checkpoint-1500/generation_config.json +7 -0
checkpoint-1500/model.safetensors +3 -0
checkpoint-1500/optimizer.pt +3 -0
checkpoint-1500/rng_state.pth +3 -0
checkpoint-1500/scheduler.pt +3 -0
checkpoint-1500/trainer_state.json +42 -0
checkpoint-1500/training_args.bin +3 -0
checkpoint-2000/config.json +41 -0
checkpoint-2000/generation_config.json +7 -0
checkpoint-2000/model.safetensors +3 -0
checkpoint-2000/optimizer.pt +3 -0
checkpoint-2000/rng_state.pth +3 -0
checkpoint-2000/scheduler.pt +3 -0
checkpoint-2000/trainer_state.json +49 -0
checkpoint-2000/training_args.bin +3 -0
checkpoint-2500/config.json +41 -0
checkpoint-2500/generation_config.json +7 -0
checkpoint-2500/model.safetensors +3 -0
checkpoint-2500/optimizer.pt +3 -0
checkpoint-2500/rng_state.pth +3 -0
checkpoint-2500/scheduler.pt +3 -0
checkpoint-2500/trainer_state.json +56 -0
checkpoint-2500/training_args.bin +3 -0
checkpoint-500/config.json +41 -0
checkpoint-500/generation_config.json +7 -0
checkpoint-500/optimizer.pt +3 -0
checkpoint-500/rng_state.pth +3 -0
checkpoint-500/scheduler.pt +3 -0
checkpoint-500/trainer_state.json +28 -0
checkpoint-500/training_args.bin +3 -0
config.json +41 -0
generation_config.json +7 -0
model.safetensors +3 -0
runs/Apr13_05-49-15_e3cdf6043cc1/events.out.tfevents.1712987355.e3cdf6043cc1.332.1 +3 -0
runs/Apr13_05-52-47_e3cdf6043cc1/events.out.tfevents.1712987568.e3cdf6043cc1.332.2 +3 -0
training_args.bin +3 -0

README.md ADDED Viewed

	@@ -0,0 +1,52 @@

+---
+base_model: ai-forever/rugpt3small_based_on_gpt2
+tags:
+- generated_from_trainer
+model-index:
+- name: poetry-rugpt3small
+  results: []
+---
+<!-- This model card has been generated automatically according to the information the Trainer had access to. You
+should probably proofread and complete it, then remove this comment. -->
+# poetry-rugpt3small
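+This model is a fine-tuned version of [ai-forever/rugpt3small_based_on_gpt2](https://huggingface.co/ai-forever/rugpt3small_based_on_gpt2) on a corpus of "депрессяшки" (short depressive poems), as stated in the commit description; the dataset is not otherwise identified in this card.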
+## Model description
+More information needed
+## Intended uses & limitations
+More information needed
+## Training and evaluation data
+More information needed
+## Training procedure
+### Training hyperparameters
+The following hyperparameters were used during training:
+- learning_rate: 0.0002
+- train_batch_size: 8
+- eval_batch_size: 8
+- seed: 42
+- gradient_accumulation_steps: 3
+- total_train_batch_size: 24
+- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+- lr_scheduler_type: linear
+- num_epochs: 4
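+### Training results
+No evaluation metrics were recorded (`best_metric` is null); the training loss logged every 500 steps was:
+| Step | Epoch | Training Loss |
+|:----:|:-----:|:-------------:|
+| 500  | 0.7   | 3.9965        |
+| 1000 | 1.41  | 3.3018        |
+| 1500 | 2.11  | 2.8988        |
+| 2000 | 2.82  | 2.3929        |
+| 2500 | 3.52  | 2.0648        |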
+### Framework versions
+- Transformers 4.38.2
+- Pytorch 2.2.1+cu121
+- Tokenizers 0.15.2

checkpoint-1000/config.json ADDED Viewed

	@@ -0,0 +1,41 @@

+{
+  "_name_or_path": "ai-forever/rugpt3small_based_on_gpt2",
+  "activation_function": "gelu_new",
+  "architectures": [
+    "GPT2LMHeadModel"
+  ],
+  "attn_pdrop": 0.1,
+  "bos_token_id": 1,
+  "embd_pdrop": 0.1,
+  "eos_token_id": 2,
+  "gradient_checkpointing": false,
+  "id2label": {
+    "0": "LABEL_0"
+  },
+  "initializer_range": 0.02,
+  "label2id": {
+    "LABEL_0": 0
+  },
+  "layer_norm_epsilon": 1e-05,
+  "model_type": "gpt2",
+  "n_ctx": 2048,
+  "n_embd": 768,
+  "n_head": 12,
+  "n_inner": null,
+  "n_layer": 12,
+  "n_positions": 2048,
+  "pad_token_id": 0,
+  "reorder_and_upcast_attn": false,
+  "resid_pdrop": 0.1,
+  "scale_attn_by_inverse_layer_idx": false,
+  "scale_attn_weights": true,
+  "summary_activation": null,
+  "summary_first_dropout": 0.1,
+  "summary_proj_to_labels": true,
+  "summary_type": "cls_index",
+  "summary_use_proj": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.38.2",
+  "use_cache": true,
+  "vocab_size": 50264
+}

checkpoint-1000/generation_config.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "_from_model_config": true,
+  "bos_token_id": 1,
+  "eos_token_id": 2,
+  "pad_token_id": 0,
+  "transformers_version": "4.38.2"
+}

checkpoint-1000/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:37cb865b074caec89ac5294c8bb6e7c8a294546b08d1bb3cb0c36ce17d63d3ba
+size 1388158

checkpoint-1000/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e121ffa3253328f1eff6245d7eacd2c1dedcae0837e7fa49498d1684f9622f5e
+size 14168

checkpoint-1000/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d4812307e94efb2f137ca5c1953b5b0ff829062afd8484a4b5f0fbde1a91e36c
+size 1056

checkpoint-1000/trainer_state.json ADDED Viewed

	@@ -0,0 +1,35 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 1.4091122592766556,
+  "eval_steps": 500,
+  "global_step": 1000,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.7,
+      "grad_norm": 2.76055908203125,
+      "learning_rate": 0.00016473906911142455,
+      "loss": 3.9965,
+      "step": 500
+    },
+    {
+      "epoch": 1.41,
+      "grad_norm": 2.5890133380889893,
+      "learning_rate": 0.00012947813822284908,
+      "loss": 3.3018,
+      "step": 1000
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 2836,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 4,
+  "save_steps": 500,
+  "total_flos": 391921717248000.0,
+  "train_batch_size": 8,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoint-1000/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6505f97547c48a1103750e9e7198c48bcaf26b74eb97216d25dcbdea6812f6b3
+size 4896

checkpoint-1500/config.json ADDED Viewed

	@@ -0,0 +1,41 @@

+{
+  "_name_or_path": "ai-forever/rugpt3small_based_on_gpt2",
+  "activation_function": "gelu_new",
+  "architectures": [
+    "GPT2LMHeadModel"
+  ],
+  "attn_pdrop": 0.1,
+  "bos_token_id": 1,
+  "embd_pdrop": 0.1,
+  "eos_token_id": 2,
+  "gradient_checkpointing": false,
+  "id2label": {
+    "0": "LABEL_0"
+  },
+  "initializer_range": 0.02,
+  "label2id": {
+    "LABEL_0": 0
+  },
+  "layer_norm_epsilon": 1e-05,
+  "model_type": "gpt2",
+  "n_ctx": 2048,
+  "n_embd": 768,
+  "n_head": 12,
+  "n_inner": null,
+  "n_layer": 12,
+  "n_positions": 2048,
+  "pad_token_id": 0,
+  "reorder_and_upcast_attn": false,
+  "resid_pdrop": 0.1,
+  "scale_attn_by_inverse_layer_idx": false,
+  "scale_attn_weights": true,
+  "summary_activation": null,
+  "summary_first_dropout": 0.1,
+  "summary_proj_to_labels": true,
+  "summary_type": "cls_index",
+  "summary_use_proj": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.38.2",
+  "use_cache": true,
+  "vocab_size": 50264
+}

checkpoint-1500/generation_config.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "_from_model_config": true,
+  "bos_token_id": 1,
+  "eos_token_id": 2,
+  "pad_token_id": 0,
+  "transformers_version": "4.38.2"
+}

checkpoint-1500/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b74bc60dab96ef0b2acf830908cd0525031f557a7489e53f162a1add329737e5
+size 500941440

checkpoint-1500/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9ca317b0bea5569c774d37d3047c5b02d5fc6697ea9b89164d9fe1605e322c28
+size 1388158

checkpoint-1500/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1cd215dc687de9fce2adf066fc0a921e6a64e10dd21038a7f121d72ad6bd7314
+size 14168

checkpoint-1500/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b45dbeb090b7e47dab2b3373891d65a6f597444666dbcc40beff73e873f0914a
+size 1056

checkpoint-1500/trainer_state.json ADDED Viewed

	@@ -0,0 +1,42 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 2.1136683889149834,
+  "eval_steps": 500,
+  "global_step": 1500,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.7,
+      "grad_norm": 2.76055908203125,
+      "learning_rate": 0.00016473906911142455,
+      "loss": 3.9965,
+      "step": 500
+    },
+    {
+      "epoch": 1.41,
+      "grad_norm": 2.5890133380889893,
+      "learning_rate": 0.00012947813822284908,
+      "loss": 3.3018,
+      "step": 1000
+    },
+    {
+      "epoch": 2.11,
+      "grad_norm": 2.6693315505981445,
+      "learning_rate": 9.421720733427363e-05,
+      "loss": 2.8988,
+      "step": 1500
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 2836,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 4,
+  "save_steps": 500,
+  "total_flos": 587874410496000.0,
+  "train_batch_size": 8,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoint-1500/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6505f97547c48a1103750e9e7198c48bcaf26b74eb97216d25dcbdea6812f6b3
+size 4896

checkpoint-2000/config.json ADDED Viewed

	@@ -0,0 +1,41 @@

+{
+  "_name_or_path": "ai-forever/rugpt3small_based_on_gpt2",
+  "activation_function": "gelu_new",
+  "architectures": [
+    "GPT2LMHeadModel"
+  ],
+  "attn_pdrop": 0.1,
+  "bos_token_id": 1,
+  "embd_pdrop": 0.1,
+  "eos_token_id": 2,
+  "gradient_checkpointing": false,
+  "id2label": {
+    "0": "LABEL_0"
+  },
+  "initializer_range": 0.02,
+  "label2id": {
+    "LABEL_0": 0
+  },
+  "layer_norm_epsilon": 1e-05,
+  "model_type": "gpt2",
+  "n_ctx": 2048,
+  "n_embd": 768,
+  "n_head": 12,
+  "n_inner": null,
+  "n_layer": 12,
+  "n_positions": 2048,
+  "pad_token_id": 0,
+  "reorder_and_upcast_attn": false,
+  "resid_pdrop": 0.1,
+  "scale_attn_by_inverse_layer_idx": false,
+  "scale_attn_weights": true,
+  "summary_activation": null,
+  "summary_first_dropout": 0.1,
+  "summary_proj_to_labels": true,
+  "summary_type": "cls_index",
+  "summary_use_proj": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.38.2",
+  "use_cache": true,
+  "vocab_size": 50264
+}

checkpoint-2000/generation_config.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "_from_model_config": true,
+  "bos_token_id": 1,
+  "eos_token_id": 2,
+  "pad_token_id": 0,
+  "transformers_version": "4.38.2"
+}

checkpoint-2000/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:736ef1316fe65f10219acc133d2d71087903c158af67d91e793f9bd8de397413
+size 500941440

checkpoint-2000/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:27d4d6cb328da6a8244ea3a2c942cee72774ff18eea7993c161406a6bb3e8fca
+size 1388158

checkpoint-2000/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8cc233f77f651a7ac95f48bc181d627820f2e9b89a2e99d9d3e32b4cc49d8a86
+size 14168

checkpoint-2000/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1e8ee96adef4a71f3243de85066da41273d922b6147757f524ad99764797a7d8
+size 1056

checkpoint-2000/trainer_state.json ADDED Viewed

	@@ -0,0 +1,49 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 2.8182245185533112,
+  "eval_steps": 500,
+  "global_step": 2000,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.7,
+      "grad_norm": 2.76055908203125,
+      "learning_rate": 0.00016473906911142455,
+      "loss": 3.9965,
+      "step": 500
+    },
+    {
+      "epoch": 1.41,
+      "grad_norm": 2.5890133380889893,
+      "learning_rate": 0.00012947813822284908,
+      "loss": 3.3018,
+      "step": 1000
+    },
+    {
+      "epoch": 2.11,
+      "grad_norm": 2.6693315505981445,
+      "learning_rate": 9.421720733427363e-05,
+      "loss": 2.8988,
+      "step": 1500
+    },
+    {
+      "epoch": 2.82,
+      "grad_norm": 3.1421236991882324,
+      "learning_rate": 5.8956276445698163e-05,
+      "loss": 2.3929,
+      "step": 2000
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 2836,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 4,
+  "save_steps": 500,
+  "total_flos": 783843434496000.0,
+  "train_batch_size": 8,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoint-2000/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6505f97547c48a1103750e9e7198c48bcaf26b74eb97216d25dcbdea6812f6b3
+size 4896

checkpoint-2500/config.json ADDED Viewed

	@@ -0,0 +1,41 @@

+{
+  "_name_or_path": "ai-forever/rugpt3small_based_on_gpt2",
+  "activation_function": "gelu_new",
+  "architectures": [
+    "GPT2LMHeadModel"
+  ],
+  "attn_pdrop": 0.1,
+  "bos_token_id": 1,
+  "embd_pdrop": 0.1,
+  "eos_token_id": 2,
+  "gradient_checkpointing": false,
+  "id2label": {
+    "0": "LABEL_0"
+  },
+  "initializer_range": 0.02,
+  "label2id": {
+    "LABEL_0": 0
+  },
+  "layer_norm_epsilon": 1e-05,
+  "model_type": "gpt2",
+  "n_ctx": 2048,
+  "n_embd": 768,
+  "n_head": 12,
+  "n_inner": null,
+  "n_layer": 12,
+  "n_positions": 2048,
+  "pad_token_id": 0,
+  "reorder_and_upcast_attn": false,
+  "resid_pdrop": 0.1,
+  "scale_attn_by_inverse_layer_idx": false,
+  "scale_attn_weights": true,
+  "summary_activation": null,
+  "summary_first_dropout": 0.1,
+  "summary_proj_to_labels": true,
+  "summary_type": "cls_index",
+  "summary_use_proj": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.38.2",
+  "use_cache": true,
+  "vocab_size": 50264
+}

checkpoint-2500/generation_config.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "_from_model_config": true,
+  "bos_token_id": 1,
+  "eos_token_id": 2,
+  "pad_token_id": 0,
+  "transformers_version": "4.38.2"
+}

checkpoint-2500/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:53c45d85117a64a54ff73e4866b797410164a8d56a769b4a8e2eb698ffccf3d2
+size 500941440

checkpoint-2500/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5322e35104def8f18c9f5306b719be2b042f78de65a24f2900586fe19924f709
+size 1388158

checkpoint-2500/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a04a8ee606b906d536d7725250ff91cd3c81b932f89d623d0f0956c076d68a2f
+size 14168

checkpoint-2500/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:24c73de17b528e3ce1bd60b59da5ca7d31e43f676c99b8f33dd080550a88b87f
+size 1056

checkpoint-2500/trainer_state.json ADDED Viewed

	@@ -0,0 +1,56 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 3.522780648191639,
+  "eval_steps": 500,
+  "global_step": 2500,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.7,
+      "grad_norm": 2.76055908203125,
+      "learning_rate": 0.00016473906911142455,
+      "loss": 3.9965,
+      "step": 500
+    },
+    {
+      "epoch": 1.41,
+      "grad_norm": 2.5890133380889893,
+      "learning_rate": 0.00012947813822284908,
+      "loss": 3.3018,
+      "step": 1000
+    },
+    {
+      "epoch": 2.11,
+      "grad_norm": 2.6693315505981445,
+      "learning_rate": 9.421720733427363e-05,
+      "loss": 2.8988,
+      "step": 1500
+    },
+    {
+      "epoch": 2.82,
+      "grad_norm": 3.1421236991882324,
+      "learning_rate": 5.8956276445698163e-05,
+      "loss": 2.3929,
+      "step": 2000
+    },
+    {
+      "epoch": 3.52,
+      "grad_norm": 2.9969077110290527,
+      "learning_rate": 2.3695345557122707e-05,
+      "loss": 2.0648,
+      "step": 2500
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 2836,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 4,
+  "save_steps": 500,
+  "total_flos": 979796127744000.0,
+  "train_batch_size": 8,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoint-2500/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6505f97547c48a1103750e9e7198c48bcaf26b74eb97216d25dcbdea6812f6b3
+size 4896

checkpoint-500/config.json ADDED Viewed

	@@ -0,0 +1,41 @@

+{
+  "_name_or_path": "ai-forever/rugpt3small_based_on_gpt2",
+  "activation_function": "gelu_new",
+  "architectures": [
+    "GPT2LMHeadModel"
+  ],
+  "attn_pdrop": 0.1,
+  "bos_token_id": 1,
+  "embd_pdrop": 0.1,
+  "eos_token_id": 2,
+  "gradient_checkpointing": false,
+  "id2label": {
+    "0": "LABEL_0"
+  },
+  "initializer_range": 0.02,
+  "label2id": {
+    "LABEL_0": 0
+  },
+  "layer_norm_epsilon": 1e-05,
+  "model_type": "gpt2",
+  "n_ctx": 2048,
+  "n_embd": 768,
+  "n_head": 12,
+  "n_inner": null,
+  "n_layer": 12,
+  "n_positions": 2048,
+  "pad_token_id": 0,
+  "reorder_and_upcast_attn": false,
+  "resid_pdrop": 0.1,
+  "scale_attn_by_inverse_layer_idx": false,
+  "scale_attn_weights": true,
+  "summary_activation": null,
+  "summary_first_dropout": 0.1,
+  "summary_proj_to_labels": true,
+  "summary_type": "cls_index",
+  "summary_use_proj": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.38.2",
+  "use_cache": true,
+  "vocab_size": 50264
+}

checkpoint-500/generation_config.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "_from_model_config": true,
+  "bos_token_id": 1,
+  "eos_token_id": 2,
+  "pad_token_id": 0,
+  "transformers_version": "4.38.2"
+}

checkpoint-500/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3a00378c941bc6126014eeef7950960397719b20b8c67a582f353eedc8cb8375
+size 1388158

checkpoint-500/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4af4d83d8c0108d3aa94ee44da238a0a9a61f03a5e9dd4f3e8a4b70c75748a3f
+size 14168

checkpoint-500/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:84dd3d57acb3fd08cf92f7801193f24ea66bbda53a190defa8e55f5ea31783e4
+size 1056

checkpoint-500/trainer_state.json ADDED Viewed

	@@ -0,0 +1,28 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.7045561296383278,
+  "eval_steps": 500,
+  "global_step": 500,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.7,
+      "grad_norm": 2.76055908203125,
+      "learning_rate": 0.00016473906911142455,
+      "loss": 3.9965,
+      "step": 500
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 2836,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 4,
+  "save_steps": 500,
+  "total_flos": 195969024000000.0,
+  "train_batch_size": 8,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoint-500/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6505f97547c48a1103750e9e7198c48bcaf26b74eb97216d25dcbdea6812f6b3
+size 4896

config.json ADDED Viewed

	@@ -0,0 +1,41 @@

+{
+  "_name_or_path": "ai-forever/rugpt3small_based_on_gpt2",
+  "activation_function": "gelu_new",
+  "architectures": [
+    "GPT2LMHeadModel"
+  ],
+  "attn_pdrop": 0.1,
+  "bos_token_id": 1,
+  "embd_pdrop": 0.1,
+  "eos_token_id": 2,
+  "gradient_checkpointing": false,
+  "id2label": {
+    "0": "LABEL_0"
+  },
+  "initializer_range": 0.02,
+  "label2id": {
+    "LABEL_0": 0
+  },
+  "layer_norm_epsilon": 1e-05,
+  "model_type": "gpt2",
+  "n_ctx": 2048,
+  "n_embd": 768,
+  "n_head": 12,
+  "n_inner": null,
+  "n_layer": 12,
+  "n_positions": 2048,
+  "pad_token_id": 0,
+  "reorder_and_upcast_attn": false,
+  "resid_pdrop": 0.1,
+  "scale_attn_by_inverse_layer_idx": false,
+  "scale_attn_weights": true,
+  "summary_activation": null,
+  "summary_first_dropout": 0.1,
+  "summary_proj_to_labels": true,
+  "summary_type": "cls_index",
+  "summary_use_proj": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.38.2",
+  "use_cache": true,
+  "vocab_size": 50264
+}

generation_config.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "_from_model_config": true,
+  "bos_token_id": 1,
+  "eos_token_id": 2,
+  "pad_token_id": 0,
+  "transformers_version": "4.38.2"
+}

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:03dff07ae63677639709968c7bb7bf7fd697d4d09009f58689244dc145bb5665
+size 500941440

runs/Apr13_05-49-15_e3cdf6043cc1/events.out.tfevents.1712987355.e3cdf6043cc1.332.1 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3f5e167e7468675807dd1f7696c7c08d2a5093449dd452571b72089ad0e675d7
+size 5280

runs/Apr13_05-52-47_e3cdf6043cc1/events.out.tfevents.1712987568.e3cdf6043cc1.332.2 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:81a78146098a7bea7667e4048641c1fb14c5d122515d1f022686331449ec2a05
+size 6468

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6505f97547c48a1103750e9e7198c48bcaf26b74eb97216d25dcbdea6812f6b3
+size 4896