mashishka committed on
Commit eae9f0a · verified · 1 Parent(s): e529708

Upload 45 files


rugpt3small fine-tuned on a corpus of depresyashki (short Russian depressive poems)

Files changed (45)
  1. README.md +52 -0
  2. checkpoint-1000/config.json +41 -0
  3. checkpoint-1000/generation_config.json +7 -0
  4. checkpoint-1000/optimizer.pt +3 -0
  5. checkpoint-1000/rng_state.pth +3 -0
  6. checkpoint-1000/scheduler.pt +3 -0
  7. checkpoint-1000/trainer_state.json +35 -0
  8. checkpoint-1000/training_args.bin +3 -0
  9. checkpoint-1500/config.json +41 -0
  10. checkpoint-1500/generation_config.json +7 -0
  11. checkpoint-1500/model.safetensors +3 -0
  12. checkpoint-1500/optimizer.pt +3 -0
  13. checkpoint-1500/rng_state.pth +3 -0
  14. checkpoint-1500/scheduler.pt +3 -0
  15. checkpoint-1500/trainer_state.json +42 -0
  16. checkpoint-1500/training_args.bin +3 -0
  17. checkpoint-2000/config.json +41 -0
  18. checkpoint-2000/generation_config.json +7 -0
  19. checkpoint-2000/model.safetensors +3 -0
  20. checkpoint-2000/optimizer.pt +3 -0
  21. checkpoint-2000/rng_state.pth +3 -0
  22. checkpoint-2000/scheduler.pt +3 -0
  23. checkpoint-2000/trainer_state.json +49 -0
  24. checkpoint-2000/training_args.bin +3 -0
  25. checkpoint-2500/config.json +41 -0
  26. checkpoint-2500/generation_config.json +7 -0
  27. checkpoint-2500/model.safetensors +3 -0
  28. checkpoint-2500/optimizer.pt +3 -0
  29. checkpoint-2500/rng_state.pth +3 -0
  30. checkpoint-2500/scheduler.pt +3 -0
  31. checkpoint-2500/trainer_state.json +56 -0
  32. checkpoint-2500/training_args.bin +3 -0
  33. checkpoint-500/config.json +41 -0
  34. checkpoint-500/generation_config.json +7 -0
  35. checkpoint-500/optimizer.pt +3 -0
  36. checkpoint-500/rng_state.pth +3 -0
  37. checkpoint-500/scheduler.pt +3 -0
  38. checkpoint-500/trainer_state.json +28 -0
  39. checkpoint-500/training_args.bin +3 -0
  40. config.json +41 -0
  41. generation_config.json +7 -0
  42. model.safetensors +3 -0
  43. runs/Apr13_05-49-15_e3cdf6043cc1/events.out.tfevents.1712987355.e3cdf6043cc1.332.1 +3 -0
  44. runs/Apr13_05-52-47_e3cdf6043cc1/events.out.tfevents.1712987568.e3cdf6043cc1.332.2 +3 -0
  45. training_args.bin +3 -0
README.md ADDED
@@ -0,0 +1,52 @@
+ ---
+ base_model: ai-forever/rugpt3small_based_on_gpt2
+ tags:
+ - generated_from_trainer
+ model-index:
+ - name: poetry-rugpt3small
+   results: []
+ ---
+
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
+ should probably proofread and complete it, then remove this comment. -->
+
+ # poetry-rugpt3small
+
+ This model is a fine-tuned version of [ai-forever/rugpt3small_based_on_gpt2](https://huggingface.co/ai-forever/rugpt3small_based_on_gpt2) on an unknown dataset.
+
+ ## Model description
+
+ More information needed
+
+ ## Intended uses & limitations
+
+ More information needed
+
+ ## Training and evaluation data
+
+ More information needed
+
+ ## Training procedure
+
+ ### Training hyperparameters
+
+ The following hyperparameters were used during training:
+ - learning_rate: 0.0002
+ - train_batch_size: 8
+ - eval_batch_size: 8
+ - seed: 42
+ - gradient_accumulation_steps: 3
+ - total_train_batch_size: 24
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+ - lr_scheduler_type: linear
+ - num_epochs: 4
+
+ ### Training results
+
+
+
+ ### Framework versions
+
+ - Transformers 4.38.2
+ - Pytorch 2.2.1+cu121
+ - Tokenizers 0.15.2
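Since the card's usage sections are still placeholders, here is a minimal generation sketch, not part of the commit itself. The repo id `mashishka/poetry-rugpt3small` is an assumption inferred from the committer name and the `model-index` name; substitute the real id or a local path. The tokenizer is loaded from the base model because no tokenizer files appear among the 45 uploaded files.

```python
# Minimal sketch; repo id "mashishka/poetry-rugpt3small" is hypothetical.
# The base model's tokenizer is used because this commit ships no tokenizer files.
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("ai-forever/rugpt3small_based_on_gpt2")
model = AutoModelForCausalLM.from_pretrained("mashishka/poetry-rugpt3small")

# Encode a short Russian prompt and sample a continuation; the sampling
# settings are illustrative defaults for verse, not values from this repo.
inputs = tokenizer("за окном опять темнеет", return_tensors="pt")
outputs = model.generate(
    **inputs,
    max_new_tokens=64,
    do_sample=True,
    top_p=0.95,
    temperature=0.9,
    pad_token_id=tokenizer.eos_token_id,
)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```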
checkpoint-1000/config.json ADDED
@@ -0,0 +1,41 @@
+ {
+   "_name_or_path": "ai-forever/rugpt3small_based_on_gpt2",
+   "activation_function": "gelu_new",
+   "architectures": [
+     "GPT2LMHeadModel"
+   ],
+   "attn_pdrop": 0.1,
+   "bos_token_id": 1,
+   "embd_pdrop": 0.1,
+   "eos_token_id": 2,
+   "gradient_checkpointing": false,
+   "id2label": {
+     "0": "LABEL_0"
+   },
+   "initializer_range": 0.02,
+   "label2id": {
+     "LABEL_0": 0
+   },
+   "layer_norm_epsilon": 1e-05,
+   "model_type": "gpt2",
+   "n_ctx": 2048,
+   "n_embd": 768,
+   "n_head": 12,
+   "n_inner": null,
+   "n_layer": 12,
+   "n_positions": 2048,
+   "pad_token_id": 0,
+   "reorder_and_upcast_attn": false,
+   "resid_pdrop": 0.1,
+   "scale_attn_by_inverse_layer_idx": false,
+   "scale_attn_weights": true,
+   "summary_activation": null,
+   "summary_first_dropout": 0.1,
+   "summary_proj_to_labels": true,
+   "summary_type": "cls_index",
+   "summary_use_proj": true,
+   "torch_dtype": "float32",
+   "transformers_version": "4.38.2",
+   "use_cache": true,
+   "vocab_size": 50264
+ }
checkpoint-1000/generation_config.json ADDED
@@ -0,0 +1,7 @@
+ {
+   "_from_model_config": true,
+   "bos_token_id": 1,
+   "eos_token_id": 2,
+   "pad_token_id": 0,
+   "transformers_version": "4.38.2"
+ }
checkpoint-1000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:37cb865b074caec89ac5294c8bb6e7c8a294546b08d1bb3cb0c36ce17d63d3ba
+ size 1388158
checkpoint-1000/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e121ffa3253328f1eff6245d7eacd2c1dedcae0837e7fa49498d1684f9622f5e
+ size 14168
checkpoint-1000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d4812307e94efb2f137ca5c1953b5b0ff829062afd8484a4b5f0fbde1a91e36c
+ size 1056
checkpoint-1000/trainer_state.json ADDED
@@ -0,0 +1,35 @@
+ {
+   "best_metric": null,
+   "best_model_checkpoint": null,
+   "epoch": 1.4091122592766556,
+   "eval_steps": 500,
+   "global_step": 1000,
+   "is_hyper_param_search": false,
+   "is_local_process_zero": true,
+   "is_world_process_zero": true,
+   "log_history": [
+     {
+       "epoch": 0.7,
+       "grad_norm": 2.76055908203125,
+       "learning_rate": 0.00016473906911142455,
+       "loss": 3.9965,
+       "step": 500
+     },
+     {
+       "epoch": 1.41,
+       "grad_norm": 2.5890133380889893,
+       "learning_rate": 0.00012947813822284908,
+       "loss": 3.3018,
+       "step": 1000
+     }
+   ],
+   "logging_steps": 500,
+   "max_steps": 2836,
+   "num_input_tokens_seen": 0,
+   "num_train_epochs": 4,
+   "save_steps": 500,
+   "total_flos": 391921717248000.0,
+   "train_batch_size": 8,
+   "trial_name": null,
+   "trial_params": null
+ }
checkpoint-1000/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6505f97547c48a1103750e9e7198c48bcaf26b74eb97216d25dcbdea6812f6b3
+ size 4896
checkpoint-1500/config.json ADDED
@@ -0,0 +1,41 @@
+ {
+   "_name_or_path": "ai-forever/rugpt3small_based_on_gpt2",
+   "activation_function": "gelu_new",
+   "architectures": [
+     "GPT2LMHeadModel"
+   ],
+   "attn_pdrop": 0.1,
+   "bos_token_id": 1,
+   "embd_pdrop": 0.1,
+   "eos_token_id": 2,
+   "gradient_checkpointing": false,
+   "id2label": {
+     "0": "LABEL_0"
+   },
+   "initializer_range": 0.02,
+   "label2id": {
+     "LABEL_0": 0
+   },
+   "layer_norm_epsilon": 1e-05,
+   "model_type": "gpt2",
+   "n_ctx": 2048,
+   "n_embd": 768,
+   "n_head": 12,
+   "n_inner": null,
+   "n_layer": 12,
+   "n_positions": 2048,
+   "pad_token_id": 0,
+   "reorder_and_upcast_attn": false,
+   "resid_pdrop": 0.1,
+   "scale_attn_by_inverse_layer_idx": false,
+   "scale_attn_weights": true,
+   "summary_activation": null,
+   "summary_first_dropout": 0.1,
+   "summary_proj_to_labels": true,
+   "summary_type": "cls_index",
+   "summary_use_proj": true,
+   "torch_dtype": "float32",
+   "transformers_version": "4.38.2",
+   "use_cache": true,
+   "vocab_size": 50264
+ }
checkpoint-1500/generation_config.json ADDED
@@ -0,0 +1,7 @@
+ {
+   "_from_model_config": true,
+   "bos_token_id": 1,
+   "eos_token_id": 2,
+   "pad_token_id": 0,
+   "transformers_version": "4.38.2"
+ }
checkpoint-1500/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b74bc60dab96ef0b2acf830908cd0525031f557a7489e53f162a1add329737e5
+ size 500941440
checkpoint-1500/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9ca317b0bea5569c774d37d3047c5b02d5fc6697ea9b89164d9fe1605e322c28
+ size 1388158
checkpoint-1500/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1cd215dc687de9fce2adf066fc0a921e6a64e10dd21038a7f121d72ad6bd7314
+ size 14168
checkpoint-1500/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b45dbeb090b7e47dab2b3373891d65a6f597444666dbcc40beff73e873f0914a
+ size 1056
checkpoint-1500/trainer_state.json ADDED
@@ -0,0 +1,42 @@
+ {
+   "best_metric": null,
+   "best_model_checkpoint": null,
+   "epoch": 2.1136683889149834,
+   "eval_steps": 500,
+   "global_step": 1500,
+   "is_hyper_param_search": false,
+   "is_local_process_zero": true,
+   "is_world_process_zero": true,
+   "log_history": [
+     {
+       "epoch": 0.7,
+       "grad_norm": 2.76055908203125,
+       "learning_rate": 0.00016473906911142455,
+       "loss": 3.9965,
+       "step": 500
+     },
+     {
+       "epoch": 1.41,
+       "grad_norm": 2.5890133380889893,
+       "learning_rate": 0.00012947813822284908,
+       "loss": 3.3018,
+       "step": 1000
+     },
+     {
+       "epoch": 2.11,
+       "grad_norm": 2.6693315505981445,
+       "learning_rate": 9.421720733427363e-05,
+       "loss": 2.8988,
+       "step": 1500
+     }
+   ],
+   "logging_steps": 500,
+   "max_steps": 2836,
+   "num_input_tokens_seen": 0,
+   "num_train_epochs": 4,
+   "save_steps": 500,
+   "total_flos": 587874410496000.0,
+   "train_batch_size": 8,
+   "trial_name": null,
+   "trial_params": null
+ }
checkpoint-1500/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6505f97547c48a1103750e9e7198c48bcaf26b74eb97216d25dcbdea6812f6b3
+ size 4896
checkpoint-2000/config.json ADDED
@@ -0,0 +1,41 @@
+ {
+   "_name_or_path": "ai-forever/rugpt3small_based_on_gpt2",
+   "activation_function": "gelu_new",
+   "architectures": [
+     "GPT2LMHeadModel"
+   ],
+   "attn_pdrop": 0.1,
+   "bos_token_id": 1,
+   "embd_pdrop": 0.1,
+   "eos_token_id": 2,
+   "gradient_checkpointing": false,
+   "id2label": {
+     "0": "LABEL_0"
+   },
+   "initializer_range": 0.02,
+   "label2id": {
+     "LABEL_0": 0
+   },
+   "layer_norm_epsilon": 1e-05,
+   "model_type": "gpt2",
+   "n_ctx": 2048,
+   "n_embd": 768,
+   "n_head": 12,
+   "n_inner": null,
+   "n_layer": 12,
+   "n_positions": 2048,
+   "pad_token_id": 0,
+   "reorder_and_upcast_attn": false,
+   "resid_pdrop": 0.1,
+   "scale_attn_by_inverse_layer_idx": false,
+   "scale_attn_weights": true,
+   "summary_activation": null,
+   "summary_first_dropout": 0.1,
+   "summary_proj_to_labels": true,
+   "summary_type": "cls_index",
+   "summary_use_proj": true,
+   "torch_dtype": "float32",
+   "transformers_version": "4.38.2",
+   "use_cache": true,
+   "vocab_size": 50264
+ }
checkpoint-2000/generation_config.json ADDED
@@ -0,0 +1,7 @@
+ {
+   "_from_model_config": true,
+   "bos_token_id": 1,
+   "eos_token_id": 2,
+   "pad_token_id": 0,
+   "transformers_version": "4.38.2"
+ }
checkpoint-2000/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:736ef1316fe65f10219acc133d2d71087903c158af67d91e793f9bd8de397413
+ size 500941440
checkpoint-2000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:27d4d6cb328da6a8244ea3a2c942cee72774ff18eea7993c161406a6bb3e8fca
+ size 1388158
checkpoint-2000/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8cc233f77f651a7ac95f48bc181d627820f2e9b89a2e99d9d3e32b4cc49d8a86
+ size 14168
checkpoint-2000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1e8ee96adef4a71f3243de85066da41273d922b6147757f524ad99764797a7d8
+ size 1056
checkpoint-2000/trainer_state.json ADDED
@@ -0,0 +1,49 @@
+ {
+   "best_metric": null,
+   "best_model_checkpoint": null,
+   "epoch": 2.8182245185533112,
+   "eval_steps": 500,
+   "global_step": 2000,
+   "is_hyper_param_search": false,
+   "is_local_process_zero": true,
+   "is_world_process_zero": true,
+   "log_history": [
+     {
+       "epoch": 0.7,
+       "grad_norm": 2.76055908203125,
+       "learning_rate": 0.00016473906911142455,
+       "loss": 3.9965,
+       "step": 500
+     },
+     {
+       "epoch": 1.41,
+       "grad_norm": 2.5890133380889893,
+       "learning_rate": 0.00012947813822284908,
+       "loss": 3.3018,
+       "step": 1000
+     },
+     {
+       "epoch": 2.11,
+       "grad_norm": 2.6693315505981445,
+       "learning_rate": 9.421720733427363e-05,
+       "loss": 2.8988,
+       "step": 1500
+     },
+     {
+       "epoch": 2.82,
+       "grad_norm": 3.1421236991882324,
+       "learning_rate": 5.8956276445698163e-05,
+       "loss": 2.3929,
+       "step": 2000
+     }
+   ],
+   "logging_steps": 500,
+   "max_steps": 2836,
+   "num_input_tokens_seen": 0,
+   "num_train_epochs": 4,
+   "save_steps": 500,
+   "total_flos": 783843434496000.0,
+   "train_batch_size": 8,
+   "trial_name": null,
+   "trial_params": null
+ }
checkpoint-2000/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6505f97547c48a1103750e9e7198c48bcaf26b74eb97216d25dcbdea6812f6b3
+ size 4896
checkpoint-2500/config.json ADDED
@@ -0,0 +1,41 @@
+ {
+   "_name_or_path": "ai-forever/rugpt3small_based_on_gpt2",
+   "activation_function": "gelu_new",
+   "architectures": [
+     "GPT2LMHeadModel"
+   ],
+   "attn_pdrop": 0.1,
+   "bos_token_id": 1,
+   "embd_pdrop": 0.1,
+   "eos_token_id": 2,
+   "gradient_checkpointing": false,
+   "id2label": {
+     "0": "LABEL_0"
+   },
+   "initializer_range": 0.02,
+   "label2id": {
+     "LABEL_0": 0
+   },
+   "layer_norm_epsilon": 1e-05,
+   "model_type": "gpt2",
+   "n_ctx": 2048,
+   "n_embd": 768,
+   "n_head": 12,
+   "n_inner": null,
+   "n_layer": 12,
+   "n_positions": 2048,
+   "pad_token_id": 0,
+   "reorder_and_upcast_attn": false,
+   "resid_pdrop": 0.1,
+   "scale_attn_by_inverse_layer_idx": false,
+   "scale_attn_weights": true,
+   "summary_activation": null,
+   "summary_first_dropout": 0.1,
+   "summary_proj_to_labels": true,
+   "summary_type": "cls_index",
+   "summary_use_proj": true,
+   "torch_dtype": "float32",
+   "transformers_version": "4.38.2",
+   "use_cache": true,
+   "vocab_size": 50264
+ }
checkpoint-2500/generation_config.json ADDED
@@ -0,0 +1,7 @@
+ {
+   "_from_model_config": true,
+   "bos_token_id": 1,
+   "eos_token_id": 2,
+   "pad_token_id": 0,
+   "transformers_version": "4.38.2"
+ }
checkpoint-2500/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:53c45d85117a64a54ff73e4866b797410164a8d56a769b4a8e2eb698ffccf3d2
+ size 500941440
checkpoint-2500/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5322e35104def8f18c9f5306b719be2b042f78de65a24f2900586fe19924f709
+ size 1388158
checkpoint-2500/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a04a8ee606b906d536d7725250ff91cd3c81b932f89d623d0f0956c076d68a2f
+ size 14168
checkpoint-2500/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:24c73de17b528e3ce1bd60b59da5ca7d31e43f676c99b8f33dd080550a88b87f
+ size 1056
checkpoint-2500/trainer_state.json ADDED
@@ -0,0 +1,56 @@
+ {
+   "best_metric": null,
+   "best_model_checkpoint": null,
+   "epoch": 3.522780648191639,
+   "eval_steps": 500,
+   "global_step": 2500,
+   "is_hyper_param_search": false,
+   "is_local_process_zero": true,
+   "is_world_process_zero": true,
+   "log_history": [
+     {
+       "epoch": 0.7,
+       "grad_norm": 2.76055908203125,
+       "learning_rate": 0.00016473906911142455,
+       "loss": 3.9965,
+       "step": 500
+     },
+     {
+       "epoch": 1.41,
+       "grad_norm": 2.5890133380889893,
+       "learning_rate": 0.00012947813822284908,
+       "loss": 3.3018,
+       "step": 1000
+     },
+     {
+       "epoch": 2.11,
+       "grad_norm": 2.6693315505981445,
+       "learning_rate": 9.421720733427363e-05,
+       "loss": 2.8988,
+       "step": 1500
+     },
+     {
+       "epoch": 2.82,
+       "grad_norm": 3.1421236991882324,
+       "learning_rate": 5.8956276445698163e-05,
+       "loss": 2.3929,
+       "step": 2000
+     },
+     {
+       "epoch": 3.52,
+       "grad_norm": 2.9969077110290527,
+       "learning_rate": 2.3695345557122707e-05,
+       "loss": 2.0648,
+       "step": 2500
+     }
+   ],
+   "logging_steps": 500,
+   "max_steps": 2836,
+   "num_input_tokens_seen": 0,
+   "num_train_epochs": 4,
+   "save_steps": 500,
+   "total_flos": 979796127744000.0,
+   "train_batch_size": 8,
+   "trial_name": null,
+   "trial_params": null
+ }
checkpoint-2500/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6505f97547c48a1103750e9e7198c48bcaf26b74eb97216d25dcbdea6812f6b3
+ size 4896
checkpoint-500/config.json ADDED
@@ -0,0 +1,41 @@
+ {
+   "_name_or_path": "ai-forever/rugpt3small_based_on_gpt2",
+   "activation_function": "gelu_new",
+   "architectures": [
+     "GPT2LMHeadModel"
+   ],
+   "attn_pdrop": 0.1,
+   "bos_token_id": 1,
+   "embd_pdrop": 0.1,
+   "eos_token_id": 2,
+   "gradient_checkpointing": false,
+   "id2label": {
+     "0": "LABEL_0"
+   },
+   "initializer_range": 0.02,
+   "label2id": {
+     "LABEL_0": 0
+   },
+   "layer_norm_epsilon": 1e-05,
+   "model_type": "gpt2",
+   "n_ctx": 2048,
+   "n_embd": 768,
+   "n_head": 12,
+   "n_inner": null,
+   "n_layer": 12,
+   "n_positions": 2048,
+   "pad_token_id": 0,
+   "reorder_and_upcast_attn": false,
+   "resid_pdrop": 0.1,
+   "scale_attn_by_inverse_layer_idx": false,
+   "scale_attn_weights": true,
+   "summary_activation": null,
+   "summary_first_dropout": 0.1,
+   "summary_proj_to_labels": true,
+   "summary_type": "cls_index",
+   "summary_use_proj": true,
+   "torch_dtype": "float32",
+   "transformers_version": "4.38.2",
+   "use_cache": true,
+   "vocab_size": 50264
+ }
checkpoint-500/generation_config.json ADDED
@@ -0,0 +1,7 @@
+ {
+   "_from_model_config": true,
+   "bos_token_id": 1,
+   "eos_token_id": 2,
+   "pad_token_id": 0,
+   "transformers_version": "4.38.2"
+ }
checkpoint-500/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3a00378c941bc6126014eeef7950960397719b20b8c67a582f353eedc8cb8375
+ size 1388158
checkpoint-500/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4af4d83d8c0108d3aa94ee44da238a0a9a61f03a5e9dd4f3e8a4b70c75748a3f
+ size 14168
checkpoint-500/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:84dd3d57acb3fd08cf92f7801193f24ea66bbda53a190defa8e55f5ea31783e4
+ size 1056
checkpoint-500/trainer_state.json ADDED
@@ -0,0 +1,28 @@
+ {
+   "best_metric": null,
+   "best_model_checkpoint": null,
+   "epoch": 0.7045561296383278,
+   "eval_steps": 500,
+   "global_step": 500,
+   "is_hyper_param_search": false,
+   "is_local_process_zero": true,
+   "is_world_process_zero": true,
+   "log_history": [
+     {
+       "epoch": 0.7,
+       "grad_norm": 2.76055908203125,
+       "learning_rate": 0.00016473906911142455,
+       "loss": 3.9965,
+       "step": 500
+     }
+   ],
+   "logging_steps": 500,
+   "max_steps": 2836,
+   "num_input_tokens_seen": 0,
+   "num_train_epochs": 4,
+   "save_steps": 500,
+   "total_flos": 195969024000000.0,
+   "train_batch_size": 8,
+   "trial_name": null,
+   "trial_params": null
+ }
checkpoint-500/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6505f97547c48a1103750e9e7198c48bcaf26b74eb97216d25dcbdea6812f6b3
+ size 4896
config.json ADDED
@@ -0,0 +1,41 @@
+ {
+   "_name_or_path": "ai-forever/rugpt3small_based_on_gpt2",
+   "activation_function": "gelu_new",
+   "architectures": [
+     "GPT2LMHeadModel"
+   ],
+   "attn_pdrop": 0.1,
+   "bos_token_id": 1,
+   "embd_pdrop": 0.1,
+   "eos_token_id": 2,
+   "gradient_checkpointing": false,
+   "id2label": {
+     "0": "LABEL_0"
+   },
+   "initializer_range": 0.02,
+   "label2id": {
+     "LABEL_0": 0
+   },
+   "layer_norm_epsilon": 1e-05,
+   "model_type": "gpt2",
+   "n_ctx": 2048,
+   "n_embd": 768,
+   "n_head": 12,
+   "n_inner": null,
+   "n_layer": 12,
+   "n_positions": 2048,
+   "pad_token_id": 0,
+   "reorder_and_upcast_attn": false,
+   "resid_pdrop": 0.1,
+   "scale_attn_by_inverse_layer_idx": false,
+   "scale_attn_weights": true,
+   "summary_activation": null,
+   "summary_first_dropout": 0.1,
+   "summary_proj_to_labels": true,
+   "summary_type": "cls_index",
+   "summary_use_proj": true,
+   "torch_dtype": "float32",
+   "transformers_version": "4.38.2",
+   "use_cache": true,
+   "vocab_size": 50264
+ }
generation_config.json ADDED
@@ -0,0 +1,7 @@
+ {
+   "_from_model_config": true,
+   "bos_token_id": 1,
+   "eos_token_id": 2,
+   "pad_token_id": 0,
+   "transformers_version": "4.38.2"
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:03dff07ae63677639709968c7bb7bf7fd697d4d09009f58689244dc145bb5665
+ size 500941440
runs/Apr13_05-49-15_e3cdf6043cc1/events.out.tfevents.1712987355.e3cdf6043cc1.332.1 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3f5e167e7468675807dd1f7696c7c08d2a5093449dd452571b72089ad0e675d7
+ size 5280
runs/Apr13_05-52-47_e3cdf6043cc1/events.out.tfevents.1712987568.e3cdf6043cc1.332.2 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:81a78146098a7bea7667e4048641c1fb14c5d122515d1f022686331449ec2a05
+ size 6468
training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6505f97547c48a1103750e9e7198c48bcaf26b74eb97216d25dcbdea6812f6b3
+ size 4896
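For completeness: `training_args.bin` is a torch-pickled `transformers.TrainingArguments` object, so the hyperparameters listed in the README can be checked against the uploaded file. A small inspection sketch, assuming a local clone with git-lfs (so the file is the real 4896-byte pickle, not a pointer) and a `transformers` version close to 4.38.2, since unpickling can fail across distant versions:

```python
# Sketch: verify the README hyperparameters against the pickled TrainingArguments.
import torch

args = torch.load("training_args.bin")
print(args.learning_rate)                # expected 0.0002 per the README
print(args.per_device_train_batch_size)  # expected 8
print(args.gradient_accumulation_steps)  # expected 3
print(args.num_train_epochs)             # expected 4

# The checkpoint-500 ... checkpoint-2500 directories also carry optimizer,
# scheduler, and RNG state, so a Trainer rebuilt with the same args and data
# could resume mid-run, e.g.:
# trainer.train(resume_from_checkpoint="checkpoint-2500")
```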