blogmilx committed · Commit a266a86 (verified) · Parent(s): 6677250

End of training

README.md CHANGED
@@ -1,7 +1,7 @@
  ---
  library_name: transformers
- license: apache-2.0
- base_model: bert-base-uncased
+ license: mit
+ base_model: gpt2
  tags:
  - generated_from_trainer
  model-index:
@@ -14,14 +14,7 @@ should probably proofread and complete it, then remove this comment. -->
 
  # checkpoints
 
- This model is a fine-tuned version of [bert-base-uncased](https://huggingface.co/bert-base-uncased) on the None dataset.
- It achieves the following results on the evaluation set:
- - eval_loss: 0.0131
- - eval_runtime: 1367.6796
- - eval_samples_per_second: 139.329
- - eval_steps_per_second: 17.416
- - epoch: 0.0420
- - step: 1000
+ This model is a fine-tuned version of [gpt2](https://huggingface.co/gpt2) on an unknown dataset.
 
  ## Model description
 
@@ -40,17 +33,21 @@ More information needed
  ### Training hyperparameters
 
  The following hyperparameters were used during training:
- - learning_rate: 2e-05
- - train_batch_size: 8
+ - learning_rate: 5e-05
+ - train_batch_size: 4
  - eval_batch_size: 8
  - seed: 42
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
  - lr_scheduler_type: linear
- - num_epochs: 3
+ - num_epochs: 1
+
+ ### Training results
+
+
 
  ### Framework versions
 
  - Transformers 4.44.2
- - Pytorch 2.4.1+cu121
- - Datasets 3.0.2
+ - Pytorch 2.5.0+cu121
+ - Datasets 3.1.0
  - Tokenizers 0.19.1
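
The updated card records a fresh GPT-2 causal-LM run in place of the earlier BERT question-answering run. The training script itself is not part of this commit, so the following is a minimal sketch of a `transformers.Trainer` setup matching the hyperparameters above; the dataset and output directory are placeholder assumptions.

```python
# Minimal sketch of a Trainer setup matching the card's hyperparameters.
# The dataset and output_dir are placeholders; the real training script
# is not included in this commit.
from transformers import AutoModelForCausalLM, AutoTokenizer, Trainer, TrainingArguments

tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")

args = TrainingArguments(
    output_dir="checkpoints",       # assumption: repo name reused as output dir
    learning_rate=5e-5,             # learning_rate: 5e-05
    per_device_train_batch_size=4,  # train_batch_size: 4
    per_device_eval_batch_size=8,   # eval_batch_size: 8
    seed=42,                        # seed: 42
    lr_scheduler_type="linear",     # lr_scheduler_type: linear
    num_train_epochs=1,             # num_epochs: 1
)
# The card's "Adam with betas=(0.9,0.999) and epsilon=1e-08" matches the
# Trainer's default optimizer settings, so nothing extra is needed here.

# trainer = Trainer(model=model, args=args,
#                   train_dataset=...,   # unknown dataset, per the card
#                   eval_dataset=...)
# trainer.train()
```
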
config.json CHANGED
@@ -1,26 +1,39 @@
  {
- "_name_or_path": "bert-base-uncased",
+ "_name_or_path": "gpt2",
+ "activation_function": "gelu_new",
  "architectures": [
- "BertForQuestionAnswering"
+ "GPT2LMHeadModel"
  ],
- "attention_probs_dropout_prob": 0.1,
- "classifier_dropout": null,
- "gradient_checkpointing": false,
- "hidden_act": "gelu",
- "hidden_dropout_prob": 0.1,
- "hidden_size": 768,
+ "attn_pdrop": 0.1,
+ "bos_token_id": 50256,
+ "embd_pdrop": 0.1,
+ "eos_token_id": 50256,
  "initializer_range": 0.02,
- "intermediate_size": 3072,
- "layer_norm_eps": 1e-12,
- "max_position_embeddings": 512,
- "model_type": "bert",
- "num_attention_heads": 12,
- "num_hidden_layers": 12,
- "pad_token_id": 0,
- "position_embedding_type": "absolute",
+ "layer_norm_epsilon": 1e-05,
+ "model_type": "gpt2",
+ "n_ctx": 1024,
+ "n_embd": 768,
+ "n_head": 12,
+ "n_inner": null,
+ "n_layer": 12,
+ "n_positions": 1024,
+ "reorder_and_upcast_attn": false,
+ "resid_pdrop": 0.1,
+ "scale_attn_by_inverse_layer_idx": false,
+ "scale_attn_weights": true,
+ "summary_activation": null,
+ "summary_first_dropout": 0.1,
+ "summary_proj_to_labels": true,
+ "summary_type": "cls_index",
+ "summary_use_proj": true,
+ "task_specific_params": {
+ "text-generation": {
+ "do_sample": true,
+ "max_length": 50
+ }
+ },
  "torch_dtype": "float32",
  "transformers_version": "4.44.2",
- "type_vocab_size": 2,
  "use_cache": true,
- "vocab_size": 30522
+ "vocab_size": 50257
  }
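
The new config.json is the stock 124M-parameter GPT-2 architecture (12 layers, 12 heads, 768-dim embeddings, 50257-token vocabulary) with a language-modeling head, so the checkpoint loads with the standard classes. A loading sketch, where `"path/to/checkpoints"` is a placeholder for a local clone of this repo; the tokenizer falls back to the base `gpt2`, since this commit touches no tokenizer files:

```python
# Sketch: load the fine-tuned checkpoint with the stock GPT-2 classes.
# "path/to/checkpoints" is a placeholder for a local clone of this repo.
from transformers import GPT2LMHeadModel, GPT2TokenizerFast

model = GPT2LMHeadModel.from_pretrained("path/to/checkpoints")
# This commit adds no tokenizer files, so fall back to the base vocab,
# which matches vocab_size 50257 in config.json.
tokenizer = GPT2TokenizerFast.from_pretrained("gpt2")

print(model.config.n_layer, model.config.n_head, model.config.n_embd)
# 12 12 768
```
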
generation_config.json ADDED
@@ -0,0 +1,6 @@
+ {
+ "_from_model_config": true,
+ "bos_token_id": 50256,
+ "eos_token_id": 50256,
+ "transformers_version": "4.44.2"
+ }
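
The new generation_config.json only pins GPT-2's `<|endoftext|>` token (id 50256) as both BOS and EOS; `generate()` reads it automatically once the model is loaded. A short sketch, reusing `model` and `tokenizer` from the loading example above, with sampling settings mirroring the `task_specific_params` in config.json:

```python
# Sketch: generate() picks up generation_config.json automatically.
# Reuses `model` and `tokenizer` from the loading sketch above.
inputs = tokenizer("Hello, world", return_tensors="pt")
outputs = model.generate(
    **inputs,
    do_sample=True,   # mirrors task_specific_params.text-generation
    max_length=50,
)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```
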
model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:53e497fedb85fde671da7e5e599a0b293bacb64e4b3b51a5e0f42693be540985
- size 435596088
+ oid sha256:c9a5fb8843c4d4099f380bba150fc50a8f756bdb391817a824f129eee95f5b24
+ size 497774208
runs/Oct31_15-26-38_9c0dc4be863c/events.out.tfevents.1730388405.9c0dc4be863c.615.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:498a85b3989e0745092be3d33f10fed92e92b19e7f75d2d67542569ffcf4fb8b
+ size 7082
training_args.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:4b5222615ba48b12e344b3c124b0193b1fbb209275052e8d2cc228b9d83dd17a
+ oid sha256:4ee3b672d2d73d5d9f2fda314763267750f8446945c7af90c13d1d5fe8d570b4
  size 5240
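
training_args.bin is the pickled `TrainingArguments` object the `Trainer` saves next to the weights; only its hash changes here, while the size stays 5240 bytes. One way to check that the stored arguments match the card (a sketch; `torch.load` unpickles arbitrary objects, so only run it on files you trust):

```python
# Sketch: inspect the pickled TrainingArguments saved by the Trainer.
# torch.load unpickles arbitrary objects; only use it on trusted files.
import torch

args = torch.load("training_args.bin", weights_only=False)
print(args.learning_rate, args.per_device_train_batch_size, args.seed)
# expected from the card: 5e-05 4 42
```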