José Antonio García Díaz
committed on
Commit
·
468f057
1
Parent(s):
dbf8b1f
upload model
Browse files
- config.json +48 -0
- hyperparameters.csv +11 -0
- optimizer.pt +3 -0
- pytorch_model.bin +3 -0
- scheduler.pt +3 -0
- trainer_state.json +82 -0
- training_args.bin +3 -0
- training_resume.json +29 -0
config.json
ADDED
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "dccuchile/bert-base-spanish-wwm-cased",
|
3 |
+
"architectures": [
|
4 |
+
"BertForSequenceClassification"
|
5 |
+
],
|
6 |
+
"attention_probs_dropout_prob": 0.1,
|
7 |
+
"classifier_dropout": null,
|
8 |
+
"gradient_checkpointing": false,
|
9 |
+
"hidden_act": "gelu",
|
10 |
+
"hidden_dropout_prob": 0.1,
|
11 |
+
"hidden_size": 768,
|
12 |
+
"id2label": {
|
13 |
+
"0": "LABEL_0",
|
14 |
+
"1": "LABEL_1",
|
15 |
+
"2": "LABEL_2",
|
16 |
+
"3": "LABEL_3",
|
17 |
+
"4": "LABEL_4",
|
18 |
+
"5": "LABEL_5",
|
19 |
+
"6": "LABEL_6",
|
20 |
+
"7": "LABEL_7"
|
21 |
+
},
|
22 |
+
"initializer_range": 0.02,
|
23 |
+
"intermediate_size": 3072,
|
24 |
+
"label2id": {
|
25 |
+
"LABEL_0": 0,
|
26 |
+
"LABEL_1": 1,
|
27 |
+
"LABEL_2": 2,
|
28 |
+
"LABEL_3": 3,
|
29 |
+
"LABEL_4": 4,
|
30 |
+
"LABEL_5": 5,
|
31 |
+
"LABEL_6": 6,
|
32 |
+
"LABEL_7": 7
|
33 |
+
},
|
34 |
+
"layer_norm_eps": 1e-12,
|
35 |
+
"max_position_embeddings": 512,
|
36 |
+
"model_type": "bert",
|
37 |
+
"num_attention_heads": 12,
|
38 |
+
"num_hidden_layers": 12,
|
39 |
+
"output_past": true,
|
40 |
+
"pad_token_id": 1,
|
41 |
+
"position_embedding_type": "absolute",
|
42 |
+
"problem_type": "single_label_classification",
|
43 |
+
"torch_dtype": "float32",
|
44 |
+
"transformers_version": "4.23.1",
|
45 |
+
"type_vocab_size": 2,
|
46 |
+
"use_cache": true,
|
47 |
+
"vocab_size": 31002
|
48 |
+
}
|
hyperparameters.csv
ADDED
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
objective,best,learning_rate,num_train_epochs,per_device_train_batch_size,warmup_steps,weight_decay,time_this_iter_s
|
2 |
+
0.6331198486192046,False,4.852135288839538e-05,5,8,1000,0.1759370237120074,300.3797607421875
|
3 |
+
0.5332256888334244,False,2.8545916286088344e-05,4,16,250,0.04553082334923364,282.6073889732361
|
4 |
+
0.5252117727656963,False,3.20388365693834e-05,5,8,1000,0.07432435161831266,302.52899074554443
|
5 |
+
0.6374082978762154,True,2.796445810732742e-05,4,8,500,0.21340361893154028,300.44139099121094
|
6 |
+
0.5407608100998937,False,3.208423458510781e-05,3,16,250,0.16242308751065626,282.61042952537537
|
7 |
+
0.5380564263224751,False,1.792675165496557e-05,2,8,500,0.1599939313426146,302.53033447265625
|
8 |
+
0.5375266107138353,False,3.8001358870056086e-05,4,8,1000,0.16967901879559275,302.5936050415039
|
9 |
+
0.6229397036524638,False,4.0623219812735634e-05,3,16,0,0.08733698076322681,280.6228218078613
|
10 |
+
0.5931718087375519,False,2.8861899351678586e-05,2,16,0,0.22607052443337083,281.02607893943787
|
11 |
+
0.5105321046880006,False,2.821667441825732e-05,5,16,1000,0.11698700097614871,282.89386224746704
|
optimizer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:28b81de35c86462c75d8c3ecbf3389c840e4751246d28b596778b7ee1c4fcd74
|
3 |
+
size 879023813
|
pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:91b35323663ff487c26041b3b83df0e8f78b6100714e3ba00455c582e70127c6
|
3 |
+
size 439500917
|
scheduler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e4b5743511062cb47358a6dfb856b1cb5d3af7d4323822c50e79922af37203b8
|
3 |
+
size 627
|
trainer_state.json
ADDED
@@ -0,0 +1,82 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": 0.8237528800964355,
|
3 |
+
"best_model_checkpoint": "./results/run-14f8e466/checkpoint-882",
|
4 |
+
"epoch": 4.0,
|
5 |
+
"global_step": 3528,
|
6 |
+
"is_hyper_param_search": true,
|
7 |
+
"is_local_process_zero": true,
|
8 |
+
"is_world_process_zero": true,
|
9 |
+
"log_history": [
|
10 |
+
{
|
11 |
+
"epoch": 0.0,
|
12 |
+
"learning_rate": 5.592891621465484e-08,
|
13 |
+
"loss": 2.1634,
|
14 |
+
"step": 1
|
15 |
+
},
|
16 |
+
{
|
17 |
+
"epoch": 1.0,
|
18 |
+
"eval_f1": 0.5537101472531625,
|
19 |
+
"eval_loss": 0.8237528800964355,
|
20 |
+
"eval_runtime": 9.1428,
|
21 |
+
"eval_samples_per_second": 257.253,
|
22 |
+
"eval_steps_per_second": 32.157,
|
23 |
+
"step": 882
|
24 |
+
},
|
25 |
+
{
|
26 |
+
"epoch": 1.13,
|
27 |
+
"learning_rate": 2.3346813109419988e-05,
|
28 |
+
"loss": 1.1017,
|
29 |
+
"step": 1000
|
30 |
+
},
|
31 |
+
{
|
32 |
+
"epoch": 2.0,
|
33 |
+
"eval_f1": 0.6235863967477107,
|
34 |
+
"eval_loss": 0.8632814288139343,
|
35 |
+
"eval_runtime": 9.1334,
|
36 |
+
"eval_samples_per_second": 257.516,
|
37 |
+
"eval_steps_per_second": 32.19,
|
38 |
+
"step": 1764
|
39 |
+
},
|
40 |
+
{
|
41 |
+
"epoch": 2.27,
|
42 |
+
"learning_rate": 1.411152311360512e-05,
|
43 |
+
"loss": 0.5733,
|
44 |
+
"step": 2000
|
45 |
+
},
|
46 |
+
{
|
47 |
+
"epoch": 3.0,
|
48 |
+
"eval_f1": 0.6471466926746001,
|
49 |
+
"eval_loss": 1.2097604274749756,
|
50 |
+
"eval_runtime": 9.1352,
|
51 |
+
"eval_samples_per_second": 257.466,
|
52 |
+
"eval_steps_per_second": 32.183,
|
53 |
+
"step": 2646
|
54 |
+
},
|
55 |
+
{
|
56 |
+
"epoch": 3.4,
|
57 |
+
"learning_rate": 4.87623311779025e-06,
|
58 |
+
"loss": 0.2885,
|
59 |
+
"step": 3000
|
60 |
+
},
|
61 |
+
{
|
62 |
+
"epoch": 4.0,
|
63 |
+
"eval_f1": 0.6374082978762154,
|
64 |
+
"eval_loss": 1.457223653793335,
|
65 |
+
"eval_runtime": 9.1351,
|
66 |
+
"eval_samples_per_second": 257.47,
|
67 |
+
"eval_steps_per_second": 32.184,
|
68 |
+
"step": 3528
|
69 |
+
}
|
70 |
+
],
|
71 |
+
"max_steps": 3528,
|
72 |
+
"num_train_epochs": 4,
|
73 |
+
"total_flos": 5484511364643360.0,
|
74 |
+
"trial_name": null,
|
75 |
+
"trial_params": {
|
76 |
+
"learning_rate": 2.796445810732742e-05,
|
77 |
+
"num_train_epochs": 4,
|
78 |
+
"per_device_train_batch_size": 8,
|
79 |
+
"warmup_steps": 500,
|
80 |
+
"weight_decay": 0.21340361893154028
|
81 |
+
}
|
82 |
+
}
|
training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:154040c2f69e216fb0cf9f5316b1a42ad0b75fcd3974504bddf0f571135699fd
|
3 |
+
size 3387
|
training_resume.json
ADDED
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"batch_train_size": 16,
|
3 |
+
"batch_val_size": 32,
|
4 |
+
"corpus": "text",
|
5 |
+
"dataset": "safercity",
|
6 |
+
"epochs": [
|
7 |
+
1,
|
8 |
+
2,
|
9 |
+
3,
|
10 |
+
4,
|
11 |
+
5
|
12 |
+
],
|
13 |
+
"folder": "beto",
|
14 |
+
"iterations": 10,
|
15 |
+
"labels": 8,
|
16 |
+
"model": "transformers",
|
17 |
+
"original_pretrained_model": "dccuchile/bert-base-spanish-wwm-cased",
|
18 |
+
"pretrained_model": "../assets/safercity/text/models/beto",
|
19 |
+
"resources_per_trial": {
|
20 |
+
"gpu": 1
|
21 |
+
},
|
22 |
+
"run_id": "14f8e466",
|
23 |
+
"task": "",
|
24 |
+
"task_type": "classification",
|
25 |
+
"tokenizer_field": "tweet",
|
26 |
+
"tokenizer_model": "dccuchile/bert-base-spanish-wwm-cased",
|
27 |
+
"warmup_steps": 500,
|
28 |
+
"weight_decay": 0.01
|
29 |
+
}
|