CodeTed committed · Commit 88e6444 · verified · 1 Parent(s): f896139

best performance

config.json ADDED
@@ -0,0 +1,33 @@
+ {
+   "_name_or_path": "imxly/t5-copy",
+   "architectures": [
+     "CopyT5ForConditionalGeneration"
+   ],
+   "classifier_dropout": 0.0,
+   "d_ff": 2048,
+   "d_kv": 64,
+   "d_model": 768,
+   "decoder_start_token_id": 0,
+   "dense_act_fn": "gelu_new",
+   "dropout_rate": 0.1,
+   "eos_token_id": 1,
+   "feed_forward_proj": "gated-gelu",
+   "initializer_factor": 1.0,
+   "is_encoder_decoder": true,
+   "is_gated_act": true,
+   "layer_norm_epsilon": 1e-06,
+   "model_type": "t5",
+   "num_decoder_layers": 12,
+   "num_heads": 12,
+   "num_layers": 12,
+   "output_past": true,
+   "pad_token_id": 0,
+   "relative_attention_max_distance": 128,
+   "relative_attention_num_buckets": 32,
+   "tie_word_embeddings": false,
+   "tokenizer_class": "T5Tokenizer",
+   "torch_dtype": "float32",
+   "transformers_version": "4.47.0",
+   "use_cache": true,
+   "vocab_size": 50000
+ }
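Since `model_type` is `t5`, this config loads with the stock transformers `T5Config` loader; the `CopyT5ForConditionalGeneration` architecture itself is a custom class that has to come from the project's own code. A minimal sketch, assuming the checkpoint files sit in a local directory (the path is illustrative, not part of this commit):

```python
from transformers import T5Config

# Hypothetical local path to this checkpoint directory.
config = T5Config.from_pretrained("./copyt5_checkpoint")

assert config.model_type == "t5"
print(config.d_model, config.num_layers, config.vocab_size)  # 768 12 50000
```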
eval_results.txt ADDED
@@ -0,0 +1 @@
+ eval_loss = 0.2587745115160942
generation_config.json ADDED
@@ -0,0 +1,7 @@
+ {
+   "_from_model_config": true,
+   "decoder_start_token_id": 0,
+   "eos_token_id": 1,
+   "pad_token_id": 0,
+   "transformers_version": "4.47.0"
+ }
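These are the defaults `model.generate()` falls back to when no explicit generation arguments are passed. A sketch of how transformers picks them up (same illustrative checkpoint path as above):

```python
from transformers import GenerationConfig

gen_config = GenerationConfig.from_pretrained("./copyt5_checkpoint")
# Token ids recorded in this file: decoder_start=0, eos=1, pad=0.
print(gen_config.decoder_start_token_id, gen_config.eos_token_id, gen_config.pad_token_id)
```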
model_args.json ADDED
@@ -0,0 +1,104 @@
+ {
+   "adafactor_beta1": null,
+   "adafactor_clip_threshold": 1.0,
+   "adafactor_decay_rate": -0.8,
+   "adafactor_eps": [
+     1e-30,
+     0.001
+   ],
+   "adafactor_relative_step": false,
+   "adafactor_scale_parameter": false,
+   "adafactor_warmup_init": false,
+   "adam_epsilon": 1e-08,
+   "best_model_dir": "/root/data/outputs/copyt5_zh_nlpcc_pku_csc50k_lev_/best_model",
+   "cache_dir": "cache_dir/",
+   "config": {},
+   "cosine_schedule_num_cycles": 0.5,
+   "custom_layer_parameters": [],
+   "custom_parameter_groups": [],
+   "dataloader_num_workers": 0,
+   "do_lower_case": false,
+   "dynamic_quantize": false,
+   "early_stopping_consider_epochs": false,
+   "early_stopping_delta": 0,
+   "early_stopping_metric": "eval_loss",
+   "early_stopping_metric_minimize": true,
+   "early_stopping_patience": 6,
+   "encoding": "utf-8",
+   "eval_batch_size": 8,
+   "evaluate_during_training": true,
+   "evaluate_during_training_silent": true,
+   "evaluate_during_training_steps": 800,
+   "evaluate_during_training_verbose": true,
+   "evaluate_each_epoch": true,
+   "fp16": false,
+   "gradient_accumulation_steps": 1,
+   "learning_rate": 0.0001,
+   "local_rank": -1,
+   "logging_steps": 200,
+   "manual_seed": null,
+   "max_grad_norm": 1.0,
+   "max_seq_length": 200,
+   "model_name": "imxly/t5-copy",
+   "model_type": "copyt5",
+   "multiprocessing_chunksize": -1,
+   "n_gpu": 1,
+   "no_cache": false,
+   "no_save": false,
+   "not_saved_args": [],
+   "num_train_epochs": 3,
+   "optimizer": "AdamW",
+   "output_dir": "/root/data/outputs/copyt5_zh_nlpcc_pku_csc50k_lev_/",
+   "overwrite_output_dir": true,
+   "polynomial_decay_schedule_lr_end": 1e-07,
+   "polynomial_decay_schedule_power": 1.0,
+   "process_count": 46,
+   "quantized_model": false,
+   "reprocess_input_data": true,
+   "save_best_model": true,
+   "save_eval_checkpoints": false,
+   "save_model_every_epoch": false,
+   "save_optimizer_and_scheduler": true,
+   "save_steps": 15000,
+   "scheduler": "linear_schedule_with_warmup",
+   "silent": false,
+   "skip_special_tokens": true,
+   "tensorboard_dir": null,
+   "thread_count": null,
+   "tokenizer_name": null,
+   "tokenizer_type": null,
+   "train_batch_size": 32,
+   "train_custom_parameters_only": false,
+   "use_cached_eval_features": false,
+   "use_early_stopping": true,
+   "use_hf_datasets": false,
+   "use_multiprocessing": false,
+   "use_multiprocessing_for_evaluation": false,
+   "wandb_kwargs": {},
+   "wandb_project": null,
+   "warmup_ratio": 0.06,
+   "warmup_steps": 8687,
+   "weight_decay": 0.0,
+   "model_class": "CopyT5Model",
+   "dataset_class": null,
+   "do_sample": false,
+   "early_stopping": true,
+   "evaluate_generated_text": true,
+   "length_penalty": 2.0,
+   "max_length": 200,
+   "max_steps": -1,
+   "num_beams": 3,
+   "num_return_sequences": 1,
+   "preprocess_inputs": true,
+   "repetition_penalty": 1.0,
+   "special_tokens_list": [
+     "[unused1]",
+     "[unused2]",
+     "[unused3]",
+     "[unused4]",
+     "[unused5]"
+   ],
+   "top_k": null,
+   "top_p": null,
+   "use_multiprocessed_decoding": false
+ }
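The field names here follow the simpletransformers-style seq2seq argument layout (train/eval batch sizes, early stopping on `eval_loss`, beam-search generation settings). A minimal sketch that just reads the file and surfaces the key training hyperparameters, without depending on that library:

```python
import json

with open("model_args.json", encoding="utf-8") as f:
    args = json.load(f)

# Key hyperparameters recorded for this run.
for key in ("model_name", "learning_rate", "train_batch_size",
            "num_train_epochs", "warmup_steps", "num_beams", "max_seq_length"):
    print(f"{key}: {args[key]}")
```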
special_tokens_map.json ADDED
@@ -0,0 +1,44 @@
+ {
+   "additional_special_tokens": [
+     {
+       "content": "[unused1]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false
+     },
+     {
+       "content": "[unused2]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false
+     },
+     {
+       "content": "[unused3]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false
+     },
+     {
+       "content": "[unused4]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false
+     },
+     {
+       "content": "[unused5]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false
+     }
+   ],
+   "cls_token": "[CLS]",
+   "mask_token": "[MASK]",
+   "pad_token": "[PAD]",
+   "sep_token": "[SEP]",
+   "unk_token": "[UNK]"
+ }
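The map declares BERT-style special tokens plus five `[unusedN]` placeholders. Tokenizers loaded with `from_pretrained` pick this file up automatically; registering the same placeholders by hand would look like the sketch below (using `BertTokenizer` as a stand-in, an assumption discussed under tokenizer_config.json):

```python
from transformers import BertTokenizer  # stand-in; the checkpoint names a custom ZHTokenizer

# Build from the vocab.txt added in this commit, then register the placeholders.
tokenizer = BertTokenizer("vocab.txt")
tokenizer.add_special_tokens(
    {"additional_special_tokens": [f"[unused{i}]" for i in range(1, 6)]}
)
print(tokenizer.additional_special_tokens)
```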
tokenizer_config.json ADDED
@@ -0,0 +1,105 @@
+ {
+   "added_tokens_decoder": {
+     "0": {
+       "content": "[PAD]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "1": {
+       "content": "[unused1]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "2": {
+       "content": "[unused2]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "3": {
+       "content": "[unused3]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "4": {
+       "content": "[unused4]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "5": {
+       "content": "[unused5]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "100": {
+       "content": "[UNK]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "101": {
+       "content": "[CLS]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "102": {
+       "content": "[SEP]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "103": {
+       "content": "[MASK]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "additional_special_tokens": [
+     "[unused1]",
+     "[unused2]",
+     "[unused3]",
+     "[unused4]",
+     "[unused5]"
+   ],
+   "clean_up_tokenization_spaces": true,
+   "cls_token": "[CLS]",
+   "do_basic_tokenize": true,
+   "do_lower_case": true,
+   "extra_special_tokens": {},
+   "mask_token": "[MASK]",
+   "model_max_length": 1000000000000000019884624838656,
+   "never_split": null,
+   "pad_token": "[PAD]",
+   "sep_token": "[SEP]",
+   "strip_accents": null,
+   "tokenize_chinese_chars": true,
+   "tokenizer_class": "ZHTokenizer",
+   "unk_token": "[UNK]"
+ }
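`tokenizer_class` is `ZHTokenizer`, a custom class not shipped with transformers. The remaining fields (`do_basic_tokenize`, `tokenize_chinese_chars`, WordPiece-style `vocab.txt`, BERT special tokens) mirror `BertTokenizer`'s configuration, so a BERT tokenizer is a reasonable stand-in for inspection. A sketch under that assumption:

```python
from transformers import BertTokenizer

# Assumption: BertTokenizer as a stand-in for the custom ZHTokenizer,
# since this config mirrors BERT's WordPiece settings. Loading the
# actual ZHTokenizer requires the project's own tokenizer code.
tokenizer = BertTokenizer.from_pretrained("./copyt5_checkpoint")
print(tokenizer.tokenize("今天天气很好"))
```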
training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f713cadeb63ec673d4e3d887ef0ce2e394ad77a767e3de001eec1e6733d06ddd
+ size 3704
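This is a Git LFS pointer, not the file itself; the actual blob is a 3.7 kB pickled args object fetched with `git lfs pull`. A sketch for inspecting it, assuming the object's class is importable in the environment that unpickles it:

```python
import torch

# weights_only=False because this is a pickled args object, not a tensor file;
# only unpickle checkpoints you trust.
training_args = torch.load("training_args.bin", weights_only=False)
print(type(training_args))
print(vars(training_args))  # assumes a plain args object with a __dict__
```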
training_progress_scores.csv ADDED
@@ -0,0 +1,35 @@
+ global_step,eval_loss,train_loss,matches
+ 800,0.41702376306056976,1.4951772689819336,0.9494125431237969
+ 1600,0.3887445777654648,1.4314727783203125,0.954538024253534
+ 2400,0.3581150472164154,1.0880261659622192,0.9597188445917524
+ 3200,0.35775092244148254,1.542741298675537,0.9612566103086226
+ 4000,0.3568766713142395,1.0402278900146484,0.9564395677021065
+ 4800,0.3441026359796524,1.1405222415924072,0.959398327082415
+ 5600,0.33802540600299835,1.1654727458953857,0.9518268985109863
+ 6400,0.3336428105831146,1.2279239892959595,0.9630423245943369
+ 7200,0.335241436958313,1.2788712978363037,0.9579324344844469
+ 8000,0.340187668800354,1.3176225423812866,0.9594708960229085
+ 8800,0.33131301403045654,1.1144055128097534,0.9503610059130183
+ 9600,0.3288237750530243,1.0765184164047241,0.9579595207906675
+ 10400,0.3388015478849411,1.1630008220672607,0.9593543710384589
+ 11200,0.30543583631515503,1.1313109397888184,0.963354371038459
+ 12000,0.32609236240386963,0.9187220931053162,0.9522115138956018
+ 12800,0.30562080442905426,1.1777799129486084,0.9648469083518917
+ 13600,0.3183724582195282,0.7291817665100098,0.9611840413681293
+ 14400,0.32210569083690643,0.9249140620231628,0.9665411842252721
+ 15200,0.25943733751773834,0.6917589902877808,0.9607370182420016
+ 16000,0.2648443505167961,1.1873326301574707,0.9643084468134301
+ 16800,0.26724664121866226,0.9436103105545044,0.9634917336758215
+ 17600,0.2495434284210205,0.6649165749549866,0.9521016237857116
+ 18400,0.25149868428707123,0.9023253917694092,0.9628159094999974
+ 19200,0.23414570093154907,0.8953883647918701,0.9628159094999974
+ 20000,0.24547121673822403,1.2020918130874634,0.9573214040054918
+ 20800,0.25678517669439316,1.029130458831787,0.9663853698903532
+ 21600,0.23246226459741592,1.1233551502227783,0.970387338071426
+ 22400,0.24005521833896637,1.062412977218628,0.9629743728853516
+ 23200,0.24047152698040009,0.8992595672607422,0.9683084468134302
+ 24000,0.23483598977327347,0.9550911784172058,0.9683084468134302
+ 24800,0.24171914905309677,0.818202018737793,0.9663492631399608
+ 25600,0.2676503509283066,0.862794816493988,0.9746342613497709
+ 26400,0.25410324335098267,0.8352174162864685,0.9623853698903533
+ 27200,0.2587745115160942,0.886418879032135,0.96292547014098
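The final row (step 27200, eval_loss 0.2587745115160942) matches the value in eval_results.txt; the lowest eval_loss in the log is 0.2325 at step 21600, which is presumably the checkpoint that early stopping on `eval_loss` saved to `best_model`. A short pandas sketch to locate it:

```python
import pandas as pd

scores = pd.read_csv("training_progress_scores.csv")
best = scores.loc[scores["eval_loss"].idxmin()]
print(int(best["global_step"]), best["eval_loss"], best["matches"])
# From the table above: 21600 0.23246226459741592 0.970387338071426
```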
vocab.txt ADDED
The diff for this file is too large to render.