alex2awesome commited on
Commit
55c3b8b
1 Parent(s): 5df7060

first commit

Browse files
all_results.json ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 2.0,
3
+ "eval_e": 0.0,
4
+ "eval_f1": 0.003218218203323381,
5
+ "eval_loss": 7.38798189163208,
6
+ "eval_runtime": 81.0639,
7
+ "eval_samples": 3576,
8
+ "eval_samples_per_second": 44.113,
9
+ "eval_steps_per_second": 44.113,
10
+ "test_e": 0.6023489932885906,
11
+ "test_f1": 0.5506494789202888,
12
+ "test_loss": 2.7486965656280518,
13
+ "test_runtime": 80.778,
14
+ "test_samples_per_second": 44.269,
15
+ "test_steps_per_second": 44.269,
16
+ "train_loss": 1.5977188166040575,
17
+ "train_runtime": 2455.1123,
18
+ "train_samples": 10138,
19
+ "train_samples_per_second": 8.259,
20
+ "train_steps_per_second": 8.259
21
+ }
callback-metrics-state-0.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"INDIRECT QUOTE_loss": 7.3920416831970215, "INDIRECT QUOTE_f1": 0.002867467424910844, "INDIRECT QUOTE_e": 0.0, "DIRECT QUOTE_loss": 7.395792007446289, "DIRECT QUOTE_f1": 0.0026685346041993878, "DIRECT QUOTE_e": 0.0, "BACKGROUND_loss": 7.365277290344238, "BACKGROUND_f1": 0.004162366856499275, "BACKGROUND_e": 0.0, "PUBLISHED WORK_loss": 7.38569974899292, "PUBLISHED WORK_f1": 0.0026257877273695965, "PUBLISHED WORK_e": 0.0, "STATEMENT_loss": 7.371387004852295, "STATEMENT_f1": 0.002811513597655576, "STATEMENT_e": 0.0, "SOCIAL MEDIA POST_loss": 7.371214389801025, "SOCIAL MEDIA POST_f1": 0.0021667480985203876, "SOCIAL MEDIA POST_e": 0.0, "PRESS REPORT_loss": 7.400311470031738, "PRESS REPORT_f1": 0.002322142960838986, "PRESS REPORT_e": 0.0, "DECLINED COMMENT_loss": 7.410003662109375, "DECLINED COMMENT_f1": 0.0035249913168271703, "DECLINED COMMENT_e": 0.0, "PROPOSAL/ORDER/LAW_loss": 7.406617641448975, "PROPOSAL/ORDER/LAW_f1": 0.008154212035879524, "PROPOSAL/ORDER/LAW_e": 0.0, "PRICE SIGNAL_loss": 7.322811126708984, "PRICE SIGNAL_f1": 0.0020276297755392704, "PRICE SIGNAL_e": 0.0, "NARRATIVE_loss": 7.39856481552124, "NARRATIVE_f1": 0.005181297276397006, "NARRATIVE_e": 0.0, "DIRECT OBSERVATION_loss": 7.415427207946777, "DIRECT OBSERVATION_f1": 0.0, "DIRECT OBSERVATION_e": 0.0, "COMMUNICATION_loss": 7.398114204406738, "COMMUNICATION_f1": 0.003210920804232077, "COMMUNICATION_e": 0.0, "PUBLIC SPEECH_loss": 7.275794506072998, "PUBLIC SPEECH_f1": 0.010951878692647335, "PUBLIC SPEECH_e": 0.0, "VOTE/POLL_loss": 7.344000816345215, "VOTE/POLL_f1": 0.0036740648664935344, "VOTE/POLL_e": 0.0, "COURT PROCEEDING_loss": 7.464186191558838, "COURT PROCEEDING_f1": 0.0020636257293816943, "COURT PROCEEDING_e": 0.0, "full_loss": 7.38798189163208, "full_f1": 0.003218218203323381, "full_e": 0.0}
callback-metrics-state-10000.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"INDIRECT QUOTE_loss": 2.0357444286346436, "INDIRECT QUOTE_f1": 0.5565934558159891, "INDIRECT QUOTE_e": 0.5645454545454546, "DIRECT QUOTE_loss": 2.3745360374450684, "DIRECT QUOTE_f1": 0.5075596828055501, "DIRECT QUOTE_e": 0.5552784704904405, "BACKGROUND_loss": 2.600733995437622, "BACKGROUND_f1": 0.5894816406479484, "BACKGROUND_e": 0.6398104265402843, "PUBLISHED WORK_loss": 3.1265647411346436, "PUBLISHED WORK_f1": 0.40655165655496034, "PUBLISHED WORK_e": 0.44274809160305345, "STATEMENT_loss": 2.103498935699463, "STATEMENT_f1": 0.5028104423094844, "STATEMENT_e": 0.6381578947368421, "SOCIAL MEDIA POST_loss": 4.426347732543945, "SOCIAL MEDIA POST_f1": 0.1238095238095238, "SOCIAL MEDIA POST_e": 0.24242424242424243, "PRESS REPORT_loss": 3.8396220207214355, "PRESS REPORT_f1": 0.4449627001351139, "PRESS REPORT_e": 0.4827586206896552, "DECLINED COMMENT_loss": 2.6896440982818604, "DECLINED COMMENT_f1": 0.3125, "DECLINED COMMENT_e": 0.4375, "PROPOSAL/ORDER/LAW_loss": 3.2415931224823, "PROPOSAL/ORDER/LAW_f1": 0.28395954022988507, "PROPOSAL/ORDER/LAW_e": 0.36, "PRICE SIGNAL_loss": 3.032341241836548, "PRICE SIGNAL_f1": 0.39546112988894255, "PRICE SIGNAL_e": 0.42105263157894735, "NARRATIVE_loss": 1.632098913192749, "NARRATIVE_f1": 0.5818456841015806, "NARRATIVE_e": 0.6764705882352942, "DIRECT OBSERVATION_loss": 4.492552757263184, "DIRECT OBSERVATION_f1": 0.01694915254237288, "DIRECT OBSERVATION_e": 0.11864406779661017, "COMMUNICATION_loss": 2.633908748626709, "COMMUNICATION_f1": 0.546875, "COMMUNICATION_e": 0.546875, "PUBLIC SPEECH_loss": 3.456928014755249, "PUBLIC SPEECH_f1": 0.27166666666666667, "PUBLIC SPEECH_e": 0.6333333333333333, "VOTE/POLL_loss": 4.232885360717773, "VOTE/POLL_f1": 0.29275160256595884, "VOTE/POLL_e": 0.30434782608695654, "COURT PROCEEDING_loss": 3.8775787353515625, "COURT PROCEEDING_f1": 0.3294430014430014, "COURT PROCEEDING_e": 0.44, "full_loss": 2.430802822113037, "full_f1": 0.5065431932934646, "full_e": 0.5542505592841164}
callback-metrics-state-15000.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"INDIRECT QUOTE_loss": 2.478729248046875, "INDIRECT QUOTE_f1": 0.5864855228766466, "INDIRECT QUOTE_e": 0.5954545454545455, "DIRECT QUOTE_loss": 3.0114314556121826, "DIRECT QUOTE_f1": 0.530530760516083, "DIRECT QUOTE_e": 0.5685785536159601, "BACKGROUND_loss": 3.5276763439178467, "BACKGROUND_f1": 0.5905275636316465, "BACKGROUND_e": 0.6350710900473934, "PUBLISHED WORK_loss": 4.573206901550293, "PUBLISHED WORK_f1": 0.427382933012704, "PUBLISHED WORK_e": 0.48854961832061067, "STATEMENT_loss": 2.3900763988494873, "STATEMENT_f1": 0.5186403508771931, "STATEMENT_e": 0.6447368421052632, "SOCIAL MEDIA POST_loss": 5.451953887939453, "SOCIAL MEDIA POST_f1": 0.18181818181818182, "SOCIAL MEDIA POST_e": 0.30303030303030304, "PRESS REPORT_loss": 5.020430564880371, "PRESS REPORT_f1": 0.5002736726874658, "PRESS REPORT_e": 0.5344827586206896, "DECLINED COMMENT_loss": 3.7660765647888184, "DECLINED COMMENT_f1": 0.4068688118811881, "DECLINED COMMENT_e": 0.5625, "PROPOSAL/ORDER/LAW_loss": 4.049454689025879, "PROPOSAL/ORDER/LAW_f1": 0.2889760348583878, "PROPOSAL/ORDER/LAW_e": 0.4266666666666667, "PRICE SIGNAL_loss": 3.616384744644165, "PRICE SIGNAL_f1": 0.4947368421052632, "PRICE SIGNAL_e": 0.5263157894736842, "NARRATIVE_loss": 1.9826314449310303, "NARRATIVE_f1": 0.6029803893362629, "NARRATIVE_e": 0.6985294117647058, "DIRECT OBSERVATION_loss": 5.71368932723999, "DIRECT OBSERVATION_f1": 0.01694915254237288, "DIRECT OBSERVATION_e": 0.2033898305084746, "COMMUNICATION_loss": 3.4060516357421875, "COMMUNICATION_f1": 0.5073826058201059, "COMMUNICATION_e": 0.5, "PUBLIC SPEECH_loss": 3.7426810264587402, "PUBLIC SPEECH_f1": 0.25891142160512753, "PUBLIC SPEECH_e": 0.4666666666666667, "VOTE/POLL_loss": 4.689935207366943, "VOTE/POLL_f1": 0.3312252964426877, "VOTE/POLL_e": 0.34782608695652173, "COURT PROCEEDING_loss": 5.710793972015381, "COURT PROCEEDING_f1": 0.2707763347763348, "COURT PROCEEDING_e": 0.44, "full_loss": 3.077216148376465, "full_f1": 0.5272328806773431, "full_e": 0.5724272930648769}
callback-metrics-state-20000.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"INDIRECT QUOTE_loss": 2.1148886680603027, "INDIRECT QUOTE_f1": 0.6061409545975309, "INDIRECT QUOTE_e": 0.6154545454545455, "DIRECT QUOTE_loss": 2.783146381378174, "DIRECT QUOTE_f1": 0.5601386615724058, "DIRECT QUOTE_e": 0.6043225270157938, "BACKGROUND_loss": 3.3464651107788086, "BACKGROUND_f1": 0.6221739141179379, "BACKGROUND_e": 0.6682464454976303, "PUBLISHED WORK_loss": 3.9592363834381104, "PUBLISHED WORK_f1": 0.43717210437759113, "PUBLISHED WORK_e": 0.5038167938931297, "STATEMENT_loss": 2.1502439975738525, "STATEMENT_f1": 0.5222579773709637, "STATEMENT_e": 0.6381578947368421, "SOCIAL MEDIA POST_loss": 4.850955963134766, "SOCIAL MEDIA POST_f1": 0.15151515151515152, "SOCIAL MEDIA POST_e": 0.2727272727272727, "PRESS REPORT_loss": 4.060721397399902, "PRESS REPORT_f1": 0.45439298174801934, "PRESS REPORT_e": 0.5, "DECLINED COMMENT_loss": 3.9542932510375977, "DECLINED COMMENT_f1": 0.375, "DECLINED COMMENT_e": 0.5, "PROPOSAL/ORDER/LAW_loss": 3.2367727756500244, "PROPOSAL/ORDER/LAW_f1": 0.32273568107458245, "PROPOSAL/ORDER/LAW_e": 0.44, "PRICE SIGNAL_loss": 3.1010141372680664, "PRICE SIGNAL_f1": 0.45087719298245615, "PRICE SIGNAL_e": 0.47368421052631576, "NARRATIVE_loss": 1.8089553117752075, "NARRATIVE_f1": 0.6354846971078675, "NARRATIVE_e": 0.7352941176470589, "DIRECT OBSERVATION_loss": 3.868048906326294, "DIRECT OBSERVATION_f1": 0.01694915254237288, "DIRECT OBSERVATION_e": 0.4067796610169492, "COMMUNICATION_loss": 3.3353352546691895, "COMMUNICATION_f1": 0.570775462962963, "COMMUNICATION_e": 0.5625, "PUBLIC SPEECH_loss": 3.9616973400115967, "PUBLIC SPEECH_f1": 0.3, "PUBLIC SPEECH_e": 0.5666666666666667, "VOTE/POLL_loss": 4.4696855545043945, "VOTE/POLL_f1": 0.2924901185770751, "VOTE/POLL_e": 0.30434782608695654, "COURT PROCEEDING_loss": 5.168118953704834, "COURT PROCEEDING_f1": 0.34277633477633473, "COURT PROCEEDING_e": 0.44, "full_loss": 2.754518747329712, "full_f1": 0.5501093051292324, "full_e": 0.6009507829977628}
callback-metrics-state-5000.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"INDIRECT QUOTE_loss": 2.8530545234680176, "INDIRECT QUOTE_f1": 0.4941323500870946, "INDIRECT QUOTE_e": 0.5, "DIRECT QUOTE_loss": 3.684295892715454, "DIRECT QUOTE_f1": 0.42220711015100093, "DIRECT QUOTE_e": 0.4513715710723192, "BACKGROUND_loss": 3.72399640083313, "BACKGROUND_f1": 0.5459073471062601, "BACKGROUND_e": 0.5829383886255924, "PUBLISHED WORK_loss": 5.283926010131836, "PUBLISHED WORK_f1": 0.33880863285241525, "PUBLISHED WORK_e": 0.37404580152671757, "STATEMENT_loss": 3.947087287902832, "STATEMENT_f1": 0.40469407680490305, "STATEMENT_e": 0.4934210526315789, "SOCIAL MEDIA POST_loss": 6.421342372894287, "SOCIAL MEDIA POST_f1": 0.09113693324219639, "SOCIAL MEDIA POST_e": 0.18181818181818182, "PRESS REPORT_loss": 7.026741981506348, "PRESS REPORT_f1": 0.2708760847470401, "PRESS REPORT_e": 0.27586206896551724, "DECLINED COMMENT_loss": 6.057079315185547, "DECLINED COMMENT_f1": 0.34375, "DECLINED COMMENT_e": 0.4375, "PROPOSAL/ORDER/LAW_loss": 6.083311557769775, "PROPOSAL/ORDER/LAW_f1": 0.17483065083065083, "PROPOSAL/ORDER/LAW_e": 0.24, "PRICE SIGNAL_loss": 5.802990913391113, "PRICE SIGNAL_f1": 0.3157894736842105, "PRICE SIGNAL_e": 0.3684210526315789, "NARRATIVE_loss": 2.563370704650879, "NARRATIVE_f1": 0.560518411621996, "NARRATIVE_e": 0.6397058823529411, "DIRECT OBSERVATION_loss": 7.233707904815674, "DIRECT OBSERVATION_f1": 0.01694915254237288, "DIRECT OBSERVATION_e": 0.01694915254237288, "COMMUNICATION_loss": 5.04656982421875, "COMMUNICATION_f1": 0.4015110848372835, "COMMUNICATION_e": 0.40625, "PUBLIC SPEECH_loss": 6.0680975914001465, "PUBLIC SPEECH_f1": 0.255, "PUBLIC SPEECH_e": 0.4666666666666667, "VOTE/POLL_loss": 7.1436967849731445, "VOTE/POLL_f1": 0.26600790513833994, "VOTE/POLL_e": 0.2608695652173913, "COURT PROCEEDING_loss": 6.0425310134887695, "COURT PROCEEDING_f1": 0.21535511250254696, "COURT PROCEEDING_e": 0.4, "full_loss": 3.77375864982605, "full_f1": 0.4364628001202054, "full_e": 0.4684004474272931}
config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "google/bigbird-roberta-base",
3
+ "architectures": [
4
+ "QAModel"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "attention_type": "original_full",
8
+ "block_size": 64,
9
+ "bos_token_id": 1,
10
+ "classifier_dropout": null,
11
+ "eos_token_id": 2,
12
+ "freeze_layers": null,
13
+ "gradient_checkpointing": false,
14
+ "hidden_act": "gelu_new",
15
+ "hidden_dropout_prob": 0.1,
16
+ "hidden_size": 768,
17
+ "include_nones_as_positives": false,
18
+ "initializer_range": 0.02,
19
+ "intermediate_size": 3072,
20
+ "layer_norm_eps": 1e-12,
21
+ "loss_window": null,
22
+ "max_position_embeddings": 4096,
23
+ "model_type": "big_bird",
24
+ "num_attention_heads": 12,
25
+ "num_hidden_layers": 12,
26
+ "num_random_blocks": 3,
27
+ "pad_token_id": 0,
28
+ "position_embedding_type": "absolute",
29
+ "qa_head": {},
30
+ "rescale_embeddings": false,
31
+ "sep_token_id": 66,
32
+ "torch_dtype": "float32",
33
+ "transformers_version": "4.12.2",
34
+ "type_vocab_size": 2,
35
+ "use_bias": true,
36
+ "use_cache": true,
37
+ "vocab_size": 50358
38
+ }
post-training eval_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_samples": 3576,
3
+ "test_e": 0.6023489932885906,
4
+ "test_f1": 0.5506494789202888,
5
+ "test_loss": 2.7486965656280518,
6
+ "test_runtime": 80.778,
7
+ "test_samples_per_second": 44.269,
8
+ "test_steps_per_second": 44.269
9
+ }
pre-training eval_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_e": 0.0,
3
+ "eval_f1": 0.003218218203323381,
4
+ "eval_loss": 7.38798189163208,
5
+ "eval_runtime": 81.0639,
6
+ "eval_samples_per_second": 44.113,
7
+ "eval_steps_per_second": 44.113
8
+ }
prediction_output.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fadb5be0bb0e164763126f1767ffd20cdbb87d2bf3b4104c66e5ed5af18d2d43
3
+ size 509994093
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 2.0,
3
+ "train_loss": 1.5977188166040575,
4
+ "train_runtime": 2455.1123,
5
+ "train_samples": 10138,
6
+ "train_samples_per_second": 8.259,
7
+ "train_steps_per_second": 8.259
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,305 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 2.0,
5
+ "global_step": 20276,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.05,
12
+ "learning_rate": 4.8767015190372854e-05,
13
+ "loss": 3.6543,
14
+ "step": 500
15
+ },
16
+ {
17
+ "epoch": 0.1,
18
+ "learning_rate": 4.7534030380745706e-05,
19
+ "loss": 2.9884,
20
+ "step": 1000
21
+ },
22
+ {
23
+ "epoch": 0.15,
24
+ "learning_rate": 4.630104557111857e-05,
25
+ "loss": 2.6586,
26
+ "step": 1500
27
+ },
28
+ {
29
+ "epoch": 0.2,
30
+ "learning_rate": 4.506806076149142e-05,
31
+ "loss": 2.6187,
32
+ "step": 2000
33
+ },
34
+ {
35
+ "epoch": 0.25,
36
+ "learning_rate": 4.3835075951864274e-05,
37
+ "loss": 2.5105,
38
+ "step": 2500
39
+ },
40
+ {
41
+ "epoch": 0.3,
42
+ "learning_rate": 4.2602091142237125e-05,
43
+ "loss": 2.235,
44
+ "step": 3000
45
+ },
46
+ {
47
+ "epoch": 0.35,
48
+ "learning_rate": 4.1369106332609984e-05,
49
+ "loss": 2.2836,
50
+ "step": 3500
51
+ },
52
+ {
53
+ "epoch": 0.39,
54
+ "learning_rate": 4.013612152298284e-05,
55
+ "loss": 2.1538,
56
+ "step": 4000
57
+ },
58
+ {
59
+ "epoch": 0.44,
60
+ "learning_rate": 3.8903136713355694e-05,
61
+ "loss": 2.2132,
62
+ "step": 4500
63
+ },
64
+ {
65
+ "epoch": 0.49,
66
+ "learning_rate": 3.7670151903728545e-05,
67
+ "loss": 2.1561,
68
+ "step": 5000
69
+ },
70
+ {
71
+ "epoch": 0.49,
72
+ "eval_e": 0.4684004474272931,
73
+ "eval_f1": 0.4364628001202054,
74
+ "eval_loss": 3.77375864982605,
75
+ "eval_runtime": 80.8413,
76
+ "eval_samples_per_second": 44.235,
77
+ "eval_steps_per_second": 44.235,
78
+ "step": 5000
79
+ },
80
+ {
81
+ "epoch": 0.54,
82
+ "learning_rate": 3.6437167094101404e-05,
83
+ "loss": 2.0688,
84
+ "step": 5500
85
+ },
86
+ {
87
+ "epoch": 0.59,
88
+ "learning_rate": 3.5204182284474255e-05,
89
+ "loss": 2.0712,
90
+ "step": 6000
91
+ },
92
+ {
93
+ "epoch": 0.64,
94
+ "learning_rate": 3.3971197474847113e-05,
95
+ "loss": 1.7271,
96
+ "step": 6500
97
+ },
98
+ {
99
+ "epoch": 0.69,
100
+ "learning_rate": 3.2738212665219965e-05,
101
+ "loss": 1.7146,
102
+ "step": 7000
103
+ },
104
+ {
105
+ "epoch": 0.74,
106
+ "learning_rate": 3.150522785559282e-05,
107
+ "loss": 1.8106,
108
+ "step": 7500
109
+ },
110
+ {
111
+ "epoch": 0.79,
112
+ "learning_rate": 3.0272243045965675e-05,
113
+ "loss": 1.7478,
114
+ "step": 8000
115
+ },
116
+ {
117
+ "epoch": 0.84,
118
+ "learning_rate": 2.903925823633853e-05,
119
+ "loss": 1.8638,
120
+ "step": 8500
121
+ },
122
+ {
123
+ "epoch": 0.89,
124
+ "learning_rate": 2.780627342671138e-05,
125
+ "loss": 1.41,
126
+ "step": 9000
127
+ },
128
+ {
129
+ "epoch": 0.94,
130
+ "learning_rate": 2.657328861708424e-05,
131
+ "loss": 1.7941,
132
+ "step": 9500
133
+ },
134
+ {
135
+ "epoch": 0.99,
136
+ "learning_rate": 2.5340303807457095e-05,
137
+ "loss": 1.5219,
138
+ "step": 10000
139
+ },
140
+ {
141
+ "epoch": 0.99,
142
+ "eval_e": 0.5542505592841164,
143
+ "eval_f1": 0.5065431932934646,
144
+ "eval_loss": 2.430802822113037,
145
+ "eval_runtime": 80.8808,
146
+ "eval_samples_per_second": 44.213,
147
+ "eval_steps_per_second": 44.213,
148
+ "step": 10000
149
+ },
150
+ {
151
+ "epoch": 1.04,
152
+ "learning_rate": 2.4107318997829946e-05,
153
+ "loss": 1.2346,
154
+ "step": 10500
155
+ },
156
+ {
157
+ "epoch": 1.09,
158
+ "learning_rate": 2.2874334188202805e-05,
159
+ "loss": 1.2315,
160
+ "step": 11000
161
+ },
162
+ {
163
+ "epoch": 1.13,
164
+ "learning_rate": 2.1641349378575656e-05,
165
+ "loss": 1.2655,
166
+ "step": 11500
167
+ },
168
+ {
169
+ "epoch": 1.18,
170
+ "learning_rate": 2.040836456894851e-05,
171
+ "loss": 1.2839,
172
+ "step": 12000
173
+ },
174
+ {
175
+ "epoch": 1.23,
176
+ "learning_rate": 1.9175379759321366e-05,
177
+ "loss": 1.0951,
178
+ "step": 12500
179
+ },
180
+ {
181
+ "epoch": 1.28,
182
+ "learning_rate": 1.794239494969422e-05,
183
+ "loss": 1.2722,
184
+ "step": 13000
185
+ },
186
+ {
187
+ "epoch": 1.33,
188
+ "learning_rate": 1.6709410140067076e-05,
189
+ "loss": 1.0126,
190
+ "step": 13500
191
+ },
192
+ {
193
+ "epoch": 1.38,
194
+ "learning_rate": 1.547642533043993e-05,
195
+ "loss": 1.1157,
196
+ "step": 14000
197
+ },
198
+ {
199
+ "epoch": 1.43,
200
+ "learning_rate": 1.4243440520812784e-05,
201
+ "loss": 1.1724,
202
+ "step": 14500
203
+ },
204
+ {
205
+ "epoch": 1.48,
206
+ "learning_rate": 1.301045571118564e-05,
207
+ "loss": 0.9838,
208
+ "step": 15000
209
+ },
210
+ {
211
+ "epoch": 1.48,
212
+ "eval_e": 0.5724272930648769,
213
+ "eval_f1": 0.5272328806773431,
214
+ "eval_loss": 3.077216148376465,
215
+ "eval_runtime": 81.057,
216
+ "eval_samples_per_second": 44.117,
217
+ "eval_steps_per_second": 44.117,
218
+ "step": 15000
219
+ },
220
+ {
221
+ "epoch": 1.53,
222
+ "learning_rate": 1.1777470901558493e-05,
223
+ "loss": 0.9351,
224
+ "step": 15500
225
+ },
226
+ {
227
+ "epoch": 1.58,
228
+ "learning_rate": 1.0544486091931348e-05,
229
+ "loss": 0.997,
230
+ "step": 16000
231
+ },
232
+ {
233
+ "epoch": 1.63,
234
+ "learning_rate": 9.311501282304202e-06,
235
+ "loss": 0.9782,
236
+ "step": 16500
237
+ },
238
+ {
239
+ "epoch": 1.68,
240
+ "learning_rate": 8.078516472677057e-06,
241
+ "loss": 0.9934,
242
+ "step": 17000
243
+ },
244
+ {
245
+ "epoch": 1.73,
246
+ "learning_rate": 6.8455316630499115e-06,
247
+ "loss": 0.9395,
248
+ "step": 17500
249
+ },
250
+ {
251
+ "epoch": 1.78,
252
+ "learning_rate": 5.6125468534227665e-06,
253
+ "loss": 0.8001,
254
+ "step": 18000
255
+ },
256
+ {
257
+ "epoch": 1.82,
258
+ "learning_rate": 4.379562043795621e-06,
259
+ "loss": 0.9129,
260
+ "step": 18500
261
+ },
262
+ {
263
+ "epoch": 1.87,
264
+ "learning_rate": 3.1465772341684756e-06,
265
+ "loss": 1.0251,
266
+ "step": 19000
267
+ },
268
+ {
269
+ "epoch": 1.92,
270
+ "learning_rate": 1.9135924245413297e-06,
271
+ "loss": 1.0114,
272
+ "step": 19500
273
+ },
274
+ {
275
+ "epoch": 1.97,
276
+ "learning_rate": 6.806076149141843e-07,
277
+ "loss": 0.8334,
278
+ "step": 20000
279
+ },
280
+ {
281
+ "epoch": 1.97,
282
+ "eval_e": 0.6009507829977628,
283
+ "eval_f1": 0.5501093051292324,
284
+ "eval_loss": 2.754518747329712,
285
+ "eval_runtime": 81.0315,
286
+ "eval_samples_per_second": 44.131,
287
+ "eval_steps_per_second": 44.131,
288
+ "step": 20000
289
+ },
290
+ {
291
+ "epoch": 2.0,
292
+ "step": 20276,
293
+ "total_flos": 1.023741888632904e+16,
294
+ "train_loss": 1.5977188166040575,
295
+ "train_runtime": 2455.1123,
296
+ "train_samples_per_second": 8.259,
297
+ "train_steps_per_second": 8.259
298
+ }
299
+ ],
300
+ "max_steps": 20276,
301
+ "num_train_epochs": 2,
302
+ "total_flos": 1.023741888632904e+16,
303
+ "trial_name": null,
304
+ "trial_params": null
305
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:188c916efd0aab3620a4aedbf5879574a997104213f4553d6b96553c56e32c9c
3
+ size 2927