alex2awesome
committed on
Commit
•
55c3b8b
1
Parent(s):
5df7060
first commit
Browse files
- all_results.json +21 -0
- callback-metrics-state-0.json +1 -0
- callback-metrics-state-10000.json +1 -0
- callback-metrics-state-15000.json +1 -0
- callback-metrics-state-20000.json +1 -0
- callback-metrics-state-5000.json +1 -0
- config.json +38 -0
- post-training eval_results.json +9 -0
- pre-training eval_results.json +8 -0
- prediction_output.jsonl +0 -0
- pytorch_model.bin +3 -0
- train_results.json +8 -0
- trainer_state.json +305 -0
- training_args.bin +3 -0
all_results.json
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"epoch": 2.0,
|
3 |
+
"eval_e": 0.0,
|
4 |
+
"eval_f1": 0.003218218203323381,
|
5 |
+
"eval_loss": 7.38798189163208,
|
6 |
+
"eval_runtime": 81.0639,
|
7 |
+
"eval_samples": 3576,
|
8 |
+
"eval_samples_per_second": 44.113,
|
9 |
+
"eval_steps_per_second": 44.113,
|
10 |
+
"test_e": 0.6023489932885906,
|
11 |
+
"test_f1": 0.5506494789202888,
|
12 |
+
"test_loss": 2.7486965656280518,
|
13 |
+
"test_runtime": 80.778,
|
14 |
+
"test_samples_per_second": 44.269,
|
15 |
+
"test_steps_per_second": 44.269,
|
16 |
+
"train_loss": 1.5977188166040575,
|
17 |
+
"train_runtime": 2455.1123,
|
18 |
+
"train_samples": 10138,
|
19 |
+
"train_samples_per_second": 8.259,
|
20 |
+
"train_steps_per_second": 8.259
|
21 |
+
}
|
callback-metrics-state-0.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"INDIRECT QUOTE_loss": 7.3920416831970215, "INDIRECT QUOTE_f1": 0.002867467424910844, "INDIRECT QUOTE_e": 0.0, "DIRECT QUOTE_loss": 7.395792007446289, "DIRECT QUOTE_f1": 0.0026685346041993878, "DIRECT QUOTE_e": 0.0, "BACKGROUND_loss": 7.365277290344238, "BACKGROUND_f1": 0.004162366856499275, "BACKGROUND_e": 0.0, "PUBLISHED WORK_loss": 7.38569974899292, "PUBLISHED WORK_f1": 0.0026257877273695965, "PUBLISHED WORK_e": 0.0, "STATEMENT_loss": 7.371387004852295, "STATEMENT_f1": 0.002811513597655576, "STATEMENT_e": 0.0, "SOCIAL MEDIA POST_loss": 7.371214389801025, "SOCIAL MEDIA POST_f1": 0.0021667480985203876, "SOCIAL MEDIA POST_e": 0.0, "PRESS REPORT_loss": 7.400311470031738, "PRESS REPORT_f1": 0.002322142960838986, "PRESS REPORT_e": 0.0, "DECLINED COMMENT_loss": 7.410003662109375, "DECLINED COMMENT_f1": 0.0035249913168271703, "DECLINED COMMENT_e": 0.0, "PROPOSAL/ORDER/LAW_loss": 7.406617641448975, "PROPOSAL/ORDER/LAW_f1": 0.008154212035879524, "PROPOSAL/ORDER/LAW_e": 0.0, "PRICE SIGNAL_loss": 7.322811126708984, "PRICE SIGNAL_f1": 0.0020276297755392704, "PRICE SIGNAL_e": 0.0, "NARRATIVE_loss": 7.39856481552124, "NARRATIVE_f1": 0.005181297276397006, "NARRATIVE_e": 0.0, "DIRECT OBSERVATION_loss": 7.415427207946777, "DIRECT OBSERVATION_f1": 0.0, "DIRECT OBSERVATION_e": 0.0, "COMMUNICATION_loss": 7.398114204406738, "COMMUNICATION_f1": 0.003210920804232077, "COMMUNICATION_e": 0.0, "PUBLIC SPEECH_loss": 7.275794506072998, "PUBLIC SPEECH_f1": 0.010951878692647335, "PUBLIC SPEECH_e": 0.0, "VOTE/POLL_loss": 7.344000816345215, "VOTE/POLL_f1": 0.0036740648664935344, "VOTE/POLL_e": 0.0, "COURT PROCEEDING_loss": 7.464186191558838, "COURT PROCEEDING_f1": 0.0020636257293816943, "COURT PROCEEDING_e": 0.0, "full_loss": 7.38798189163208, "full_f1": 0.003218218203323381, "full_e": 0.0}
|
callback-metrics-state-10000.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"INDIRECT QUOTE_loss": 2.0357444286346436, "INDIRECT QUOTE_f1": 0.5565934558159891, "INDIRECT QUOTE_e": 0.5645454545454546, "DIRECT QUOTE_loss": 2.3745360374450684, "DIRECT QUOTE_f1": 0.5075596828055501, "DIRECT QUOTE_e": 0.5552784704904405, "BACKGROUND_loss": 2.600733995437622, "BACKGROUND_f1": 0.5894816406479484, "BACKGROUND_e": 0.6398104265402843, "PUBLISHED WORK_loss": 3.1265647411346436, "PUBLISHED WORK_f1": 0.40655165655496034, "PUBLISHED WORK_e": 0.44274809160305345, "STATEMENT_loss": 2.103498935699463, "STATEMENT_f1": 0.5028104423094844, "STATEMENT_e": 0.6381578947368421, "SOCIAL MEDIA POST_loss": 4.426347732543945, "SOCIAL MEDIA POST_f1": 0.1238095238095238, "SOCIAL MEDIA POST_e": 0.24242424242424243, "PRESS REPORT_loss": 3.8396220207214355, "PRESS REPORT_f1": 0.4449627001351139, "PRESS REPORT_e": 0.4827586206896552, "DECLINED COMMENT_loss": 2.6896440982818604, "DECLINED COMMENT_f1": 0.3125, "DECLINED COMMENT_e": 0.4375, "PROPOSAL/ORDER/LAW_loss": 3.2415931224823, "PROPOSAL/ORDER/LAW_f1": 0.28395954022988507, "PROPOSAL/ORDER/LAW_e": 0.36, "PRICE SIGNAL_loss": 3.032341241836548, "PRICE SIGNAL_f1": 0.39546112988894255, "PRICE SIGNAL_e": 0.42105263157894735, "NARRATIVE_loss": 1.632098913192749, "NARRATIVE_f1": 0.5818456841015806, "NARRATIVE_e": 0.6764705882352942, "DIRECT OBSERVATION_loss": 4.492552757263184, "DIRECT OBSERVATION_f1": 0.01694915254237288, "DIRECT OBSERVATION_e": 0.11864406779661017, "COMMUNICATION_loss": 2.633908748626709, "COMMUNICATION_f1": 0.546875, "COMMUNICATION_e": 0.546875, "PUBLIC SPEECH_loss": 3.456928014755249, "PUBLIC SPEECH_f1": 0.27166666666666667, "PUBLIC SPEECH_e": 0.6333333333333333, "VOTE/POLL_loss": 4.232885360717773, "VOTE/POLL_f1": 0.29275160256595884, "VOTE/POLL_e": 0.30434782608695654, "COURT PROCEEDING_loss": 3.8775787353515625, "COURT PROCEEDING_f1": 0.3294430014430014, "COURT PROCEEDING_e": 0.44, "full_loss": 2.430802822113037, "full_f1": 0.5065431932934646, "full_e": 0.5542505592841164}
|
callback-metrics-state-15000.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"INDIRECT QUOTE_loss": 2.478729248046875, "INDIRECT QUOTE_f1": 0.5864855228766466, "INDIRECT QUOTE_e": 0.5954545454545455, "DIRECT QUOTE_loss": 3.0114314556121826, "DIRECT QUOTE_f1": 0.530530760516083, "DIRECT QUOTE_e": 0.5685785536159601, "BACKGROUND_loss": 3.5276763439178467, "BACKGROUND_f1": 0.5905275636316465, "BACKGROUND_e": 0.6350710900473934, "PUBLISHED WORK_loss": 4.573206901550293, "PUBLISHED WORK_f1": 0.427382933012704, "PUBLISHED WORK_e": 0.48854961832061067, "STATEMENT_loss": 2.3900763988494873, "STATEMENT_f1": 0.5186403508771931, "STATEMENT_e": 0.6447368421052632, "SOCIAL MEDIA POST_loss": 5.451953887939453, "SOCIAL MEDIA POST_f1": 0.18181818181818182, "SOCIAL MEDIA POST_e": 0.30303030303030304, "PRESS REPORT_loss": 5.020430564880371, "PRESS REPORT_f1": 0.5002736726874658, "PRESS REPORT_e": 0.5344827586206896, "DECLINED COMMENT_loss": 3.7660765647888184, "DECLINED COMMENT_f1": 0.4068688118811881, "DECLINED COMMENT_e": 0.5625, "PROPOSAL/ORDER/LAW_loss": 4.049454689025879, "PROPOSAL/ORDER/LAW_f1": 0.2889760348583878, "PROPOSAL/ORDER/LAW_e": 0.4266666666666667, "PRICE SIGNAL_loss": 3.616384744644165, "PRICE SIGNAL_f1": 0.4947368421052632, "PRICE SIGNAL_e": 0.5263157894736842, "NARRATIVE_loss": 1.9826314449310303, "NARRATIVE_f1": 0.6029803893362629, "NARRATIVE_e": 0.6985294117647058, "DIRECT OBSERVATION_loss": 5.71368932723999, "DIRECT OBSERVATION_f1": 0.01694915254237288, "DIRECT OBSERVATION_e": 0.2033898305084746, "COMMUNICATION_loss": 3.4060516357421875, "COMMUNICATION_f1": 0.5073826058201059, "COMMUNICATION_e": 0.5, "PUBLIC SPEECH_loss": 3.7426810264587402, "PUBLIC SPEECH_f1": 0.25891142160512753, "PUBLIC SPEECH_e": 0.4666666666666667, "VOTE/POLL_loss": 4.689935207366943, "VOTE/POLL_f1": 0.3312252964426877, "VOTE/POLL_e": 0.34782608695652173, "COURT PROCEEDING_loss": 5.710793972015381, "COURT PROCEEDING_f1": 0.2707763347763348, "COURT PROCEEDING_e": 0.44, "full_loss": 3.077216148376465, "full_f1": 0.5272328806773431, "full_e": 0.5724272930648769}
|
callback-metrics-state-20000.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"INDIRECT QUOTE_loss": 2.1148886680603027, "INDIRECT QUOTE_f1": 0.6061409545975309, "INDIRECT QUOTE_e": 0.6154545454545455, "DIRECT QUOTE_loss": 2.783146381378174, "DIRECT QUOTE_f1": 0.5601386615724058, "DIRECT QUOTE_e": 0.6043225270157938, "BACKGROUND_loss": 3.3464651107788086, "BACKGROUND_f1": 0.6221739141179379, "BACKGROUND_e": 0.6682464454976303, "PUBLISHED WORK_loss": 3.9592363834381104, "PUBLISHED WORK_f1": 0.43717210437759113, "PUBLISHED WORK_e": 0.5038167938931297, "STATEMENT_loss": 2.1502439975738525, "STATEMENT_f1": 0.5222579773709637, "STATEMENT_e": 0.6381578947368421, "SOCIAL MEDIA POST_loss": 4.850955963134766, "SOCIAL MEDIA POST_f1": 0.15151515151515152, "SOCIAL MEDIA POST_e": 0.2727272727272727, "PRESS REPORT_loss": 4.060721397399902, "PRESS REPORT_f1": 0.45439298174801934, "PRESS REPORT_e": 0.5, "DECLINED COMMENT_loss": 3.9542932510375977, "DECLINED COMMENT_f1": 0.375, "DECLINED COMMENT_e": 0.5, "PROPOSAL/ORDER/LAW_loss": 3.2367727756500244, "PROPOSAL/ORDER/LAW_f1": 0.32273568107458245, "PROPOSAL/ORDER/LAW_e": 0.44, "PRICE SIGNAL_loss": 3.1010141372680664, "PRICE SIGNAL_f1": 0.45087719298245615, "PRICE SIGNAL_e": 0.47368421052631576, "NARRATIVE_loss": 1.8089553117752075, "NARRATIVE_f1": 0.6354846971078675, "NARRATIVE_e": 0.7352941176470589, "DIRECT OBSERVATION_loss": 3.868048906326294, "DIRECT OBSERVATION_f1": 0.01694915254237288, "DIRECT OBSERVATION_e": 0.4067796610169492, "COMMUNICATION_loss": 3.3353352546691895, "COMMUNICATION_f1": 0.570775462962963, "COMMUNICATION_e": 0.5625, "PUBLIC SPEECH_loss": 3.9616973400115967, "PUBLIC SPEECH_f1": 0.3, "PUBLIC SPEECH_e": 0.5666666666666667, "VOTE/POLL_loss": 4.4696855545043945, "VOTE/POLL_f1": 0.2924901185770751, "VOTE/POLL_e": 0.30434782608695654, "COURT PROCEEDING_loss": 5.168118953704834, "COURT PROCEEDING_f1": 0.34277633477633473, "COURT PROCEEDING_e": 0.44, "full_loss": 2.754518747329712, "full_f1": 0.5501093051292324, "full_e": 0.6009507829977628}
|
callback-metrics-state-5000.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"INDIRECT QUOTE_loss": 2.8530545234680176, "INDIRECT QUOTE_f1": 0.4941323500870946, "INDIRECT QUOTE_e": 0.5, "DIRECT QUOTE_loss": 3.684295892715454, "DIRECT QUOTE_f1": 0.42220711015100093, "DIRECT QUOTE_e": 0.4513715710723192, "BACKGROUND_loss": 3.72399640083313, "BACKGROUND_f1": 0.5459073471062601, "BACKGROUND_e": 0.5829383886255924, "PUBLISHED WORK_loss": 5.283926010131836, "PUBLISHED WORK_f1": 0.33880863285241525, "PUBLISHED WORK_e": 0.37404580152671757, "STATEMENT_loss": 3.947087287902832, "STATEMENT_f1": 0.40469407680490305, "STATEMENT_e": 0.4934210526315789, "SOCIAL MEDIA POST_loss": 6.421342372894287, "SOCIAL MEDIA POST_f1": 0.09113693324219639, "SOCIAL MEDIA POST_e": 0.18181818181818182, "PRESS REPORT_loss": 7.026741981506348, "PRESS REPORT_f1": 0.2708760847470401, "PRESS REPORT_e": 0.27586206896551724, "DECLINED COMMENT_loss": 6.057079315185547, "DECLINED COMMENT_f1": 0.34375, "DECLINED COMMENT_e": 0.4375, "PROPOSAL/ORDER/LAW_loss": 6.083311557769775, "PROPOSAL/ORDER/LAW_f1": 0.17483065083065083, "PROPOSAL/ORDER/LAW_e": 0.24, "PRICE SIGNAL_loss": 5.802990913391113, "PRICE SIGNAL_f1": 0.3157894736842105, "PRICE SIGNAL_e": 0.3684210526315789, "NARRATIVE_loss": 2.563370704650879, "NARRATIVE_f1": 0.560518411621996, "NARRATIVE_e": 0.6397058823529411, "DIRECT OBSERVATION_loss": 7.233707904815674, "DIRECT OBSERVATION_f1": 0.01694915254237288, "DIRECT OBSERVATION_e": 0.01694915254237288, "COMMUNICATION_loss": 5.04656982421875, "COMMUNICATION_f1": 0.4015110848372835, "COMMUNICATION_e": 0.40625, "PUBLIC SPEECH_loss": 6.0680975914001465, "PUBLIC SPEECH_f1": 0.255, "PUBLIC SPEECH_e": 0.4666666666666667, "VOTE/POLL_loss": 7.1436967849731445, "VOTE/POLL_f1": 0.26600790513833994, "VOTE/POLL_e": 0.2608695652173913, "COURT PROCEEDING_loss": 6.0425310134887695, "COURT PROCEEDING_f1": 0.21535511250254696, "COURT PROCEEDING_e": 0.4, "full_loss": 3.77375864982605, "full_f1": 0.4364628001202054, "full_e": 0.4684004474272931}
|
config.json
ADDED
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "google/bigbird-roberta-base",
|
3 |
+
"architectures": [
|
4 |
+
"QAModel"
|
5 |
+
],
|
6 |
+
"attention_probs_dropout_prob": 0.1,
|
7 |
+
"attention_type": "original_full",
|
8 |
+
"block_size": 64,
|
9 |
+
"bos_token_id": 1,
|
10 |
+
"classifier_dropout": null,
|
11 |
+
"eos_token_id": 2,
|
12 |
+
"freeze_layers": null,
|
13 |
+
"gradient_checkpointing": false,
|
14 |
+
"hidden_act": "gelu_new",
|
15 |
+
"hidden_dropout_prob": 0.1,
|
16 |
+
"hidden_size": 768,
|
17 |
+
"include_nones_as_positives": false,
|
18 |
+
"initializer_range": 0.02,
|
19 |
+
"intermediate_size": 3072,
|
20 |
+
"layer_norm_eps": 1e-12,
|
21 |
+
"loss_window": null,
|
22 |
+
"max_position_embeddings": 4096,
|
23 |
+
"model_type": "big_bird",
|
24 |
+
"num_attention_heads": 12,
|
25 |
+
"num_hidden_layers": 12,
|
26 |
+
"num_random_blocks": 3,
|
27 |
+
"pad_token_id": 0,
|
28 |
+
"position_embedding_type": "absolute",
|
29 |
+
"qa_head": {},
|
30 |
+
"rescale_embeddings": false,
|
31 |
+
"sep_token_id": 66,
|
32 |
+
"torch_dtype": "float32",
|
33 |
+
"transformers_version": "4.12.2",
|
34 |
+
"type_vocab_size": 2,
|
35 |
+
"use_bias": true,
|
36 |
+
"use_cache": true,
|
37 |
+
"vocab_size": 50358
|
38 |
+
}
|
post-training eval_results.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"eval_samples": 3576,
|
3 |
+
"test_e": 0.6023489932885906,
|
4 |
+
"test_f1": 0.5506494789202888,
|
5 |
+
"test_loss": 2.7486965656280518,
|
6 |
+
"test_runtime": 80.778,
|
7 |
+
"test_samples_per_second": 44.269,
|
8 |
+
"test_steps_per_second": 44.269
|
9 |
+
}
|
pre-training eval_results.json
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"eval_e": 0.0,
|
3 |
+
"eval_f1": 0.003218218203323381,
|
4 |
+
"eval_loss": 7.38798189163208,
|
5 |
+
"eval_runtime": 81.0639,
|
6 |
+
"eval_samples_per_second": 44.113,
|
7 |
+
"eval_steps_per_second": 44.113
|
8 |
+
}
|
prediction_output.jsonl
ADDED
The diff for this file is too large to render.
See raw diff
|
|
pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fadb5be0bb0e164763126f1767ffd20cdbb87d2bf3b4104c66e5ed5af18d2d43
|
3 |
+
size 509994093
|
train_results.json
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"epoch": 2.0,
|
3 |
+
"train_loss": 1.5977188166040575,
|
4 |
+
"train_runtime": 2455.1123,
|
5 |
+
"train_samples": 10138,
|
6 |
+
"train_samples_per_second": 8.259,
|
7 |
+
"train_steps_per_second": 8.259
|
8 |
+
}
|
trainer_state.json
ADDED
@@ -0,0 +1,305 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": null,
|
3 |
+
"best_model_checkpoint": null,
|
4 |
+
"epoch": 2.0,
|
5 |
+
"global_step": 20276,
|
6 |
+
"is_hyper_param_search": false,
|
7 |
+
"is_local_process_zero": true,
|
8 |
+
"is_world_process_zero": true,
|
9 |
+
"log_history": [
|
10 |
+
{
|
11 |
+
"epoch": 0.05,
|
12 |
+
"learning_rate": 4.8767015190372854e-05,
|
13 |
+
"loss": 3.6543,
|
14 |
+
"step": 500
|
15 |
+
},
|
16 |
+
{
|
17 |
+
"epoch": 0.1,
|
18 |
+
"learning_rate": 4.7534030380745706e-05,
|
19 |
+
"loss": 2.9884,
|
20 |
+
"step": 1000
|
21 |
+
},
|
22 |
+
{
|
23 |
+
"epoch": 0.15,
|
24 |
+
"learning_rate": 4.630104557111857e-05,
|
25 |
+
"loss": 2.6586,
|
26 |
+
"step": 1500
|
27 |
+
},
|
28 |
+
{
|
29 |
+
"epoch": 0.2,
|
30 |
+
"learning_rate": 4.506806076149142e-05,
|
31 |
+
"loss": 2.6187,
|
32 |
+
"step": 2000
|
33 |
+
},
|
34 |
+
{
|
35 |
+
"epoch": 0.25,
|
36 |
+
"learning_rate": 4.3835075951864274e-05,
|
37 |
+
"loss": 2.5105,
|
38 |
+
"step": 2500
|
39 |
+
},
|
40 |
+
{
|
41 |
+
"epoch": 0.3,
|
42 |
+
"learning_rate": 4.2602091142237125e-05,
|
43 |
+
"loss": 2.235,
|
44 |
+
"step": 3000
|
45 |
+
},
|
46 |
+
{
|
47 |
+
"epoch": 0.35,
|
48 |
+
"learning_rate": 4.1369106332609984e-05,
|
49 |
+
"loss": 2.2836,
|
50 |
+
"step": 3500
|
51 |
+
},
|
52 |
+
{
|
53 |
+
"epoch": 0.39,
|
54 |
+
"learning_rate": 4.013612152298284e-05,
|
55 |
+
"loss": 2.1538,
|
56 |
+
"step": 4000
|
57 |
+
},
|
58 |
+
{
|
59 |
+
"epoch": 0.44,
|
60 |
+
"learning_rate": 3.8903136713355694e-05,
|
61 |
+
"loss": 2.2132,
|
62 |
+
"step": 4500
|
63 |
+
},
|
64 |
+
{
|
65 |
+
"epoch": 0.49,
|
66 |
+
"learning_rate": 3.7670151903728545e-05,
|
67 |
+
"loss": 2.1561,
|
68 |
+
"step": 5000
|
69 |
+
},
|
70 |
+
{
|
71 |
+
"epoch": 0.49,
|
72 |
+
"eval_e": 0.4684004474272931,
|
73 |
+
"eval_f1": 0.4364628001202054,
|
74 |
+
"eval_loss": 3.77375864982605,
|
75 |
+
"eval_runtime": 80.8413,
|
76 |
+
"eval_samples_per_second": 44.235,
|
77 |
+
"eval_steps_per_second": 44.235,
|
78 |
+
"step": 5000
|
79 |
+
},
|
80 |
+
{
|
81 |
+
"epoch": 0.54,
|
82 |
+
"learning_rate": 3.6437167094101404e-05,
|
83 |
+
"loss": 2.0688,
|
84 |
+
"step": 5500
|
85 |
+
},
|
86 |
+
{
|
87 |
+
"epoch": 0.59,
|
88 |
+
"learning_rate": 3.5204182284474255e-05,
|
89 |
+
"loss": 2.0712,
|
90 |
+
"step": 6000
|
91 |
+
},
|
92 |
+
{
|
93 |
+
"epoch": 0.64,
|
94 |
+
"learning_rate": 3.3971197474847113e-05,
|
95 |
+
"loss": 1.7271,
|
96 |
+
"step": 6500
|
97 |
+
},
|
98 |
+
{
|
99 |
+
"epoch": 0.69,
|
100 |
+
"learning_rate": 3.2738212665219965e-05,
|
101 |
+
"loss": 1.7146,
|
102 |
+
"step": 7000
|
103 |
+
},
|
104 |
+
{
|
105 |
+
"epoch": 0.74,
|
106 |
+
"learning_rate": 3.150522785559282e-05,
|
107 |
+
"loss": 1.8106,
|
108 |
+
"step": 7500
|
109 |
+
},
|
110 |
+
{
|
111 |
+
"epoch": 0.79,
|
112 |
+
"learning_rate": 3.0272243045965675e-05,
|
113 |
+
"loss": 1.7478,
|
114 |
+
"step": 8000
|
115 |
+
},
|
116 |
+
{
|
117 |
+
"epoch": 0.84,
|
118 |
+
"learning_rate": 2.903925823633853e-05,
|
119 |
+
"loss": 1.8638,
|
120 |
+
"step": 8500
|
121 |
+
},
|
122 |
+
{
|
123 |
+
"epoch": 0.89,
|
124 |
+
"learning_rate": 2.780627342671138e-05,
|
125 |
+
"loss": 1.41,
|
126 |
+
"step": 9000
|
127 |
+
},
|
128 |
+
{
|
129 |
+
"epoch": 0.94,
|
130 |
+
"learning_rate": 2.657328861708424e-05,
|
131 |
+
"loss": 1.7941,
|
132 |
+
"step": 9500
|
133 |
+
},
|
134 |
+
{
|
135 |
+
"epoch": 0.99,
|
136 |
+
"learning_rate": 2.5340303807457095e-05,
|
137 |
+
"loss": 1.5219,
|
138 |
+
"step": 10000
|
139 |
+
},
|
140 |
+
{
|
141 |
+
"epoch": 0.99,
|
142 |
+
"eval_e": 0.5542505592841164,
|
143 |
+
"eval_f1": 0.5065431932934646,
|
144 |
+
"eval_loss": 2.430802822113037,
|
145 |
+
"eval_runtime": 80.8808,
|
146 |
+
"eval_samples_per_second": 44.213,
|
147 |
+
"eval_steps_per_second": 44.213,
|
148 |
+
"step": 10000
|
149 |
+
},
|
150 |
+
{
|
151 |
+
"epoch": 1.04,
|
152 |
+
"learning_rate": 2.4107318997829946e-05,
|
153 |
+
"loss": 1.2346,
|
154 |
+
"step": 10500
|
155 |
+
},
|
156 |
+
{
|
157 |
+
"epoch": 1.09,
|
158 |
+
"learning_rate": 2.2874334188202805e-05,
|
159 |
+
"loss": 1.2315,
|
160 |
+
"step": 11000
|
161 |
+
},
|
162 |
+
{
|
163 |
+
"epoch": 1.13,
|
164 |
+
"learning_rate": 2.1641349378575656e-05,
|
165 |
+
"loss": 1.2655,
|
166 |
+
"step": 11500
|
167 |
+
},
|
168 |
+
{
|
169 |
+
"epoch": 1.18,
|
170 |
+
"learning_rate": 2.040836456894851e-05,
|
171 |
+
"loss": 1.2839,
|
172 |
+
"step": 12000
|
173 |
+
},
|
174 |
+
{
|
175 |
+
"epoch": 1.23,
|
176 |
+
"learning_rate": 1.9175379759321366e-05,
|
177 |
+
"loss": 1.0951,
|
178 |
+
"step": 12500
|
179 |
+
},
|
180 |
+
{
|
181 |
+
"epoch": 1.28,
|
182 |
+
"learning_rate": 1.794239494969422e-05,
|
183 |
+
"loss": 1.2722,
|
184 |
+
"step": 13000
|
185 |
+
},
|
186 |
+
{
|
187 |
+
"epoch": 1.33,
|
188 |
+
"learning_rate": 1.6709410140067076e-05,
|
189 |
+
"loss": 1.0126,
|
190 |
+
"step": 13500
|
191 |
+
},
|
192 |
+
{
|
193 |
+
"epoch": 1.38,
|
194 |
+
"learning_rate": 1.547642533043993e-05,
|
195 |
+
"loss": 1.1157,
|
196 |
+
"step": 14000
|
197 |
+
},
|
198 |
+
{
|
199 |
+
"epoch": 1.43,
|
200 |
+
"learning_rate": 1.4243440520812784e-05,
|
201 |
+
"loss": 1.1724,
|
202 |
+
"step": 14500
|
203 |
+
},
|
204 |
+
{
|
205 |
+
"epoch": 1.48,
|
206 |
+
"learning_rate": 1.301045571118564e-05,
|
207 |
+
"loss": 0.9838,
|
208 |
+
"step": 15000
|
209 |
+
},
|
210 |
+
{
|
211 |
+
"epoch": 1.48,
|
212 |
+
"eval_e": 0.5724272930648769,
|
213 |
+
"eval_f1": 0.5272328806773431,
|
214 |
+
"eval_loss": 3.077216148376465,
|
215 |
+
"eval_runtime": 81.057,
|
216 |
+
"eval_samples_per_second": 44.117,
|
217 |
+
"eval_steps_per_second": 44.117,
|
218 |
+
"step": 15000
|
219 |
+
},
|
220 |
+
{
|
221 |
+
"epoch": 1.53,
|
222 |
+
"learning_rate": 1.1777470901558493e-05,
|
223 |
+
"loss": 0.9351,
|
224 |
+
"step": 15500
|
225 |
+
},
|
226 |
+
{
|
227 |
+
"epoch": 1.58,
|
228 |
+
"learning_rate": 1.0544486091931348e-05,
|
229 |
+
"loss": 0.997,
|
230 |
+
"step": 16000
|
231 |
+
},
|
232 |
+
{
|
233 |
+
"epoch": 1.63,
|
234 |
+
"learning_rate": 9.311501282304202e-06,
|
235 |
+
"loss": 0.9782,
|
236 |
+
"step": 16500
|
237 |
+
},
|
238 |
+
{
|
239 |
+
"epoch": 1.68,
|
240 |
+
"learning_rate": 8.078516472677057e-06,
|
241 |
+
"loss": 0.9934,
|
242 |
+
"step": 17000
|
243 |
+
},
|
244 |
+
{
|
245 |
+
"epoch": 1.73,
|
246 |
+
"learning_rate": 6.8455316630499115e-06,
|
247 |
+
"loss": 0.9395,
|
248 |
+
"step": 17500
|
249 |
+
},
|
250 |
+
{
|
251 |
+
"epoch": 1.78,
|
252 |
+
"learning_rate": 5.6125468534227665e-06,
|
253 |
+
"loss": 0.8001,
|
254 |
+
"step": 18000
|
255 |
+
},
|
256 |
+
{
|
257 |
+
"epoch": 1.82,
|
258 |
+
"learning_rate": 4.379562043795621e-06,
|
259 |
+
"loss": 0.9129,
|
260 |
+
"step": 18500
|
261 |
+
},
|
262 |
+
{
|
263 |
+
"epoch": 1.87,
|
264 |
+
"learning_rate": 3.1465772341684756e-06,
|
265 |
+
"loss": 1.0251,
|
266 |
+
"step": 19000
|
267 |
+
},
|
268 |
+
{
|
269 |
+
"epoch": 1.92,
|
270 |
+
"learning_rate": 1.9135924245413297e-06,
|
271 |
+
"loss": 1.0114,
|
272 |
+
"step": 19500
|
273 |
+
},
|
274 |
+
{
|
275 |
+
"epoch": 1.97,
|
276 |
+
"learning_rate": 6.806076149141843e-07,
|
277 |
+
"loss": 0.8334,
|
278 |
+
"step": 20000
|
279 |
+
},
|
280 |
+
{
|
281 |
+
"epoch": 1.97,
|
282 |
+
"eval_e": 0.6009507829977628,
|
283 |
+
"eval_f1": 0.5501093051292324,
|
284 |
+
"eval_loss": 2.754518747329712,
|
285 |
+
"eval_runtime": 81.0315,
|
286 |
+
"eval_samples_per_second": 44.131,
|
287 |
+
"eval_steps_per_second": 44.131,
|
288 |
+
"step": 20000
|
289 |
+
},
|
290 |
+
{
|
291 |
+
"epoch": 2.0,
|
292 |
+
"step": 20276,
|
293 |
+
"total_flos": 1.023741888632904e+16,
|
294 |
+
"train_loss": 1.5977188166040575,
|
295 |
+
"train_runtime": 2455.1123,
|
296 |
+
"train_samples_per_second": 8.259,
|
297 |
+
"train_steps_per_second": 8.259
|
298 |
+
}
|
299 |
+
],
|
300 |
+
"max_steps": 20276,
|
301 |
+
"num_train_epochs": 2,
|
302 |
+
"total_flos": 1.023741888632904e+16,
|
303 |
+
"trial_name": null,
|
304 |
+
"trial_params": null
|
305 |
+
}
|
training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:188c916efd0aab3620a4aedbf5879574a997104213f4553d6b96553c56e32c9c
|
3 |
+
size 2927
|