|
[[36m2024-05-28 23:33:58,508[39m][[34m__main__[39m][[32mINFO[39m] - Save taskmodule to /home/arne/projects/pie-document-level/models/dataset-sciarg/task-ner_re/v0.3/2024-05-28_23-33-46 [push_to_hub=False] |
|
[[36m2024-05-28 23:33:58,512[39m][[34m__main__[39m][[32mINFO[39m] - Starting training! |
|
[[36m2024-05-28 23:33:59,216[39m][[34mpie_modules.document.processing.tokenization[39m][[33mWARNING[39m] - could not convert all annotations from document with id=A02 to token based documents, missed annotations (disable this message with verbose=False): |
|
{ |
|
"binary_relations": "{BinaryRelation(head=LabeledSpan(start=23576, end=23668, label='own_claim', score=1.0), tail=LabeledSpan(start=24841, end=25014, label='own_claim', score=1.0), label='semantically_same', score=1.0)}" |
|
} |
|
[[36m2024-05-28 23:33:59,348[39m][[34mpie_modules.document.processing.tokenization[39m][[33mWARNING[39m] - could not convert all annotations from document with id=A06 to token based documents, missed annotations (disable this message with verbose=False): |
|
{ |
|
"binary_relations": "{BinaryRelation(head=LabeledSpan(start=38474, end=38482, label='own_claim', score=1.0), tail=LabeledSpan(start=39445, end=39547, label='own_claim', score=1.0), label='parts_of_same', score=1.0)}" |
|
} |
|
[[36m2024-05-28 23:33:59,567[39m][[34mpie_modules.document.processing.tokenization[39m][[33mWARNING[39m] - could not convert all annotations from document with id=A13 to token based documents, missed annotations (disable this message with verbose=False): |
|
{ |
|
"binary_relations": "{BinaryRelation(head=LabeledSpan(start=36143, end=36172, label='own_claim', score=1.0), tail=LabeledSpan(start=37284, end=37312, label='own_claim', score=1.0), label='parts_of_same', score=1.0)}" |
|
} |
|
encode inputs: 37%|ββββββββββββββββββββββββββββββββββββββββββββββββββ | 10/27 [00:01<00:02, 7.38it/s] |
|
[[36m2024-05-28 23:34:00,685[39m][[34mpie_modules.document.processing.tokenization[39m][[33mWARNING[39m] - could not convert all annotations from document with id=A22 to token based documents, missed annotations (disable this message with verbose=False): |
|
{ |
|
"binary_relations": "{BinaryRelation(head=LabeledSpan(start=34492, end=34542, label='background_claim', score=1.0), tail=LabeledSpan(start=33705, end=33750, label='background_claim', score=1.0), label='supports', score=1.0)}" |
|
} |
|
[[36m2024-05-28 23:34:01,674[39m][[34mpie_modules.document.processing.tokenization[39m][[33mWARNING[39m] - could not convert all annotations from document with id=A01 to token based documents, missed annotations (disable this message with verbose=False): |
|
{ |
|
"binary_relations": "{BinaryRelation(head=LabeledSpan(start=9590, end=9619, label='own_claim', score=1.0), tail=LabeledSpan(start=15220, end=15338, label='own_claim', score=1.0), label='semantically_same', score=1.0), BinaryRelation(head=LabeledSpan(start=15220, end=15338, label='own_claim', score=1.0), tail=LabeledSpan(start=15478, end=15498, label='own_claim', score=1.0), label='semantically_same', score=1.0), BinaryRelation(head=LabeledSpan(start=17950, end=17980, label='own_claim', score=1.0), tail=LabeledSpan(start=28960, end=29042, label='own_claim', score=1.0), label='semantically_same', score=1.0), BinaryRelation(head=LabeledSpan(start=5866, end=5919, label='own_claim', score=1.0), tail=LabeledSpan(start=9263, end=9308, label='own_claim', score=1.0), label='semantically_same', score=1.0)}" |
|
} |
|
[[36m2024-05-28 23:34:01,906[39m][[34mpie_modules.document.processing.tokenization[39m][[33mWARNING[39m] - could not convert all annotations from document with id=A20 to token based documents, missed annotations (disable this message with verbose=False): |
|
{ |
|
"binary_relations": "{BinaryRelation(head=LabeledSpan(start=10323, end=10462, label='own_claim', score=1.0), tail=LabeledSpan(start=11788, end=11920, label='own_claim', score=1.0), label='semantically_same', score=1.0)}" |
|
} |
|
[[36m2024-05-28 23:34:02,269[39m][[34mpie_modules.document.processing.tokenization[39m][[33mWARNING[39m] - could not convert all annotations from document with id=A29 to token based documents, missed annotations (disable this message with verbose=False): |
|
{ |
|
"binary_relations": "{BinaryRelation(head=LabeledSpan(start=19620, end=19626, label='data', score=1.0), tail=LabeledSpan(start=19919, end=19964, label='own_claim', score=1.0), label='supports', score=1.0)}" |
|
encode inputs: 100%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 27/27 [00:03<00:00, 7.83it/s] |
|
encode targets: 0%| | 0/537 [00:00<?, ?it/s] |
|
|
|
encode targets: 82%|βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 442/537 [00:02<00:00, 238.39it/s] |
|
[[36m2024-05-28 23:34:04,875[39m][[34mpie_modules.document.processing.tokenization[39m][[33mWARNING[39m] - could not convert all annotations from document with id=A04 to token based documents, missed annotations (disable this message with verbose=False): |
|
{ |
|
"binary_relations": "{BinaryRelation(head=LabeledSpan(start=3205, end=3270, label='background_claim', score=1.0), tail=LabeledSpan(start=7724, end=7814, label='background_claim', score=1.0), label='semantically_same', score=1.0)}" |
|
} |
|
βββββββ³βββββββββββββββββββββββββββββββββββββββββββββββββββββββ³ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ³βββββββββ |
|
β[1m [22mβ[1m Name [22mβ[1m Type [22mβ[1m Params [22mβ |
|
β‘ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ© |
|
β 0 β metric_val β WrappedLayerMetricsWithUnbatchAndDecodeWithErrorsFunction β 0 β |
|
β 1 β metric_val.layer_metrics β ModuleDict β 0 β |
|
β 2 β metric_val.layer_metrics.labeled_spans β PrecisionRecallAndF1ForLabeledAnnotations β 0 β |
|
β 3 β metric_val.layer_metrics.binary_relations β PrecisionRecallAndF1ForLabeledAnnotations β 0 β |
|
β 4 β metric_test β WrappedLayerMetricsWithUnbatchAndDecodeWithErrorsFunction β 0 β |
|
β 5 β metric_test.layer_metrics β ModuleDict β 0 β |
|
β 6 β metric_test.layer_metrics.labeled_spans β PrecisionRecallAndF1ForLabeledAnnotations β 0 β |
|
β 7 β metric_test.layer_metrics.binary_relations β PrecisionRecallAndF1ForLabeledAnnotations β 0 β |
|
β 8 β metric_train β WrappedLayerMetricsWithUnbatchAndDecodeWithErrorsFunction β 0 β |
|
β 9 β metric_train.layer_metrics β ModuleDict β 0 β |
|
β 10 β metric_train.layer_metrics.labeled_spans β PrecisionRecallAndF1ForLabeledAnnotations β 0 β |
|
β 11 β metric_train.layer_metrics.binary_relations β PrecisionRecallAndF1ForLabeledAnnotations β 0 β |
|
β 12 β model β BartAsPointerNetwork β 141 M β |
|
β 13 β model.model β BartModelWithDecoderPositionIds β 139 M β |
|
β 14 β model.model.shared β Embedding β 38.6 M β |
|
β 15 β model.model.encoder β BartEncoder β 81.9 M β |
|
β 16 β model.model.encoder.embed_positions β BartLearnedPositionalEmbedding β 787 K β |
|
β 17 β model.model.encoder.layers β ModuleList β 42.5 M β |
|
β 18 β model.model.encoder.layers.0 β BartEncoderLayer β 7.1 M β |
|
β 19 β model.model.encoder.layers.0.self_attn β BartSdpaAttention β 2.4 M β |
|
β 20 β model.model.encoder.layers.0.self_attn.k_proj β Linear β 590 K β |
|
β 21 β model.model.encoder.layers.0.self_attn.v_proj β Linear β 590 K β |
|
β 22 β model.model.encoder.layers.0.self_attn.q_proj β Linear β 590 K β |
|
β 23 β model.model.encoder.layers.0.self_attn.out_proj β Linear β 590 K β |
|
β 24 β model.model.encoder.layers.0.self_attn_layer_norm β LayerNorm β 1.5 K β |
|
β 25 β model.model.encoder.layers.0.activation_fn β GELUActivation β 0 β |
|
β 26 β model.model.encoder.layers.0.fc1 β Linear β 2.4 M β |
|
β 27 β model.model.encoder.layers.0.fc2 β Linear β 2.4 M β |
|
β 28 β model.model.encoder.layers.0.final_layer_norm β LayerNorm β 1.5 K β |
|
β 29 β model.model.encoder.layers.1 β BartEncoderLayer β 7.1 M β |
|
β 30 β model.model.encoder.layers.1.self_attn β BartSdpaAttention β 2.4 M β |
|
β 31 β model.model.encoder.layers.1.self_attn.k_proj β Linear β 590 K β |
|
β 32 β model.model.encoder.layers.1.self_attn.v_proj β Linear β 590 K β |
|
β 33 β model.model.encoder.layers.1.self_attn.q_proj β Linear β 590 K β |
|
β 34 β model.model.encoder.layers.1.self_attn.out_proj β Linear β 590 K β |
|
β 35 β model.model.encoder.layers.1.self_attn_layer_norm β LayerNorm β 1.5 K β |
|
β 36 β model.model.encoder.layers.1.activation_fn β GELUActivation β 0 β |
|
β 37 β model.model.encoder.layers.1.fc1 β Linear β 2.4 M β |
|
β 38 β model.model.encoder.layers.1.fc2 β Linear β 2.4 M β |
|
β 39 β model.model.encoder.layers.1.final_layer_norm β LayerNorm β 1.5 K β |
|
β 40 β model.model.encoder.layers.2 β BartEncoderLayer β 7.1 M β |
|
β 41 β model.model.encoder.layers.2.self_attn β BartSdpaAttention β 2.4 M β |
|
β 42 β model.model.encoder.layers.2.self_attn.k_proj β Linear β 590 K β |
|
β 43 β model.model.encoder.layers.2.self_attn.v_proj β Linear β 590 K β |
|
β 44 β model.model.encoder.layers.2.self_attn.q_proj β Linear β 590 K β |
|
β 45 β model.model.encoder.layers.2.self_attn.out_proj β Linear β 590 K β |
|
β 46 β model.model.encoder.layers.2.self_attn_layer_norm β LayerNorm β 1.5 K β |
|
β 47 β model.model.encoder.layers.2.activation_fn β GELUActivation β 0 β |
|
β 48 β model.model.encoder.layers.2.fc1 β Linear β 2.4 M β |
|
β 49 β model.model.encoder.layers.2.fc2 β Linear β 2.4 M β |
|
β 50 β model.model.encoder.layers.2.final_layer_norm β LayerNorm β 1.5 K β |
|
β 51 β model.model.encoder.layers.3 β BartEncoderLayer β 7.1 M β |
|
β 52 β model.model.encoder.layers.3.self_attn β BartSdpaAttention β 2.4 M β |
|
β 53 β model.model.encoder.layers.3.self_attn.k_proj β Linear β 590 K β |
|
β 54 β model.model.encoder.layers.3.self_attn.v_proj β Linear β 590 K β |
|
β 55 β model.model.encoder.layers.3.self_attn.q_proj β Linear β 590 K β |
|
β 56 β model.model.encoder.layers.3.self_attn.out_proj β Linear β 590 K β |
|
β 57 β model.model.encoder.layers.3.self_attn_layer_norm β LayerNorm β 1.5 K β |
|
β 58 β model.model.encoder.layers.3.activation_fn β GELUActivation β 0 β |
|
β 59 β model.model.encoder.layers.3.fc1 β Linear β 2.4 M β |
|
β 60 β model.model.encoder.layers.3.fc2 β Linear β 2.4 M β |
|
β 61 β model.model.encoder.layers.3.final_layer_norm β LayerNorm β 1.5 K β |
|
β 62 β model.model.encoder.layers.4 β BartEncoderLayer β 7.1 M β |
|
β 63 β model.model.encoder.layers.4.self_attn β BartSdpaAttention β 2.4 M β |
|
β 64 β model.model.encoder.layers.4.self_attn.k_proj β Linear β 590 K β |
|
β 65 β model.model.encoder.layers.4.self_attn.v_proj β Linear β 590 K β |
|
β 66 β model.model.encoder.layers.4.self_attn.q_proj β Linear β 590 K β |
|
β 67 β model.model.encoder.layers.4.self_attn.out_proj β Linear β 590 K β |
|
β 68 β model.model.encoder.layers.4.self_attn_layer_norm β LayerNorm β 1.5 K β |
|
β 69 β model.model.encoder.layers.4.activation_fn β GELUActivation β 0 β |
|
β 70 β model.model.encoder.layers.4.fc1 β Linear β 2.4 M β |
|
β 71 β model.model.encoder.layers.4.fc2 β Linear β 2.4 M β |
|
β 72 β model.model.encoder.layers.4.final_layer_norm β LayerNorm β 1.5 K β |
|
β 73 β model.model.encoder.layers.5 β BartEncoderLayer β 7.1 M β |
|
β 74 β model.model.encoder.layers.5.self_attn β BartSdpaAttention β 2.4 M β |
|
β 75 β model.model.encoder.layers.5.self_attn.k_proj β Linear β 590 K β |
|
β 76 β model.model.encoder.layers.5.self_attn.v_proj β Linear β 590 K β |
|
β 77 β model.model.encoder.layers.5.self_attn.q_proj β Linear β 590 K β |
|
β 78 β model.model.encoder.layers.5.self_attn.out_proj β Linear β 590 K β |
|
β 79 β model.model.encoder.layers.5.self_attn_layer_norm β LayerNorm β 1.5 K β |
|
β 80 β model.model.encoder.layers.5.activation_fn β GELUActivation β 0 β |
|
β 81 β model.model.encoder.layers.5.fc1 β Linear β 2.4 M β |
|
β 82 β model.model.encoder.layers.5.fc2 β Linear β 2.4 M β |
|
β 83 β model.model.encoder.layers.5.final_layer_norm β LayerNorm β 1.5 K β |
|
β 84 β model.model.encoder.layernorm_embedding β LayerNorm β 1.5 K β |
|
β 85 β model.model.decoder β BartDecoderWithPositionIds β 96.1 M β |
|
β 86 β model.model.decoder.embed_positions β BartLearnedPositionalEmbeddingWithPositionIds β 787 K β |
|
β 87 β model.model.decoder.layers β ModuleList β 56.7 M β |
|
β 88 β model.model.decoder.layers.0 β BartDecoderLayer β 9.5 M β |
|
β 89 β model.model.decoder.layers.0.self_attn β BartSdpaAttention β 2.4 M β |
|
β 90 β model.model.decoder.layers.0.self_attn.k_proj β Linear β 590 K β |
|
β 91 β model.model.decoder.layers.0.self_attn.v_proj β Linear β 590 K β |
|
β 92 β model.model.decoder.layers.0.self_attn.q_proj β Linear β 590 K β |
|
β 93 β model.model.decoder.layers.0.self_attn.out_proj β Linear β 590 K β |
|
β 94 β model.model.decoder.layers.0.activation_fn β GELUActivation β 0 β |
|
β 95 β model.model.decoder.layers.0.self_attn_layer_norm β LayerNorm β 1.5 K β |
|
β 96 β model.model.decoder.layers.0.encoder_attn β BartSdpaAttention β 2.4 M β |
|
β 97 β model.model.decoder.layers.0.encoder_attn.k_proj β Linear β 590 K β |
|
β 98 β model.model.decoder.layers.0.encoder_attn.v_proj β Linear β 590 K β |
|
β 99 β model.model.decoder.layers.0.encoder_attn.q_proj β Linear β 590 K β |
|
β 100 β model.model.decoder.layers.0.encoder_attn.out_proj β Linear β 590 K β |
|
β 101 β model.model.decoder.layers.0.encoder_attn_layer_norm β LayerNorm β 1.5 K β |
|
β 102 β model.model.decoder.layers.0.fc1 β Linear β 2.4 M β |
|
β 103 β model.model.decoder.layers.0.fc2 β Linear β 2.4 M β |
|
β 104 β model.model.decoder.layers.0.final_layer_norm β LayerNorm β 1.5 K β |
|
β 105 β model.model.decoder.layers.1 β BartDecoderLayer β 9.5 M β |
|
β 106 β model.model.decoder.layers.1.self_attn β BartSdpaAttention β 2.4 M β |
|
β 107 β model.model.decoder.layers.1.self_attn.k_proj β Linear β 590 K β |
|
β 108 β model.model.decoder.layers.1.self_attn.v_proj β Linear β 590 K β |
|
β 109 β model.model.decoder.layers.1.self_attn.q_proj β Linear β 590 K β |
|
β 110 β model.model.decoder.layers.1.self_attn.out_proj β Linear β 590 K β |
|
β 111 β model.model.decoder.layers.1.activation_fn β GELUActivation β 0 β |
|
β 112 β model.model.decoder.layers.1.self_attn_layer_norm β LayerNorm β 1.5 K β |
|
β 113 β model.model.decoder.layers.1.encoder_attn β BartSdpaAttention β 2.4 M β |
|
β 114 β model.model.decoder.layers.1.encoder_attn.k_proj β Linear β 590 K β |
|
β 115 β model.model.decoder.layers.1.encoder_attn.v_proj β Linear β 590 K β |
|
β 116 β model.model.decoder.layers.1.encoder_attn.q_proj β Linear β 590 K β |
|
β 117 β model.model.decoder.layers.1.encoder_attn.out_proj β Linear β 590 K β |
|
β 118 β model.model.decoder.layers.1.encoder_attn_layer_norm β LayerNorm β 1.5 K β |
|
β 119 β model.model.decoder.layers.1.fc1 β Linear β 2.4 M β |
|
β 120 β model.model.decoder.layers.1.fc2 β Linear β 2.4 M β |
|
β 121 β model.model.decoder.layers.1.final_layer_norm β LayerNorm β 1.5 K β |
|
β 122 β model.model.decoder.layers.2 β BartDecoderLayer β 9.5 M β |
|
β 123 β model.model.decoder.layers.2.self_attn β BartSdpaAttention β 2.4 M β |
|
β 124 β model.model.decoder.layers.2.self_attn.k_proj β Linear β 590 K β |
|
β 125 β model.model.decoder.layers.2.self_attn.v_proj β Linear β 590 K β |
|
β 126 β model.model.decoder.layers.2.self_attn.q_proj β Linear β 590 K β |
|
β 127 β model.model.decoder.layers.2.self_attn.out_proj β Linear β 590 K β |
|
β 128 β model.model.decoder.layers.2.activation_fn β GELUActivation β 0 β |
|
β 129 β model.model.decoder.layers.2.self_attn_layer_norm β LayerNorm β 1.5 K β |
|
β 130 β model.model.decoder.layers.2.encoder_attn β BartSdpaAttention β 2.4 M β |
|
β 131 β model.model.decoder.layers.2.encoder_attn.k_proj β Linear β 590 K β |
|
β 132 β model.model.decoder.layers.2.encoder_attn.v_proj β Linear β 590 K β |
|
β 133 β model.model.decoder.layers.2.encoder_attn.q_proj β Linear β 590 K β |
|
β 134 β model.model.decoder.layers.2.encoder_attn.out_proj β Linear β 590 K β |
|
β 135 β model.model.decoder.layers.2.encoder_attn_layer_norm β LayerNorm β 1.5 K β |
|
β 136 β model.model.decoder.layers.2.fc1 β Linear β 2.4 M β |
|
β 137 β model.model.decoder.layers.2.fc2 β Linear β 2.4 M β |
|
β 138 β model.model.decoder.layers.2.final_layer_norm β LayerNorm β 1.5 K β |
|
β 139 β model.model.decoder.layers.3 β BartDecoderLayer β 9.5 M β |
|
β 140 β model.model.decoder.layers.3.self_attn β BartSdpaAttention β 2.4 M β |
|
β 141 β model.model.decoder.layers.3.self_attn.k_proj β Linear β 590 K β |
|
β 142 β model.model.decoder.layers.3.self_attn.v_proj β Linear β 590 K β |
|
β 143 β model.model.decoder.layers.3.self_attn.q_proj β Linear β 590 K β |
|
β 144 β model.model.decoder.layers.3.self_attn.out_proj β Linear β 590 K β |
|
β 145 β model.model.decoder.layers.3.activation_fn β GELUActivation β 0 β |
|
β 146 β model.model.decoder.layers.3.self_attn_layer_norm β LayerNorm β 1.5 K β |
|
β 147 β model.model.decoder.layers.3.encoder_attn β BartSdpaAttention β 2.4 M β |
|
β 148 β model.model.decoder.layers.3.encoder_attn.k_proj β Linear β 590 K β |
|
β 149 β model.model.decoder.layers.3.encoder_attn.v_proj β Linear β 590 K β |
|
β 150 β model.model.decoder.layers.3.encoder_attn.q_proj β Linear β 590 K β |
|
β 151 β model.model.decoder.layers.3.encoder_attn.out_proj β Linear β 590 K β |
|
β 152 β model.model.decoder.layers.3.encoder_attn_layer_norm β LayerNorm β 1.5 K β |
|
β 153 β model.model.decoder.layers.3.fc1 β Linear β 2.4 M β |
|
β 154 β model.model.decoder.layers.3.fc2 β Linear β 2.4 M β |
|
β 155 β model.model.decoder.layers.3.final_layer_norm β LayerNorm β 1.5 K β |
|
β 156 β model.model.decoder.layers.4 β BartDecoderLayer β 9.5 M β |
|
β 157 β model.model.decoder.layers.4.self_attn β BartSdpaAttention β 2.4 M β |
|
β 158 β model.model.decoder.layers.4.self_attn.k_proj β Linear β 590 K β |
|
β 159 β model.model.decoder.layers.4.self_attn.v_proj β Linear β 590 K β |
|
β 160 β model.model.decoder.layers.4.self_attn.q_proj β Linear β 590 K β |
|
β 161 β model.model.decoder.layers.4.self_attn.out_proj β Linear β 590 K β |
|
β 162 β model.model.decoder.layers.4.activation_fn β GELUActivation β 0 β |
|
β 163 β model.model.decoder.layers.4.self_attn_layer_norm β LayerNorm β 1.5 K β |
|
β 164 β model.model.decoder.layers.4.encoder_attn β BartSdpaAttention β 2.4 M β |
|
β 165 β model.model.decoder.layers.4.encoder_attn.k_proj β Linear β 590 K β |
|
β 166 β model.model.decoder.layers.4.encoder_attn.v_proj β Linear β 590 K β |
|
β 167 β model.model.decoder.layers.4.encoder_attn.q_proj β Linear β 590 K β |
|
β 168 β model.model.decoder.layers.4.encoder_attn.out_proj β Linear β 590 K β |
|
β 169 β model.model.decoder.layers.4.encoder_attn_layer_norm β LayerNorm β 1.5 K β |
|
β 170 β model.model.decoder.layers.4.fc1 β Linear β 2.4 M β |
|
β 171 β model.model.decoder.layers.4.fc2 β Linear β 2.4 M β |
|
β 172 β model.model.decoder.layers.4.final_layer_norm β LayerNorm β 1.5 K β |
|
β 173 β model.model.decoder.layers.5 β BartDecoderLayer β 9.5 M β |
|
β 174 β model.model.decoder.layers.5.self_attn β BartSdpaAttention β 2.4 M β |
|
β 175 β model.model.decoder.layers.5.self_attn.k_proj β Linear β 590 K β |
|
β 176 β model.model.decoder.layers.5.self_attn.v_proj β Linear β 590 K β |
|
β 177 β model.model.decoder.layers.5.self_attn.q_proj β Linear β 590 K β |
|
β 178 β model.model.decoder.layers.5.self_attn.out_proj β Linear β 590 K β |
|
β 179 β model.model.decoder.layers.5.activation_fn β GELUActivation β 0 β |
|
β 180 β model.model.decoder.layers.5.self_attn_layer_norm β LayerNorm β 1.5 K β |
|
β 181 β model.model.decoder.layers.5.encoder_attn β BartSdpaAttention β 2.4 M β |
|
β 182 β model.model.decoder.layers.5.encoder_attn.k_proj β Linear β 590 K β |
|
β 183 β model.model.decoder.layers.5.encoder_attn.v_proj β Linear β 590 K β |
|
β 184 β model.model.decoder.layers.5.encoder_attn.q_proj β Linear β 590 K β |
|
β 185 β model.model.decoder.layers.5.encoder_attn.out_proj β Linear β 590 K β |
|
β 186 β model.model.decoder.layers.5.encoder_attn_layer_norm β LayerNorm β 1.5 K β |
|
β 187 β model.model.decoder.layers.5.fc1 β Linear β 2.4 M β |
|
β 188 β model.model.decoder.layers.5.fc2 β Linear β 2.4 M β |
|
β 189 β model.model.decoder.layers.5.final_layer_norm β LayerNorm β 1.5 K β |
|
β 190 β model.model.decoder.layernorm_embedding β LayerNorm β 1.5 K β |
|
β 191 β model.pointer_head β PointerHead β 41.0 M β |
|
β 192 β model.pointer_head.encoder_mlp β Sequential β 1.2 M β |
|
β 193 β model.pointer_head.encoder_mlp.0 β Linear β 590 K β |
|
β 194 β model.pointer_head.encoder_mlp.1 β Dropout β 0 β |
|
β 195 β model.pointer_head.encoder_mlp.2 β ReLU β 0 β |
|
β 196 β model.pointer_head.encoder_mlp.3 β Linear β 590 K β |
|
β 197 β model.pointer_head.constraints_encoder_mlp β Sequential β 1.2 M β |
|
β 198 β model.pointer_head.constraints_encoder_mlp.0 β Linear β 590 K β |
|
β 199 β model.pointer_head.constraints_encoder_mlp.1 β Dropout β 0 β |
|
β 200 β model.pointer_head.constraints_encoder_mlp.2 β ReLU β 0 β |
|
β 201 β model.pointer_head.constraints_encoder_mlp.3 β Linear β 590 K β |
|
βββββββ΄βββββββββββββββββββββββββββββββββββββββββββββββββββββββ΄ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ΄βββββββββ |
|
[1mTrainable params[22m: 141 M |
|
[1mNon-trainable params[22m: 0 |
|
[1mTotal params[22m: 141 M |
|
[1mTotal estimated model params size (MB)[22m: 567 |
|
/home/arne/miniconda3/envs/pie-document-level/lib/python3.9/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'val_dataloader' does not have many workers |
|
which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance. |
|
/home/arne/miniconda3/envs/pie-document-level/lib/python3.9/site-packages/transformers/generation/utils.py:1197: UserWarning: You have modified the pretrained model configuration to |
|
control generation. This is a deprecated strategy to control generation and will be removed soon, in a future version. Please use and modify the model generation configuration (see |
|
https://huggingface.co/docs/transformers/generation_strategies#default-text-generation-configuration ) |
|
encode targets: 100%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 537/537 [00:02<00:00, 222.45it/s] |
|
encode inputs: 100%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 4/4 [00:00<00:00, 10.53it/s] |
|
encode targets: 100%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 72/72 [00:00<00:00, 231.24it/s] |
|
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0] |
|
Loading `train_dataloader` to estimate number of stepping batches. |
|
/home/arne/miniconda3/envs/pie-document-level/lib/python3.9/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance. |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
[37mEpoch 0/149[39m [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.82it/s[39m [37mv_num: 91di loss/train_step: 5.348 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 1/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.89it/s[39m [37mv_num: 91di loss/train_step: 4.651 loss/val: 5.071 loss/train_epoch: 5.349 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 2/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.84it/s[39m [37mv_num: 91di loss/train_step: 5.185 loss/val: 3.677 loss/train_epoch: 4.854 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 3/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.85it/s[39m [37mv_num: 91di loss/train_step: 3.813 loss/val: 3.471 loss/train_epoch: 4.415 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 4/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:46 β’ 0:00:00[39m [38m5.72it/s[39m [37mv_num: 91di loss/train_step: 2.717 loss/val: 2.595 loss/train_epoch: 4.252 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 5/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.82it/s[39m [37mv_num: 91di loss/train_step: 3.089 loss/val: 2.219 loss/train_epoch: 3.127 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 6/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:46 β’ 0:00:00[39m [38m5.72it/s[39m [37mv_num: 91di loss/train_step: 3.199 loss/val: 2.075 loss/train_epoch: 2.683 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 7/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.82it/s[39m [37mv_num: 91di loss/train_step: 2.419 loss/val: 1.917 loss/train_epoch: 2.521 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 8/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.89it/s[39m [37mv_num: 91di loss/train_step: 2.774 loss/val: 1.801 loss/train_epoch: 2.188 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 9/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.82it/s[39m [37mv_num: 91di loss/train_step: 0.161 loss/val: 1.802 loss/train_epoch: 2.138 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 10/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.95it/s[39m [37mv_num: 91di loss/train_step: 2.037 loss/val: 1.605 loss/train_epoch: 2.026 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 11/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.88it/s[39m [37mv_num: 91di loss/train_step: 2.007 loss/val: 1.410 loss/train_epoch: 1.764 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 12/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.76it/s[39m [37mv_num: 91di loss/train_step: 1.451 loss/val: 1.374 loss/train_epoch: 1.689 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 13/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.92it/s[39m [37mv_num: 91di loss/train_step: 0.793 loss/val: 1.232 loss/train_epoch: 1.381 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 14/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.78it/s[39m [37mv_num: 91di loss/train_step: 1.384 loss/val: 1.248 loss/train_epoch: 1.386 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 15/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.90it/s[39m [37mv_num: 91di loss/train_step: 1.072 loss/val: 1.184 loss/train_epoch: 1.141 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 16/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.98it/s[39m [37mv_num: 91di loss/train_step: 0.182 loss/val: 1.105 loss/train_epoch: 1.041 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 17/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:46 β’ 0:00:00[39m [38m5.78it/s[39m [37mv_num: 91di loss/train_step: 0.782 loss/val: 1.127 loss/train_epoch: 0.931 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 18/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:46 β’ 0:00:00[39m [38m5.84it/s[39m [37mv_num: 91di loss/train_step: 0.855 loss/val: 1.123 loss/train_epoch: 0.865 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 19/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:46 β’ 0:00:00[39m [38m5.64it/s[39m [37mv_num: 91di loss/train_step: 0.745 loss/val: 1.117 loss/train_epoch: 0.771 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 20/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m6.08it/s[39m [37mv_num: 91di loss/train_step: 1.241 loss/val: 1.065 loss/train_epoch: 0.750 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 21/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.81it/s[39m [37mv_num: 91di loss/train_step: 0.645 loss/val: 1.074 loss/train_epoch: 0.737 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 22/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:46 β’ 0:00:00[39m [38m5.75it/s[39m [37mv_num: 91di loss/train_step: 0.422 loss/val: 1.103 loss/train_epoch: 0.682 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 23/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.78it/s[39m [37mv_num: 91di loss/train_step: 0.125 loss/val: 1.159 loss/train_epoch: 0.613 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 24/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:46 β’ 0:00:00[39m [38m5.79it/s[39m [37mv_num: 91di loss/train_step: 0.679 loss/val: 1.104 loss/train_epoch: 0.603 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 25/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:46 β’ 0:00:00[39m [38m5.72it/s[39m [37mv_num: 91di loss/train_step: 0.399 loss/val: 1.202 loss/train_epoch: 0.538 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 26/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:46 β’ 0:00:00[39m [38m5.85it/s[39m [37mv_num: 91di loss/train_step: 0.559 loss/val: 1.146 loss/train_epoch: 0.538 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 27/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.75it/s[39m [37mv_num: 91di loss/train_step: 0.696 loss/val: 1.154 loss/train_epoch: 0.492 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 28/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.89it/s[39m [37mv_num: 91di loss/train_step: 0.314 loss/val: 1.223 loss/train_epoch: 0.452 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 29/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:46 β’ 0:00:00[39m [38m5.84it/s[39m [37mv_num: 91di loss/train_step: 0.831 loss/val: 1.245 loss/train_epoch: 0.417 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 30/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.74it/s[39m [37mv_num: 91di loss/train_step: 0.603 loss/val: 1.228 loss/train_epoch: 0.441 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 31/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.87it/s[39m [37mv_num: 91di loss/train_step: 0.166 loss/val: 1.257 loss/train_epoch: 0.410 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 32/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.77it/s[39m [37mv_num: 91di loss/train_step: 0.080 loss/val: 1.371 loss/train_epoch: 0.369 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 33/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.90it/s[39m [37mv_num: 91di loss/train_step: 0.463 loss/val: 1.380 loss/train_epoch: 0.347 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 34/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:46 β’ 0:00:00[39m [38m5.98it/s[39m [37mv_num: 91di loss/train_step: 0.130 loss/val: 1.409 loss/train_epoch: 0.338 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 35/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.98it/s[39m [37mv_num: 91di loss/train_step: 0.221 loss/val: 1.524 loss/train_epoch: 0.316 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 36/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.90it/s[39m [37mv_num: 91di loss/train_step: 0.195 loss/val: 1.582 loss/train_epoch: 0.303 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 37/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.85it/s[39m [37mv_num: 91di loss/train_step: 0.112 loss/val: 1.523 loss/train_epoch: 0.303 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 38/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.86it/s[39m [37mv_num: 91di loss/train_step: 0.649 loss/val: 1.394 loss/train_epoch: 0.303 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 39/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.77it/s[39m [37mv_num: 91di loss/train_step: 0.247 loss/val: 1.421 loss/train_epoch: 0.293 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 40/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:46 β’ 0:00:00[39m [38m5.74it/s[39m [37mv_num: 91di loss/train_step: 0.129 loss/val: 1.397 loss/train_epoch: 0.283 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 41/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.89it/s[39m [37mv_num: 91di loss/train_step: 0.269 loss/val: 1.445 loss/train_epoch: 0.247 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 42/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.79it/s[39m [37mv_num: 91di loss/train_step: 0.187 loss/val: 1.516 loss/train_epoch: 0.245 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 43/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.94it/s[39m [37mv_num: 91di loss/train_step: 0.422 loss/val: 1.480 loss/train_epoch: 0.238 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 44/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.91it/s[39m [37mv_num: 91di loss/train_step: 0.315 loss/val: 1.524 loss/train_epoch: 0.227 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 45/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:46 β’ 0:00:00[39m [38m5.94it/s[39m [37mv_num: 91di loss/train_step: 0.203 loss/val: 1.493 loss/train_epoch: 0.223 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 46/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.79it/s[39m [37mv_num: 91di loss/train_step: 0.097 loss/val: 1.513 loss/train_epoch: 0.228 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 47/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.79it/s[39m [37mv_num: 91di loss/train_step: 0.202 loss/val: 1.626 loss/train_epoch: 0.215 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 48/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.86it/s[39m [37mv_num: 91di loss/train_step: 0.198 loss/val: 1.830 loss/train_epoch: 0.196 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 49/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:46 β’ 0:00:00[39m [38m5.72it/s[39m [37mv_num: 91di loss/train_step: 0.238 loss/val: 1.544 loss/train_epoch: 0.193 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 50/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.71it/s[39m [37mv_num: 91di loss/train_step: 0.188 loss/val: 1.625 loss/train_epoch: 0.183 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 51/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.89it/s[39m [37mv_num: 91di loss/train_step: 0.089 loss/val: 1.623 loss/train_epoch: 0.197 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 52/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:46 β’ 0:00:00[39m [38m5.82it/s[39m [37mv_num: 91di loss/train_step: 0.090 loss/val: 1.546 loss/train_epoch: 0.212 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 53/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.78it/s[39m [37mv_num: 91di loss/train_step: 0.216 loss/val: 1.505 loss/train_epoch: 0.199 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 54/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.85it/s[39m [37mv_num: 91di loss/train_step: 0.201 loss/val: 1.678 loss/train_epoch: 0.162 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 55/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:46 β’ 0:00:00[39m [38m5.90it/s[39m [37mv_num: 91di loss/train_step: 0.064 loss/val: 1.652 loss/train_epoch: 0.154 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 56/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.91it/s[39m [37mv_num: 91di loss/train_step: 0.041 loss/val: 1.553 loss/train_epoch: 0.157 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 57/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.86it/s[39m [37mv_num: 91di loss/train_step: 0.217 loss/val: 1.675 loss/train_epoch: 0.148 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 58/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:46 β’ 0:00:00[39m [38m5.75it/s[39m [37mv_num: 91di loss/train_step: 0.113 loss/val: 1.723 loss/train_epoch: 0.139 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 59/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:46 β’ 0:00:00[39m [38m5.77it/s[39m [37mv_num: 91di loss/train_step: 0.104 loss/val: 1.795 loss/train_epoch: 0.136 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 60/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:46 β’ 0:00:00[39m [38m5.91it/s[39m [37mv_num: 91di loss/train_step: 0.069 loss/val: 1.769 loss/train_epoch: 0.133 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 61/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m6.06it/s[39m [37mv_num: 91di loss/train_step: 0.127 loss/val: 1.640 loss/train_epoch: 0.133 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 62/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.82it/s[39m [37mv_num: 91di loss/train_step: 0.246 loss/val: 1.746 loss/train_epoch: 0.146 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 63/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.79it/s[39m [37mv_num: 91di loss/train_step: 0.629 loss/val: 1.645 loss/train_epoch: 0.148 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 64/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:46 β’ 0:00:00[39m [38m5.80it/s[39m [37mv_num: 91di loss/train_step: 0.111 loss/val: 1.741 loss/train_epoch: 0.131 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 65/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.87it/s[39m [37mv_num: 91di loss/train_step: 0.233 loss/val: 1.862 loss/train_epoch: 0.121 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 66/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.96it/s[39m [37mv_num: 91di loss/train_step: 0.073 loss/val: 1.758 loss/train_epoch: 0.145 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 67/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.99it/s[39m [37mv_num: 91di loss/train_step: 0.028 loss/val: 1.794 loss/train_epoch: 0.137 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 68/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.83it/s[39m [37mv_num: 91di loss/train_step: 0.026 loss/val: 1.717 loss/train_epoch: 0.132 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 69/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.86it/s[39m [37mv_num: 91di loss/train_step: 0.123 loss/val: 1.874 loss/train_epoch: 0.113 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 70/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:46 β’ 0:00:00[39m [38m5.80it/s[39m [37mv_num: 91di loss/train_step: 0.045 loss/val: 1.794 loss/train_epoch: 0.124 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 71/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:46 β’ 0:00:00[39m [38m5.87it/s[39m [37mv_num: 91di loss/train_step: 0.100 loss/val: 1.785 loss/train_epoch: 0.119 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 72/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.91it/s[39m [37mv_num: 91di loss/train_step: 0.034 loss/val: 1.798 loss/train_epoch: 0.107 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 73/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.87it/s[39m [37mv_num: 91di loss/train_step: 0.105 loss/val: 1.776 loss/train_epoch: 0.105 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 74/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.93it/s[39m [37mv_num: 91di loss/train_step: 0.168 loss/val: 1.799 loss/train_epoch: 0.129 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 75/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.79it/s[39m [37mv_num: 91di loss/train_step: 0.065 loss/val: 1.889 loss/train_epoch: 0.105 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 76/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.87it/s[39m [37mv_num: 91di loss/train_step: 0.035 loss/val: 1.877 loss/train_epoch: 0.109 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 77/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.83it/s[39m [37mv_num: 91di loss/train_step: 0.078 loss/val: 1.928 loss/train_epoch: 0.097 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 78/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.88it/s[39m [37mv_num: 91di loss/train_step: 0.131 loss/val: 1.931 loss/train_epoch: 0.109 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 79/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.70it/s[39m [37mv_num: 91di loss/train_step: 0.099 loss/val: 1.792 loss/train_epoch: 0.114 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 80/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:46 β’ 0:00:00[39m [38m5.74it/s[39m [37mv_num: 91di loss/train_step: 0.059 loss/val: 1.861 loss/train_epoch: 0.114 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 81/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.85it/s[39m [37mv_num: 91di loss/train_step: 0.023 loss/val: 1.913 loss/train_epoch: 0.104 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 82/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.87it/s[39m [37mv_num: 91di loss/train_step: 0.044 loss/val: 1.905 loss/train_epoch: 0.089 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 83/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.74it/s[39m [37mv_num: 91di loss/train_step: 0.072 loss/val: 1.943 loss/train_epoch: 0.091 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 84/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.78it/s[39m [37mv_num: 91di loss/train_step: 0.083 loss/val: 1.943 loss/train_epoch: 0.089 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 85/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:46 β’ 0:00:00[39m [38m5.80it/s[39m [37mv_num: 91di loss/train_step: 0.039 loss/val: 1.942 loss/train_epoch: 0.100 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 86/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.83it/s[39m [37mv_num: 91di loss/train_step: 0.040 loss/val: 1.911 loss/train_epoch: 0.080 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 87/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:46 β’ 0:00:00[39m [38m5.76it/s[39m [37mv_num: 91di loss/train_step: 0.056 loss/val: 1.917 loss/train_epoch: 0.088 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 88/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.85it/s[39m [37mv_num: 91di loss/train_step: 0.113 loss/val: 1.911 loss/train_epoch: 0.084 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 89/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.81it/s[39m [37mv_num: 91di loss/train_step: 0.035 loss/val: 1.832 loss/train_epoch: 0.096 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 90/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.96it/s[39m [37mv_num: 91di loss/train_step: 0.200 loss/val: 1.940 loss/train_epoch: 0.078 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 91/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.84it/s[39m [37mv_num: 91di loss/train_step: 0.037 loss/val: 1.810 loss/train_epoch: 0.152 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 92/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.86it/s[39m [37mv_num: 91di loss/train_step: 0.061 loss/val: 1.882 loss/train_epoch: 0.097 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 93/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.86it/s[39m [37mv_num: 91di loss/train_step: 0.032 loss/val: 1.924 loss/train_epoch: 0.086 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 94/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:46 β’ 0:00:00[39m [38m5.78it/s[39m [37mv_num: 91di loss/train_step: 0.065 loss/val: 1.964 loss/train_epoch: 0.088 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 95/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.82it/s[39m [37mv_num: 91di loss/train_step: 0.175 loss/val: 2.014 loss/train_epoch: 0.072 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 96/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.79it/s[39m [37mv_num: 91di loss/train_step: 0.098 loss/val: 2.006 loss/train_epoch: 0.082 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 97/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.83it/s[39m [37mv_num: 91di loss/train_step: 0.039 loss/val: 2.039 loss/train_epoch: 0.073 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 98/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.75it/s[39m [37mv_num: 91di loss/train_step: 0.183 loss/val: 2.132 loss/train_epoch: 0.087 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 99/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.86it/s[39m [37mv_num: 91di loss/train_step: 0.369 loss/val: 2.105 loss/train_epoch: 0.076 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 100/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:46 β’ 0:00:00[39m [38m5.70it/s[39m [37mv_num: 91di loss/train_step: 0.041 loss/val: 2.049 loss/train_epoch: 0.076 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 101/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.94it/s[39m [37mv_num: 91di loss/train_step: 0.036 loss/val: 2.143 loss/train_epoch: 0.077 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 102/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.77it/s[39m [37mv_num: 91di loss/train_step: 0.081 loss/val: 2.091 loss/train_epoch: 0.075 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 103/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.75it/s[39m [37mv_num: 91di loss/train_step: 0.033 loss/val: 2.147 loss/train_epoch: 0.076 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 104/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.88it/s[39m [37mv_num: 91di loss/train_step: 0.132 loss/val: 2.083 loss/train_epoch: 0.081 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 105/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:46 β’ 0:00:00[39m [38m5.73it/s[39m [37mv_num: 91di loss/train_step: 0.021 loss/val: 2.136 loss/train_epoch: 0.079 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 106/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:46 β’ 0:00:00[39m [38m5.77it/s[39m [37mv_num: 91di loss/train_step: 0.175 loss/val: 2.173 loss/train_epoch: 0.068 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 107/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.96it/s[39m [37mv_num: 91di loss/train_step: 0.032 loss/val: 2.089 loss/train_epoch: 0.075 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 108/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:46 β’ 0:00:00[39m [38m5.84it/s[39m [37mv_num: 91di loss/train_step: 0.038 loss/val: 2.221 loss/train_epoch: 0.071 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 109/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.87it/s[39m [37mv_num: 91di loss/train_step: 0.046 loss/val: 2.048 loss/train_epoch: 0.077 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 110/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:46 β’ 0:00:00[39m [38m5.78it/s[39m [37mv_num: 91di loss/train_step: 0.033 loss/val: 2.112 loss/train_epoch: 0.069 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 111/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.84it/s[39m [37mv_num: 91di loss/train_step: 0.043 loss/val: 2.121 loss/train_epoch: 0.068 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 112/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.63it/s[39m [37mv_num: 91di loss/train_step: 0.033 loss/val: 2.148 loss/train_epoch: 0.072 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 113/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.80it/s[39m [37mv_num: 91di loss/train_step: 0.035 loss/val: 2.160 loss/train_epoch: 0.068 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 114/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.88it/s[39m [37mv_num: 91di loss/train_step: 0.047 loss/val: 2.233 loss/train_epoch: 0.075 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 115/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.89it/s[39m [37mv_num: 91di loss/train_step: 0.056 loss/val: 2.143 loss/train_epoch: 0.066 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 116/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:46 β’ 0:00:00[39m [38m5.77it/s[39m [37mv_num: 91di loss/train_step: 0.047 loss/val: 2.137 loss/train_epoch: 0.064 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 117/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:46 β’ 0:00:00[39m [38m5.79it/s[39m [37mv_num: 91di loss/train_step: 0.065 loss/val: 2.219 loss/train_epoch: 0.061 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 118/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.99it/s[39m [37mv_num: 91di loss/train_step: 0.022 loss/val: 2.201 loss/train_epoch: 0.072 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 119/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.68it/s[39m [37mv_num: 91di loss/train_step: 0.023 loss/val: 2.236 loss/train_epoch: 0.070 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 120/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.71it/s[39m [37mv_num: 91di loss/train_step: 0.030 loss/val: 2.254 loss/train_epoch: 0.066 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 121/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.87it/s[39m [37mv_num: 91di loss/train_step: 0.052 loss/val: 2.163 loss/train_epoch: 0.071 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 122/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:46 β’ 0:00:00[39m [38m5.70it/s[39m [37mv_num: 91di loss/train_step: 0.055 loss/val: 2.212 loss/train_epoch: 0.075 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 123/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.82it/s[39m [37mv_num: 91di loss/train_step: 0.026 loss/val: 2.267 loss/train_epoch: 0.067 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 124/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.92it/s[39m [37mv_num: 91di loss/train_step: 0.020 loss/val: 2.247 loss/train_epoch: 0.071 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 125/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.91it/s[39m [37mv_num: 91di loss/train_step: 0.072 loss/val: 2.169 loss/train_epoch: 0.070 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 126/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.83it/s[39m [37mv_num: 91di loss/train_step: 0.685 loss/val: 2.217 loss/train_epoch: 0.063 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 127/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:46 β’ 0:00:00[39m [38m5.87it/s[39m [37mv_num: 91di loss/train_step: 0.046 loss/val: 2.225 loss/train_epoch: 0.065 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 128/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.89it/s[39m [37mv_num: 91di loss/train_step: 0.021 loss/val: 2.262 loss/train_epoch: 0.059 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 129/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.87it/s[39m [37mv_num: 91di loss/train_step: 0.030 loss/val: 2.257 loss/train_epoch: 0.060 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 130/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.74it/s[39m [37mv_num: 91di loss/train_step: 0.038 loss/val: 2.274 loss/train_epoch: 0.059 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 131/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.91it/s[39m [37mv_num: 91di loss/train_step: 0.030 loss/val: 2.288 loss/train_epoch: 0.059 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 132/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:46 β’ 0:00:00[39m [38m5.79it/s[39m [37mv_num: 91di loss/train_step: 0.413 loss/val: 2.227 loss/train_epoch: 0.064 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 133/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.79it/s[39m [37mv_num: 91di loss/train_step: 0.023 loss/val: 2.291 loss/train_epoch: 0.061 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 134/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:46 β’ 0:00:00[39m [38m5.76it/s[39m [37mv_num: 91di loss/train_step: 0.038 loss/val: 2.346 loss/train_epoch: 0.058 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 135/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.98it/s[39m [37mv_num: 91di loss/train_step: 0.040 loss/val: 2.342 loss/train_epoch: 0.056 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 136/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:46 β’ 0:00:00[39m [38m5.83it/s[39m [37mv_num: 91di loss/train_step: 0.024 loss/val: 2.382 loss/train_epoch: 0.055 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 137/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.88it/s[39m [37mv_num: 91di loss/train_step: 0.060 loss/val: 2.370 loss/train_epoch: 0.060 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 138/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.80it/s[39m [37mv_num: 91di loss/train_step: 0.029 loss/val: 2.350 loss/train_epoch: 0.060 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 139/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.97it/s[39m [37mv_num: 91di loss/train_step: 0.020 loss/val: 2.357 loss/train_epoch: 0.061 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 140/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.91it/s[39m [37mv_num: 91di loss/train_step: 0.018 loss/val: 2.340 loss/train_epoch: 0.074 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 141/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:46 β’ 0:00:00[39m [38m5.82it/s[39m [37mv_num: 91di loss/train_step: 0.038 loss/val: 2.392 loss/train_epoch: 0.056 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 142/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.92it/s[39m [37mv_num: 91di loss/train_step: 0.023 loss/val: 2.362 loss/train_epoch: 0.062 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 143/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.98it/s[39m [37mv_num: 91di loss/train_step: 0.057 loss/val: 2.396 loss/train_epoch: 0.068 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 144/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.72it/s[39m [37mv_num: 91di loss/train_step: 0.053 loss/val: 2.407 loss/train_epoch: 0.064 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 145/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:46 β’ 0:00:00[39m [38m5.83it/s[39m [37mv_num: 91di loss/train_step: 0.181 loss/val: 2.387 loss/train_epoch: 0.061 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 146/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:46 β’ 0:00:00[39m [38m5.79it/s[39m [37mv_num: 91di loss/train_step: 0.031 loss/val: 2.404 loss/train_epoch: 0.065 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 147/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:46 β’ 0:00:00[39m [38m5.79it/s[39m [37mv_num: 91di loss/train_step: 0.028 loss/val: 2.356 loss/train_epoch: 0.052 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 148/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:46 β’ 0:00:00[39m [38m5.85it/s[39m [37mv_num: 91di loss/train_step: 0.030 loss/val: 2.376 loss/train_epoch: 0.058 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Epoch 149/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.72it/s[39m [37mv_num: 91di loss/train_step: 0.028 loss/val: 2.393 loss/train_epoch: 0.051 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
[37mValidation[39m [38mβββββββββββββββββββββββββββββββββββββΈβββ[39m [37m33/36 [39m [38m0:00:59 β’ 0:00:06[39m [38m0.58it/s |
|
|
|
`Trainer.fit` stopped: `max_epochs=150` reached. |
|
/home/arne/miniconda3/envs/pie-document-level/lib/python3.9/site-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`. |
|
Epoch 149/149 [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m268/268[39m [38m0:00:45 β’ 0:00:00[39m [38m5.72it/s[39m [37mv_num: 91di loss/train_step: 0.028 loss/val: 2.378 loss/train_epoch: 0.059 |
|
[?25h[[36m2024-05-29 04:19:31,092[39m][[34m__main__[39m][[32mINFO[39m] - Best ckpt path: /mnt/data/experiments/pie-document-level/logs/training/multiruns/dataset-sciarg/task-ner_re/v0.3/2024-05-28_18-12-11/1/checkpoints/epoch_146.ckpt |
|
Some weights of BartAsPointerNetwork were not initialized from the model checkpoint at facebook/bart-base and are newly initialized: ['pointer_head.constraints_encoder_mlp.0.bias', 'pointer_head.constraints_encoder_mlp.0.weight', 'pointer_head.constraints_encoder_mlp.3.bias', 'pointer_head.constraints_encoder_mlp.3.weight', 'pointer_head.decoder_position_id_pattern', 'pointer_head.encoder_mlp.0.bias', 'pointer_head.encoder_mlp.0.weight', 'pointer_head.encoder_mlp.3.bias', 'pointer_head.encoder_mlp.3.weight', 'pointer_head.target2token_id'] |
|
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference. |
|
encode inputs: 50%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | 2/4 [00:00<00:00, 10.79it/s] |
|
[[36m2024-05-29 04:19:34,073[39m][[34m__main__[39m][[32mINFO[39m] - Save model to /home/arne/projects/pie-document-level/models/dataset-sciarg/task-ner_re/v0.3/2024-05-28_23-33-46 [push_to_hub=False] |
|
[[36m2024-05-29 04:19:34,508[39m][[34m__main__[39m][[32mINFO[39m] - Starting validation! |
|
[[36m2024-05-29 04:19:34,626[39m][[34mpie_modules.document.processing.tokenization[39m][[33mWARNING[39m] - could not convert all annotations from document with id=A04 to token based documents, missed annotations (disable this message with verbose=False): |
|
{ |
|
"binary_relations": "{BinaryRelation(head=LabeledSpan(start=3205, end=3270, label='background_claim', score=1.0), tail=LabeledSpan(start=7724, end=7814, label='background_claim', score=1.0), label='semantically_same', score=1.0)}" |
|
encode inputs: 100%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 4/4 [00:00<00:00, 10.55it/s] |
|
encode targets: 100%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 72/72 [00:00<00:00, 238.68it/s] |
|
Restoring states from the checkpoint path at /mnt/data/experiments/pie-document-level/logs/training/multiruns/dataset-sciarg/task-ner_re/v0.3/2024-05-28_18-12-11/1/checkpoints/epoch_146.ckpt |
|
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0] |
|
Loaded model weights from the checkpoint at /mnt/data/experiments/pie-document-level/logs/training/multiruns/dataset-sciarg/task-ner_re/v0.3/2024-05-28_18-12-11/1/checkpoints/epoch_146.ckpt |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
[37mValidation[39m [38mβββββββββββββββββββββββββββββββββββββΈβββ[39m [37m33/36[39m [38m0:00:57 β’ 0:00:05[39m [38m0.64it/s |
|
βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ³ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ |
|
β[1m Validate metric [22mβ[1m DataLoader 0 [22mβ |
|
β‘ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ© |
|
β[36m loss/val [39mβ[35m 2.3559770584106445 [39mβ |
|
β[36m metric/binary_relations/contradicts/f1/val [39mβ[35m 0.20481927692890167 [39mβ |
|
β[36m metric/binary_relations/contradicts/precision/val [39mβ[35m 0.2266666740179062 [39mβ |
|
β[36m metric/binary_relations/contradicts/recall/val [39mβ[35m 0.18681319057941437 [39mβ |
|
β[36m metric/binary_relations/macro/f1/val [39mβ[35m 0.16949915885925293 [39mβ |
|
β[36m metric/binary_relations/macro/precision/val [39mβ[35m 0.1830090433359146 [39mβ |
|
β[36m metric/binary_relations/macro/recall/val [39mβ[35m 0.15798917412757874 [39mβ |
|
β[36m metric/binary_relations/micro/f1/val [39mβ[35m 0.23862887918949127 [39mβ |
|
β[36m metric/binary_relations/micro/precision/val [39mβ[35m 0.25931233167648315 [39mβ |
|
β[36m metric/binary_relations/micro/recall/val [39mβ[35m 0.22100122272968292 [39mβ |
|
β[36m metric/binary_relations/parts_of_same/f1/val [39mβ[35m 0.22608695924282074 [39mβ |
|
β[36m metric/binary_relations/parts_of_same/precision/val [39mβ[35m 0.23636363446712494 [39mβ |
|
β[36m metric/binary_relations/parts_of_same/recall/val [39mβ[35m 0.21666666865348816 [39mβ |
|
β[36m metric/binary_relations/semantically_same/f1/val [39mβ[35m 0.0 [39mβ |
|
β[36m metric/binary_relations/semantically_same/precision/val [39mβ[35m 0.0 [39mβ |
|
β[36m metric/binary_relations/semantically_same/recall/val [39mβ[35m 0.0 [39mβ |
|
β[36m metric/binary_relations/supports/f1/val [39mβ[35m 0.2470904141664505 [39mβ |
|
β[36m metric/binary_relations/supports/precision/val [39mβ[35m 0.26900583505630493 [39mβ |
|
β[36m metric/binary_relations/supports/recall/val [39mβ[35m 0.22847682237625122 [39mβ |
|
β[36m metric/decoding_errors/all/val [39mβ[35m 0.0 [39mβ |
|
β[36m metric/decoding_errors/correct/val [39mβ[35m 1.0 [39mβ |
|
β[36m metric/exact_encoding_matches/val [39mβ[35m 0.3611111044883728 [39mβ |
|
β[36m metric/labeled_spans/background_claim/f1/val [39mβ[35m 0.5077399015426636 [39mβ |
|
β[36m metric/labeled_spans/background_claim/precision/val [39mβ[35m 0.4984802305698395 [39mβ |
|
β[36m metric/labeled_spans/background_claim/recall/val [39mβ[35m 0.5173501372337341 [39mβ |
|
β[36m metric/labeled_spans/data/f1/val [39mβ[35m 0.536285400390625 [39mβ |
|
β[36m metric/labeled_spans/data/precision/val [39mβ[35m 0.5797872543334961 [39mβ |
|
β[36m metric/labeled_spans/data/recall/val [39mβ[35m 0.4988558292388916 [39mβ |
|
β[36m metric/labeled_spans/macro/f1/val [39mβ[35m 0.49655672907829285 [39mβ |
|
β[36m metric/labeled_spans/macro/precision/val [39mβ[35m 0.4986630082130432 [39mβ |
|
β[36m metric/labeled_spans/macro/recall/val [39mβ[35m 0.49792489409446716 [39mβ |
|
β[36m metric/labeled_spans/micro/f1/val [39mβ[35m 0.4843537509441376 [39mβ |
|
β[36m metric/labeled_spans/micro/precision/val [39mβ[35m 0.4762541949748993 [39mβ |
|
β[36m metric/labeled_spans/micro/recall/val [39mβ[35m 0.49273356795310974 [39mβ |
|
β[36m metric/labeled_spans/own_claim/f1/val [39mβ[35m 0.4456448256969452 [39mβ |
|
β[36m metric/labeled_spans/own_claim/precision/val [39mβ[35m 0.4177215099334717 [39mβ |
|
β[36m metric/labeled_spans/own_claim/recall/val [39mβ[35m 0.47756874561309814 [39mβ |
|
βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ΄ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ |
|
[37mValidation[39m [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m36/36[39m [38m0:01:00 β’ 0:00:00[39m [38m0.64it/s |
|
encode inputs: 100%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 9/9 [00:01<00:00, 7.49it/s] |
|
encode targets: 100%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 165/165 [00:00<00:00, 257.10it/s] |
|
Restoring states from the checkpoint path at /mnt/data/experiments/pie-document-level/logs/training/multiruns/dataset-sciarg/task-ner_re/v0.3/2024-05-28_18-12-11/1/checkpoints/epoch_146.ckpt |
|
[[36m2024-05-29 04:20:37,543[39m][[34mpie_modules.document.processing.tokenization[39m][[33mWARNING[39m] - could not convert all annotations from document with id=A35 to token based documents, missed annotations (disable this message with verbose=False): |
|
{ |
|
"binary_relations": "{BinaryRelation(head=LabeledSpan(start=20713, end=20836, label='own_claim', score=1.0), tail=LabeledSpan(start=19655, end=19670, label='own_claim', score=1.0), label='contradicts', score=1.0)}" |
|
} |
|
[[36m2024-05-29 04:20:38,104[39m][[34mpie_modules.document.processing.tokenization[39m][[33mWARNING[39m] - could not convert all annotations from document with id=A40 to token based documents, missed annotations (disable this message with verbose=False): |
|
{ |
|
"binary_relations": "{BinaryRelation(head=LabeledSpan(start=16497, end=16501, label='data', score=1.0), tail=LabeledSpan(start=17415, end=17613, label='background_claim', score=1.0), label='supports', score=1.0)}" |
|
} |
|
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0] |
|
Loaded model weights from the checkpoint at /mnt/data/experiments/pie-document-level/logs/training/multiruns/dataset-sciarg/task-ner_re/v0.3/2024-05-28_18-12-11/1/checkpoints/epoch_146.ckpt |
|
/home/arne/miniconda3/envs/pie-document-level/lib/python3.9/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance. |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ³βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ |
|
β[1m Test metric [22mβ[1m DataLoader 0 [22mβ |
|
β‘ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ© |
|
β[36m loss/test [39mβ[35m 3.026346206665039 [39mβ |
|
β[36m metric/binary_relations/contradicts/f1/test [39mβ[35m 0.22545455396175385 [39mβ |
|
β[36m metric/binary_relations/contradicts/precision/test [39mβ[35m 0.2246376872062683 [39mβ |
|
β[36m metric/binary_relations/contradicts/recall/test [39mβ[35m 0.22627736628055573 [39mβ |
|
β[36m metric/binary_relations/macro/f1/test [39mβ[35m 0.2669767737388611 [39mβ |
|
β[36m metric/binary_relations/macro/precision/test [39mβ[35m 0.24492204189300537 [39mβ |
|
β[36m metric/binary_relations/macro/recall/test [39mβ[35m 0.31745821237564087 [39mβ |
|
β[36m metric/binary_relations/micro/f1/test [39mβ[35m 0.21676044166088104 [39mβ |
|
β[36m metric/binary_relations/micro/precision/test [39mβ[35m 0.22932331264019012 [39mβ |
|
β[36m metric/binary_relations/micro/recall/test [39mβ[35m 0.20550252497196198 [39mβ |
|
β[36m metric/binary_relations/parts_of_same/f1/test [39mβ[35m 0.17359857261180878 [39mβ |
|
β[36m metric/binary_relations/parts_of_same/precision/test [39mβ[35m 0.1818181872367859 [39mβ |
|
β[36m metric/binary_relations/parts_of_same/recall/test [39mβ[35m 0.1660899668931961 [39mβ |
|
β[36m metric/binary_relations/semantically_same/f1/test [39mβ[35m 0.444444477558136 [39mβ |
|
β[36m metric/binary_relations/semantically_same/precision/test [39mβ[35m 0.3333333432674408 [39mβ |
|
β[36m metric/binary_relations/semantically_same/recall/test [39mβ[35m 0.6666666865348816 [39mβ |
|
β[36m metric/binary_relations/supports/f1/test [39mβ[35m 0.22440946102142334 [39mβ |
|
β[36m metric/binary_relations/supports/precision/test [39mβ[35m 0.23989899456501007 [39mβ |
|
β[36m metric/binary_relations/supports/recall/test [39mβ[35m 0.21079881489276886 [39mβ |
|
β[36m metric/decoding_errors/all/test [39mβ[35m 0.20856545865535736 [39mβ |
|
β[36m metric/decoding_errors/correct/test [39mβ[35m 0.7914345264434814 [39mβ |
|
β[36m metric/decoding_errors/index/test [39mβ[35m 0.00034818940912373364 [39mβ |
|
β[36m metric/decoding_errors/len/test [39mβ[35m 0.20821726322174072 [39mβ |
|
β[36m metric/exact_encoding_matches/test [39mβ[35m 0.34545454382896423 [39mβ |
|
β[36m metric/labeled_spans/background_claim/f1/test [39mβ[35m 0.42137405276298523 [39mβ |
|
β[36m metric/labeled_spans/background_claim/precision/test [39mβ[35m 0.4502446949481964 [39mβ |
|
β[36m metric/labeled_spans/background_claim/recall/test [39mβ[35m 0.3959827721118927 [39mβ |
|
β[36m metric/labeled_spans/data/f1/test [39mβ[35m 0.5156335234642029 [39mβ |
|
β[36m metric/labeled_spans/data/precision/test [39mβ[35m 0.5328798294067383 [39mβ |
|
β[36m metric/labeled_spans/data/recall/test [39mβ[35m 0.4994686543941498 [39mβ |
|
β[36m metric/labeled_spans/macro/f1/test [39mβ[35m 0.43463802337646484 [39mβ |
|
β[36m metric/labeled_spans/macro/precision/test [39mβ[35m 0.4366190433502197 [39mβ |
|
β[36m metric/labeled_spans/macro/recall/test [39mβ[35m 0.43793198466300964 [39mβ |
|
β[36m metric/labeled_spans/micro/f1/test [39mβ[35m 0.42222580313682556 [39mβ |
|
β[36m metric/labeled_spans/micro/precision/test [39mβ[35m 0.40691158175468445 [39mβ |
|
β[36m metric/labeled_spans/micro/recall/test [39mβ[35m 0.4387378394603729 [39mβ |
|
β[36m metric/labeled_spans/own_claim/f1/test [39mβ[35m 0.36690646409988403 [39mβ |
|
β[36m metric/labeled_spans/own_claim/precision/test [39mβ[35m 0.32673266530036926 [39mβ |
|
β[36m metric/labeled_spans/own_claim/recall/test [39mβ[35m 0.41834452748298645 [39mβ |
|
ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ΄βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ |
|
[37mTesting[39m [38mββββββββββββββββββββββββββββββββββββββββ[39m [37m83/83[39m [38m0:02:19 β’ 0:00:00[39m [38m0.66it/s |
|
[?25h[[36m2024-05-29 04:22:59,545[39m][[34msrc.utils.logging_utils[39m][[32mINFO[39m] - Closing loggers... |
|
[[36m2024-05-29 04:22:59,545[39m][[34msrc.utils.logging_utils[39m][[32mINFO[39m] - Closing wandb! |