Spaces:
Running
Running
Upload 42 files
Browse files- .gitattributes +3 -0
- .gitignore +19 -0
- CompoundT5/CompoundT5/CompoundT5-config/config.json +30 -0
- CompoundT5/CompoundT5/CompoundT5-config/tokenizer.json +287 -0
- CompoundT5/CompoundT5/new_run_t5_mlm_flax.py +1143 -0
- CompoundT5/CompoundT5/run.sh +20 -0
- CompoundT5/README.md +35 -0
- CompoundT5/prepare_model.py +208 -0
- CompoundT5/preprocess_data.py +168 -0
- LICENSE.txt +21 -0
- data/additional_tokens.txt +46 -0
- data/create_fig.ipynb +0 -0
- data/data_analysis.ipynb +3 -0
- data/demo_reaction_data.csv +113 -0
- generation_utils.py +54 -0
- model-image.png +3 -0
- models.py +176 -0
- task_forward/accuracy-and-invalidity-check.ipynb +217 -0
- task_forward/calculate_accuracy.py +135 -0
- task_forward/finetune.py +251 -0
- task_forward/generate_embedding.py +129 -0
- task_forward/get_distance.py +74 -0
- task_forward/prediction.py +143 -0
- task_forward/train.py +312 -0
- task_forward/visualize_embedding.ipynb +0 -0
- task_retrosynthesis/accuracy-and-invalidity-check.ipynb +207 -0
- task_retrosynthesis/calculate_accuracy.py +134 -0
- task_retrosynthesis/finetune.py +278 -0
- task_retrosynthesis/generate_embedding.py +131 -0
- task_retrosynthesis/get_distance.py +74 -0
- task_retrosynthesis/prediction.py +143 -0
- task_retrosynthesis/train.py +305 -0
- task_retrosynthesis/visualize_embedding.ipynb +0 -0
- task_yield/calculate_score.ipynb +0 -0
- task_yield/convert_to_PreTrainedModel.py +77 -0
- task_yield/finetune.py +219 -0
- task_yield/generate_embedding.py +138 -0
- task_yield/get_distance.py +80 -0
- task_yield/prediction.py +173 -0
- task_yield/prediction_with_PreTrainedModel.py +119 -0
- task_yield/train.py +570 -0
- task_yield/visualize_embedding.ipynb +3 -0
- utils.py +277 -0
.gitattributes
CHANGED
@@ -32,3 +32,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
32 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
33 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
34 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
32 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
33 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
34 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
35 |
+
data/data_analysis.ipynb filter=lfs diff=lfs merge=lfs -text
|
36 |
+
model-image.png filter=lfs diff=lfs merge=lfs -text
|
37 |
+
task_yield/visualize_embedding.ipynb filter=lfs diff=lfs merge=lfs -text
|
.gitignore
ADDED
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
*.ipynb_checkpoints
|
2 |
+
__pycache__
|
3 |
+
*.csv
|
4 |
+
*.tsv
|
5 |
+
*.smi
|
6 |
+
*.bin
|
7 |
+
*.pth
|
8 |
+
*.pt
|
9 |
+
*.tar
|
10 |
+
*.tar.gz
|
11 |
+
*.zip
|
12 |
+
*.gz
|
13 |
+
*.tgz
|
14 |
+
*.rar
|
15 |
+
*.safetensors
|
16 |
+
*.npy
|
17 |
+
*.pkl
|
18 |
+
|
19 |
+
!data/demo_reaction_data.csv
|
CompoundT5/CompoundT5/CompoundT5-config/config.json
ADDED
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "/home/patrick/hugging_face/t5/t5-v1_1-base",
|
3 |
+
"architectures": [
|
4 |
+
"T5ForConditionalGeneration"
|
5 |
+
],
|
6 |
+
"d_ff": 2048,
|
7 |
+
"d_kv": 64,
|
8 |
+
"d_model": 768,
|
9 |
+
"decoder_start_token_id": 0,
|
10 |
+
"dense_act_fn": "gelu_new",
|
11 |
+
"dropout_rate": 0.1,
|
12 |
+
"eos_token_id": 1,
|
13 |
+
"feed_forward_proj": "gated-gelu",
|
14 |
+
"initializer_factor": 1.0,
|
15 |
+
"is_encoder_decoder": true,
|
16 |
+
"is_gated_act": true,
|
17 |
+
"layer_norm_epsilon": 1e-06,
|
18 |
+
"model_type": "t5",
|
19 |
+
"num_decoder_layers": 12,
|
20 |
+
"num_heads": 12,
|
21 |
+
"num_layers": 12,
|
22 |
+
"output_past": true,
|
23 |
+
"pad_token_id": 0,
|
24 |
+
"relative_attention_max_distance": 128,
|
25 |
+
"relative_attention_num_buckets": 32,
|
26 |
+
"tie_word_embeddings": false,
|
27 |
+
"transformers_version": "4.21.0.dev0",
|
28 |
+
"use_cache": true,
|
29 |
+
"vocab_size": 41
|
30 |
+
}
|
CompoundT5/CompoundT5/CompoundT5-config/tokenizer.json
ADDED
@@ -0,0 +1,287 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"version": "1.0",
|
3 |
+
"truncation": null,
|
4 |
+
"padding": null,
|
5 |
+
"added_tokens": [
|
6 |
+
{
|
7 |
+
"id": 0,
|
8 |
+
"content": "<pad>",
|
9 |
+
"single_word": false,
|
10 |
+
"lstrip": false,
|
11 |
+
"rstrip": false,
|
12 |
+
"normalized": false,
|
13 |
+
"special": true
|
14 |
+
},
|
15 |
+
{
|
16 |
+
"id": 1,
|
17 |
+
"content": "</s>",
|
18 |
+
"single_word": false,
|
19 |
+
"lstrip": false,
|
20 |
+
"rstrip": false,
|
21 |
+
"normalized": false,
|
22 |
+
"special": true
|
23 |
+
},
|
24 |
+
{
|
25 |
+
"id": 2,
|
26 |
+
"content": "<unk>",
|
27 |
+
"single_word": false,
|
28 |
+
"lstrip": false,
|
29 |
+
"rstrip": false,
|
30 |
+
"normalized": false,
|
31 |
+
"special": true
|
32 |
+
}
|
33 |
+
],
|
34 |
+
"normalizer": {
|
35 |
+
"type": "Sequence",
|
36 |
+
"normalizers": [
|
37 |
+
{
|
38 |
+
"type": "Nmt"
|
39 |
+
},
|
40 |
+
{
|
41 |
+
"type": "NFKC"
|
42 |
+
},
|
43 |
+
{
|
44 |
+
"type": "Replace",
|
45 |
+
"pattern": {
|
46 |
+
"Regex": " {2,}"
|
47 |
+
},
|
48 |
+
"content": " "
|
49 |
+
}
|
50 |
+
]
|
51 |
+
},
|
52 |
+
"pre_tokenizer": {
|
53 |
+
"type": "Sequence",
|
54 |
+
"pretokenizers": [
|
55 |
+
{
|
56 |
+
"type": "Metaspace",
|
57 |
+
"replacement": "▁",
|
58 |
+
"add_prefix_space": true
|
59 |
+
},
|
60 |
+
{
|
61 |
+
"type": "Digits",
|
62 |
+
"individual_digits": true
|
63 |
+
},
|
64 |
+
{
|
65 |
+
"type": "Punctuation",
|
66 |
+
"behavior": "Isolated"
|
67 |
+
}
|
68 |
+
]
|
69 |
+
},
|
70 |
+
"post_processor": {
|
71 |
+
"type": "TemplateProcessing",
|
72 |
+
"single": [
|
73 |
+
{
|
74 |
+
"Sequence": {
|
75 |
+
"id": "A",
|
76 |
+
"type_id": 0
|
77 |
+
}
|
78 |
+
},
|
79 |
+
{
|
80 |
+
"SpecialToken": {
|
81 |
+
"id": "</s>",
|
82 |
+
"type_id": 0
|
83 |
+
}
|
84 |
+
}
|
85 |
+
],
|
86 |
+
"pair": [
|
87 |
+
{
|
88 |
+
"Sequence": {
|
89 |
+
"id": "A",
|
90 |
+
"type_id": 0
|
91 |
+
}
|
92 |
+
},
|
93 |
+
{
|
94 |
+
"Sequence": {
|
95 |
+
"id": "B",
|
96 |
+
"type_id": 1
|
97 |
+
}
|
98 |
+
}
|
99 |
+
],
|
100 |
+
"special_tokens": {
|
101 |
+
"</s>": {
|
102 |
+
"id": "</s>",
|
103 |
+
"ids": [
|
104 |
+
1
|
105 |
+
],
|
106 |
+
"tokens": [
|
107 |
+
"</s>"
|
108 |
+
]
|
109 |
+
}
|
110 |
+
}
|
111 |
+
},
|
112 |
+
"decoder": {
|
113 |
+
"type": "Metaspace",
|
114 |
+
"replacement": "▁",
|
115 |
+
"add_prefix_space": true
|
116 |
+
},
|
117 |
+
"model": {
|
118 |
+
"type": "Unigram",
|
119 |
+
"unk_id": 2,
|
120 |
+
"vocab": [
|
121 |
+
[
|
122 |
+
"<pad>",
|
123 |
+
0.0
|
124 |
+
],
|
125 |
+
[
|
126 |
+
"</s>",
|
127 |
+
0.0
|
128 |
+
],
|
129 |
+
[
|
130 |
+
"<unk>",
|
131 |
+
0.0
|
132 |
+
],
|
133 |
+
[
|
134 |
+
"▁",
|
135 |
+
-0.6931471808026011
|
136 |
+
],
|
137 |
+
[
|
138 |
+
"c",
|
139 |
+
-2.289498028516334
|
140 |
+
],
|
141 |
+
[
|
142 |
+
"C",
|
143 |
+
-2.3191188737900035
|
144 |
+
],
|
145 |
+
[
|
146 |
+
"(",
|
147 |
+
-3.157145613029357
|
148 |
+
],
|
149 |
+
[
|
150 |
+
")",
|
151 |
+
-3.157145613029357
|
152 |
+
],
|
153 |
+
[
|
154 |
+
"1",
|
155 |
+
-3.4337494413900735
|
156 |
+
],
|
157 |
+
[
|
158 |
+
"O",
|
159 |
+
-3.8003416456793744
|
160 |
+
],
|
161 |
+
[
|
162 |
+
"2",
|
163 |
+
-3.8354203318153104
|
164 |
+
],
|
165 |
+
[
|
166 |
+
"N",
|
167 |
+
-3.9489619191823486
|
168 |
+
],
|
169 |
+
[
|
170 |
+
"]",
|
171 |
+
-4.114143160310146
|
172 |
+
],
|
173 |
+
[
|
174 |
+
"[",
|
175 |
+
-4.114143160310146
|
176 |
+
],
|
177 |
+
[
|
178 |
+
"@",
|
179 |
+
-4.185726512332149
|
180 |
+
],
|
181 |
+
[
|
182 |
+
"H",
|
183 |
+
-4.201161413116868
|
184 |
+
],
|
185 |
+
[
|
186 |
+
"=",
|
187 |
+
-4.26644820084319
|
188 |
+
],
|
189 |
+
[
|
190 |
+
"n",
|
191 |
+
-4.300186073016661
|
192 |
+
],
|
193 |
+
[
|
194 |
+
"3",
|
195 |
+
-4.824395958274135
|
196 |
+
],
|
197 |
+
[
|
198 |
+
"+",
|
199 |
+
-5.412930408280779
|
200 |
+
],
|
201 |
+
[
|
202 |
+
"F",
|
203 |
+
-5.636658395691338
|
204 |
+
],
|
205 |
+
[
|
206 |
+
"-",
|
207 |
+
-5.944123069167032
|
208 |
+
],
|
209 |
+
[
|
210 |
+
"S",
|
211 |
+
-6.23059354933377
|
212 |
+
],
|
213 |
+
[
|
214 |
+
"s",
|
215 |
+
-6.3086720535935505
|
216 |
+
],
|
217 |
+
[
|
218 |
+
"l",
|
219 |
+
-6.356164827135707
|
220 |
+
],
|
221 |
+
[
|
222 |
+
"4",
|
223 |
+
-6.474778787500576
|
224 |
+
],
|
225 |
+
[
|
226 |
+
"o",
|
227 |
+
-6.5919851676767856
|
228 |
+
],
|
229 |
+
[
|
230 |
+
"#",
|
231 |
+
-7.471440033681638
|
232 |
+
],
|
233 |
+
[
|
234 |
+
"r",
|
235 |
+
-7.600338586268233
|
236 |
+
],
|
237 |
+
[
|
238 |
+
"B",
|
239 |
+
-7.600338586268233
|
240 |
+
],
|
241 |
+
[
|
242 |
+
"/",
|
243 |
+
-8.02057032804323
|
244 |
+
],
|
245 |
+
[
|
246 |
+
"5",
|
247 |
+
-8.905241806184042
|
248 |
+
],
|
249 |
+
[
|
250 |
+
"\\",
|
251 |
+
-9.431656471484382
|
252 |
+
],
|
253 |
+
[
|
254 |
+
"I",
|
255 |
+
-10.348187932078408
|
256 |
+
],
|
257 |
+
[
|
258 |
+
"6",
|
259 |
+
-12.084066778027127
|
260 |
+
],
|
261 |
+
[
|
262 |
+
"7",
|
263 |
+
-15.584016494881563
|
264 |
+
],
|
265 |
+
[
|
266 |
+
"p",
|
267 |
+
-17.628494092721255
|
268 |
+
],
|
269 |
+
[
|
270 |
+
"8",
|
271 |
+
-18.37808350350985
|
272 |
+
],
|
273 |
+
[
|
274 |
+
"P",
|
275 |
+
-19.003564863395415
|
276 |
+
],
|
277 |
+
[
|
278 |
+
".",
|
279 |
+
-20.190108874992006
|
280 |
+
],
|
281 |
+
[
|
282 |
+
"9",
|
283 |
+
-21.023442208325346
|
284 |
+
]
|
285 |
+
]
|
286 |
+
}
|
287 |
+
}
|
CompoundT5/CompoundT5/new_run_t5_mlm_flax.py
ADDED
@@ -0,0 +1,1143 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/env python
|
2 |
+
# coding=utf-8
|
3 |
+
# Copyright 2021 The HuggingFace Team All rights reserved.
|
4 |
+
#
|
5 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
6 |
+
# you may not use this file except in compliance with the License.
|
7 |
+
# You may obtain a copy of the License at
|
8 |
+
#
|
9 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
10 |
+
#
|
11 |
+
# Unless required by applicable law or agreed to in writing, software
|
12 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
13 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
14 |
+
# See the License for the specific language governing permissions and
|
15 |
+
# limitations under the License.
|
16 |
+
"""
|
17 |
+
Pretraining the library models for T5-like span-masked language modeling on a text file or a dataset.
|
18 |
+
|
19 |
+
Here is the full list of checkpoints on the hub that can be pretrained by this script:
|
20 |
+
https://huggingface.co/models?filter=t5
|
21 |
+
"""
|
22 |
+
|
23 |
+
import json
|
24 |
+
import logging
|
25 |
+
import os
|
26 |
+
import sys
|
27 |
+
import time
|
28 |
+
from dataclasses import asdict, dataclass, field
|
29 |
+
|
30 |
+
# You can also adapt this script on your own masked language modeling task. Pointers for this are left as comments.
|
31 |
+
from enum import Enum
|
32 |
+
|
33 |
+
# from transformers.utils import get_full_repo_name, send_example_telemetry
|
34 |
+
from functools import partialmethod
|
35 |
+
from itertools import chain
|
36 |
+
from pathlib import Path
|
37 |
+
from typing import Dict, List, Optional
|
38 |
+
|
39 |
+
import flax
|
40 |
+
import jax
|
41 |
+
import jax.numpy as jnp
|
42 |
+
import numpy as np
|
43 |
+
import optax
|
44 |
+
from datasets import load_dataset
|
45 |
+
from flax import jax_utils, traverse_util
|
46 |
+
from flax.training import train_state
|
47 |
+
from flax.training.common_utils import get_metrics, onehot, shard
|
48 |
+
from tqdm import tqdm
|
49 |
+
from transformers import (
|
50 |
+
CONFIG_MAPPING,
|
51 |
+
FLAX_MODEL_FOR_MASKED_LM_MAPPING,
|
52 |
+
AutoTokenizer,
|
53 |
+
BatchEncoding,
|
54 |
+
FlaxT5ForConditionalGeneration,
|
55 |
+
HfArgumentParser,
|
56 |
+
PreTrainedTokenizerBase,
|
57 |
+
T5Config,
|
58 |
+
is_tensorboard_available,
|
59 |
+
set_seed,
|
60 |
+
)
|
61 |
+
from transformers.models.t5.modeling_flax_t5 import shift_tokens_right
|
62 |
+
|
63 |
+
tqdm.__init__ = partialmethod(tqdm.__init__, disable=True)
|
64 |
+
|
65 |
+
|
66 |
+
MODEL_CONFIG_CLASSES = list(FLAX_MODEL_FOR_MASKED_LM_MAPPING.keys())
|
67 |
+
MODEL_TYPES = tuple(conf.model_type for conf in MODEL_CONFIG_CLASSES)
|
68 |
+
|
69 |
+
|
70 |
+
@dataclass
|
71 |
+
class TrainingArguments:
|
72 |
+
output_dir: str = field(
|
73 |
+
metadata={
|
74 |
+
"help": "The output directory where the model predictions and checkpoints will be written."
|
75 |
+
},
|
76 |
+
)
|
77 |
+
overwrite_output_dir: bool = field(
|
78 |
+
default=False,
|
79 |
+
metadata={
|
80 |
+
"help": (
|
81 |
+
"Overwrite the content of the output directory. "
|
82 |
+
"Use this to continue training if output_dir points to a checkpoint directory."
|
83 |
+
)
|
84 |
+
},
|
85 |
+
)
|
86 |
+
do_train: bool = field(default=False, metadata={"help": "Whether to run training."})
|
87 |
+
do_eval: bool = field(
|
88 |
+
default=False, metadata={"help": "Whether to run eval on the dev set."}
|
89 |
+
)
|
90 |
+
per_device_train_batch_size: int = field(
|
91 |
+
default=8, metadata={"help": "Batch size per GPU/TPU core/CPU for training."}
|
92 |
+
)
|
93 |
+
per_device_eval_batch_size: int = field(
|
94 |
+
default=8, metadata={"help": "Batch size per GPU/TPU core/CPU for evaluation."}
|
95 |
+
)
|
96 |
+
learning_rate: float = field(
|
97 |
+
default=5e-5, metadata={"help": "The initial learning rate for AdamW."}
|
98 |
+
)
|
99 |
+
weight_decay: float = field(
|
100 |
+
default=0.0, metadata={"help": "Weight decay for AdamW if we apply some."}
|
101 |
+
)
|
102 |
+
adam_beta1: float = field(
|
103 |
+
default=0.9, metadata={"help": "Beta1 for AdamW optimizer"}
|
104 |
+
)
|
105 |
+
adam_beta2: float = field(
|
106 |
+
default=0.999, metadata={"help": "Beta2 for AdamW optimizer"}
|
107 |
+
)
|
108 |
+
adam_epsilon: float = field(
|
109 |
+
default=1e-8, metadata={"help": "Epsilon for AdamW optimizer."}
|
110 |
+
)
|
111 |
+
adafactor: bool = field(
|
112 |
+
default=False,
|
113 |
+
metadata={"help": "Whether or not to replace AdamW by Adafactor."},
|
114 |
+
)
|
115 |
+
num_train_epochs: float = field(
|
116 |
+
default=3.0, metadata={"help": "Total number of training epochs to perform."}
|
117 |
+
)
|
118 |
+
warmup_steps: int = field(
|
119 |
+
default=0, metadata={"help": "Linear warmup over warmup_steps."}
|
120 |
+
)
|
121 |
+
logging_steps: int = field(
|
122 |
+
default=500, metadata={"help": "Log every X updates steps."}
|
123 |
+
)
|
124 |
+
save_steps: int = field(
|
125 |
+
default=500, metadata={"help": "Save checkpoint every X updates steps."}
|
126 |
+
)
|
127 |
+
eval_steps: int = field(
|
128 |
+
default=None, metadata={"help": "Run an evaluation every X steps."}
|
129 |
+
)
|
130 |
+
seed: int = field(
|
131 |
+
default=42,
|
132 |
+
metadata={"help": "Random seed that will be set at the beginning of training."},
|
133 |
+
)
|
134 |
+
push_to_hub: bool = field(
|
135 |
+
default=False,
|
136 |
+
metadata={
|
137 |
+
"help": "Whether or not to upload the trained model to the model hub after training."
|
138 |
+
},
|
139 |
+
)
|
140 |
+
hub_model_id: str = field(
|
141 |
+
default=None,
|
142 |
+
metadata={
|
143 |
+
"help": "The name of the repository to keep in sync with the local `output_dir`."
|
144 |
+
},
|
145 |
+
)
|
146 |
+
hub_token: str = field(
|
147 |
+
default=None, metadata={"help": "The token to use to push to the Model Hub."}
|
148 |
+
)
|
149 |
+
|
150 |
+
def __post_init__(self):
|
151 |
+
if self.output_dir is not None:
|
152 |
+
self.output_dir = os.path.expanduser(self.output_dir)
|
153 |
+
|
154 |
+
def to_dict(self):
|
155 |
+
"""
|
156 |
+
Serializes this instance while replace `Enum` by their values (for JSON serialization support). It obfuscates
|
157 |
+
the token values by removing their value.
|
158 |
+
"""
|
159 |
+
d = asdict(self)
|
160 |
+
for k, v in d.items():
|
161 |
+
if isinstance(v, Enum):
|
162 |
+
d[k] = v.value
|
163 |
+
if isinstance(v, list) and len(v) > 0 and isinstance(v[0], Enum):
|
164 |
+
d[k] = [x.value for x in v]
|
165 |
+
if k.endswith("_token"):
|
166 |
+
d[k] = f"<{k.upper()}>"
|
167 |
+
return d
|
168 |
+
|
169 |
+
|
170 |
+
@dataclass
|
171 |
+
class ModelArguments:
|
172 |
+
"""
|
173 |
+
Arguments pertaining to which model/config/tokenizer we are going to fine-tune, or train from scratch.
|
174 |
+
"""
|
175 |
+
|
176 |
+
model_name_or_path: Optional[str] = field(
|
177 |
+
default=None,
|
178 |
+
metadata={
|
179 |
+
"help": (
|
180 |
+
"The model checkpoint for weights initialization.Don't set if you want to train a model from scratch."
|
181 |
+
)
|
182 |
+
},
|
183 |
+
)
|
184 |
+
model_type: Optional[str] = field(
|
185 |
+
default=None,
|
186 |
+
metadata={
|
187 |
+
"help": "If training from scratch, pass a model type from the list: "
|
188 |
+
+ ", ".join(MODEL_TYPES)
|
189 |
+
},
|
190 |
+
)
|
191 |
+
config_name: Optional[str] = field(
|
192 |
+
default=None,
|
193 |
+
metadata={
|
194 |
+
"help": "Pretrained config name or path if not the same as model_name"
|
195 |
+
},
|
196 |
+
)
|
197 |
+
tokenizer_name: Optional[str] = field(
|
198 |
+
default=None,
|
199 |
+
metadata={
|
200 |
+
"help": "Pretrained tokenizer name or path if not the same as model_name"
|
201 |
+
},
|
202 |
+
)
|
203 |
+
cache_dir: Optional[str] = field(
|
204 |
+
default=None,
|
205 |
+
metadata={
|
206 |
+
"help": "Where do you want to store the pretrained models downloaded from s3"
|
207 |
+
},
|
208 |
+
)
|
209 |
+
use_fast_tokenizer: bool = field(
|
210 |
+
default=True,
|
211 |
+
metadata={
|
212 |
+
"help": "Whether to use one of the fast tokenizer (backed by the tokenizers library) or not."
|
213 |
+
},
|
214 |
+
)
|
215 |
+
dtype: Optional[str] = field(
|
216 |
+
default="float32",
|
217 |
+
metadata={
|
218 |
+
"help": (
|
219 |
+
"Floating-point format in which the model weights should be initialized and trained. Choose one of"
|
220 |
+
" `[float32, float16, bfloat16]`."
|
221 |
+
)
|
222 |
+
},
|
223 |
+
)
|
224 |
+
|
225 |
+
|
226 |
+
# use_auth_token: bool = field(
|
227 |
+
# default=False,
|
228 |
+
# metadata={
|
229 |
+
# "help": (
|
230 |
+
# "Will use the token generated when running `transformers-cli login` (necessary to use this script "
|
231 |
+
# "with private models)."
|
232 |
+
# )
|
233 |
+
# },
|
234 |
+
# )
|
235 |
+
|
236 |
+
|
237 |
+
@dataclass
|
238 |
+
class DataTrainingArguments:
|
239 |
+
"""
|
240 |
+
Arguments pertaining to what data we are going to input our model for training and eval.
|
241 |
+
"""
|
242 |
+
|
243 |
+
dataset_name: Optional[str] = field(
|
244 |
+
default=None,
|
245 |
+
metadata={"help": "The name of the dataset to use (via the datasets library)."},
|
246 |
+
)
|
247 |
+
dataset_config_name: Optional[str] = field(
|
248 |
+
default=None,
|
249 |
+
metadata={
|
250 |
+
"help": "The configuration name of the dataset to use (via the datasets library)."
|
251 |
+
},
|
252 |
+
)
|
253 |
+
train_file: Optional[str] = field(
|
254 |
+
default=None, metadata={"help": "The input training data file (a text file)."}
|
255 |
+
)
|
256 |
+
validation_file: Optional[str] = field(
|
257 |
+
default=None,
|
258 |
+
metadata={
|
259 |
+
"help": "An optional input evaluation data file to evaluate the perplexity on (a text file)."
|
260 |
+
},
|
261 |
+
)
|
262 |
+
train_ref_file: Optional[str] = field(
|
263 |
+
default=None,
|
264 |
+
metadata={
|
265 |
+
"help": "An optional input train ref data file for whole word masking in Chinese."
|
266 |
+
},
|
267 |
+
)
|
268 |
+
validation_ref_file: Optional[str] = field(
|
269 |
+
default=None,
|
270 |
+
metadata={
|
271 |
+
"help": "An optional input validation ref data file for whole word masking in Chinese."
|
272 |
+
},
|
273 |
+
)
|
274 |
+
overwrite_cache: bool = field(
|
275 |
+
default=False,
|
276 |
+
metadata={"help": "Overwrite the cached training and evaluation sets"},
|
277 |
+
)
|
278 |
+
validation_split_percentage: Optional[int] = field(
|
279 |
+
default=5,
|
280 |
+
metadata={
|
281 |
+
"help": "The percentage of the train set used as validation set in case there's no validation split"
|
282 |
+
},
|
283 |
+
)
|
284 |
+
max_seq_length: Optional[int] = field(
|
285 |
+
default=None,
|
286 |
+
metadata={
|
287 |
+
"help": (
|
288 |
+
"The maximum total input sequence length after tokenization and masking. Sequences longer than this"
|
289 |
+
" will be truncated. Default to the max input length of the model."
|
290 |
+
)
|
291 |
+
},
|
292 |
+
)
|
293 |
+
preprocessing_num_workers: Optional[int] = field(
|
294 |
+
default=None,
|
295 |
+
metadata={"help": "The number of processes to use for the preprocessing."},
|
296 |
+
)
|
297 |
+
mlm_probability: float = field(
|
298 |
+
default=0.15,
|
299 |
+
metadata={
|
300 |
+
"help": "Ratio of tokens to mask for span masked language modeling loss"
|
301 |
+
},
|
302 |
+
)
|
303 |
+
mean_noise_span_length: float = field(
|
304 |
+
default=3.0,
|
305 |
+
metadata={"help": "Mean span length of masked tokens"},
|
306 |
+
)
|
307 |
+
|
308 |
+
def __post_init__(self):
|
309 |
+
if (
|
310 |
+
self.dataset_name is None
|
311 |
+
and self.train_file is None
|
312 |
+
and self.validation_file is None
|
313 |
+
):
|
314 |
+
raise ValueError(
|
315 |
+
"Need either a dataset name or a training/validation file."
|
316 |
+
)
|
317 |
+
else:
|
318 |
+
if self.train_file is not None:
|
319 |
+
extension = self.train_file.split(".")[-1]
|
320 |
+
assert extension in ["csv", "json", "txt"], (
|
321 |
+
"`train_file` should be a csv, a json or a txt file."
|
322 |
+
)
|
323 |
+
if self.validation_file is not None:
|
324 |
+
extension = self.validation_file.split(".")[-1]
|
325 |
+
assert extension in ["csv", "json", "txt"], (
|
326 |
+
"`validation_file` should be a csv, a json or a txt file."
|
327 |
+
)
|
328 |
+
|
329 |
+
|
330 |
+
def compute_input_and_target_lengths(
|
331 |
+
inputs_length, noise_density, mean_noise_span_length
|
332 |
+
):
|
333 |
+
"""This function is copy of `random_spans_helper <https://github.com/google-research/text-to-text-transfer-transformer/blob/84f8bcc14b5f2c03de51bd3587609ba8f6bbd1cd/t5/data/preprocessors.py#L2466>`__ .
|
334 |
+
|
335 |
+
Training parameters to avoid padding with random_spans_noise_mask.
|
336 |
+
When training a model with random_spans_noise_mask, we would like to set the other
|
337 |
+
training hyperparmeters in a way that avoids padding.
|
338 |
+
This function helps us compute these hyperparameters.
|
339 |
+
We assume that each noise span in the input is replaced by extra_tokens_per_span_inputs sentinel tokens,
|
340 |
+
and each non-noise span in the targets is replaced by extra_tokens_per_span_targets sentinel tokens.
|
341 |
+
This function tells us the required number of tokens in the raw example (for split_tokens())
|
342 |
+
as well as the length of the encoded targets. Note that this function assumes
|
343 |
+
the inputs and targets will have EOS appended and includes that in the reported length.
|
344 |
+
|
345 |
+
Args:
|
346 |
+
inputs_length: an integer - desired length of the tokenized inputs sequence
|
347 |
+
noise_density: a float
|
348 |
+
mean_noise_span_length: a float
|
349 |
+
Returns:
|
350 |
+
tokens_length: length of original text in tokens
|
351 |
+
targets_length: an integer - length in tokens of encoded targets sequence
|
352 |
+
"""
|
353 |
+
|
354 |
+
def _tokens_length_to_inputs_length_targets_length(tokens_length):
|
355 |
+
num_noise_tokens = int(round(tokens_length * noise_density))
|
356 |
+
num_nonnoise_tokens = tokens_length - num_noise_tokens
|
357 |
+
num_noise_spans = int(round(num_noise_tokens / mean_noise_span_length))
|
358 |
+
# inputs contain all nonnoise tokens, sentinels for all noise spans
|
359 |
+
# and one EOS token.
|
360 |
+
_input_length = num_nonnoise_tokens + num_noise_spans + 1
|
361 |
+
_output_length = num_noise_tokens + num_noise_spans + 1
|
362 |
+
return _input_length, _output_length
|
363 |
+
|
364 |
+
tokens_length = inputs_length
|
365 |
+
|
366 |
+
while (
|
367 |
+
_tokens_length_to_inputs_length_targets_length(tokens_length + 1)[0]
|
368 |
+
<= inputs_length
|
369 |
+
):
|
370 |
+
tokens_length += 1
|
371 |
+
|
372 |
+
inputs_length, targets_length = _tokens_length_to_inputs_length_targets_length(
|
373 |
+
tokens_length
|
374 |
+
)
|
375 |
+
|
376 |
+
# minor hack to get the targets length to be equal to inputs length
|
377 |
+
# which is more likely to have been set to a nice round number.
|
378 |
+
if noise_density == 0.5 and targets_length > inputs_length:
|
379 |
+
tokens_length -= 1
|
380 |
+
targets_length -= 1
|
381 |
+
return tokens_length, targets_length
|
382 |
+
|
383 |
+
|
384 |
+
@flax.struct.dataclass
class FlaxDataCollatorForT5MLM:
    """
    Data collator used for T5 span-masked language modeling.
    It is made sure that after masking the inputs are of length `data_args.max_seq_length` and targets are also of fixed length.
    For more information on how T5 span-masked language modeling works, one can take a look
    at the `official paper <https://arxiv.org/pdf/1910.10683.pdf>`__
    or the `official code for preprocessing <https://github.com/google-research/text-to-text-transfer-transformer/blob/master/t5/data/preprocessors.py>`__ .

    Args:
        tokenizer (:class:`~transformers.PreTrainedTokenizer` or :class:`~transformers.PreTrainedTokenizerFast`):
            The tokenizer used for encoding the data.
        noise_density (:obj:`float`):
            The probability with which to (randomly) mask tokens in the input.
        mean_noise_span_length (:obj:`float`):
            The average span length of the masked tokens.
        input_length (:obj:`int`):
            The expected input length after masking.
        target_length (:obj:`int`):
            The expected target length after masking.
        pad_token_id: (:obj:`int`):
            The pad token id of the model
        decoder_start_token_id: (:obj:`int`):
            The decoder start token id of the model
    """

    tokenizer: PreTrainedTokenizerBase
    noise_density: float
    mean_noise_span_length: float
    input_length: int
    target_length: int
    pad_token_id: int
    decoder_start_token_id: int

    def __call__(self, examples: List[Dict[str, np.ndarray]]) -> Dict[str, np.ndarray]:
        # Convert the list of per-example dicts into one dict of stacked arrays.
        batch = BatchEncoding(
            {
                k: np.array([examples[i][k] for i in range(len(examples))])
                for k in examples[0]
            }
        )

        input_ids = batch["input_ids"]
        batch_size, expanded_input_length = input_ids.shape

        # Draw an independent random span mask for each example in the batch.
        mask_indices = np.asarray(
            [
                self.random_spans_noise_mask(expanded_input_length)
                for _ in range(batch_size)
            ]
        )
        # The targets are built from the complement of the input mask.
        labels_mask = ~mask_indices

        input_ids_sentinel = self.create_sentinel_ids(mask_indices.astype(np.int8))
        labels_sentinel = self.create_sentinel_ids(labels_mask.astype(np.int8))

        batch["input_ids"] = self.filter_input_ids(input_ids, input_ids_sentinel)
        batch["labels"] = self.filter_input_ids(input_ids, labels_sentinel)

        if batch["input_ids"].shape[-1] != self.input_length:
            raise ValueError(
                f"`input_ids` are incorrectly preprocessed. `input_ids` length is {batch['input_ids'].shape[-1]}, but"
                # Fixed: this message previously reported `self.target_length`,
                # which is the wrong expected value for `input_ids`.
                f" should be {self.input_length}."
            )

        if batch["labels"].shape[-1] != self.target_length:
            raise ValueError(
                f"`labels` are incorrectly preprocessed. `labels` length is {batch['labels'].shape[-1]}, but should be"
                f" {self.target_length}."
            )

        # to check that tokens are correctly preprocessed, one can run `self.tokenizer.batch_decode(input_ids)` and `self.tokenizer.batch_decode(labels)` here...
        batch["decoder_input_ids"] = shift_tokens_right(
            batch["labels"], self.pad_token_id, self.decoder_start_token_id
        )

        return batch

    def create_sentinel_ids(self, mask_indices):
        """
        Sentinel ids creation given the indices that should be masked.
        The start indices of each mask are replaced by the sentinel ids in increasing
        order. Consecutive mask indices to be deleted are replaced with `-1`.
        """
        # A position starts a span when it is masked and its predecessor is not.
        start_indices = mask_indices - np.roll(mask_indices, 1, axis=-1) * mask_indices
        start_indices[:, 0] = mask_indices[:, 0]

        sentinel_ids = np.where(
            start_indices != 0, np.cumsum(start_indices, axis=-1), start_indices
        )
        # Sentinel tokens occupy the top of the vocabulary, numbered downwards
        # from `len(tokenizer)`.
        sentinel_ids = np.where(
            sentinel_ids != 0, (len(self.tokenizer) - sentinel_ids), 0
        )
        # Non-start masked positions become negative so they can be filtered
        # out later in `filter_input_ids`.
        sentinel_ids -= mask_indices - start_indices

        return sentinel_ids

    def filter_input_ids(self, input_ids, sentinel_ids):
        """
        Puts sentinel mask on `input_ids` and fuse consecutive mask tokens into a single mask token by deleting.
        This will reduce the sequence length from `expanded_inputs_length` to `input_length`.
        """
        batch_size = input_ids.shape[0]

        input_ids_full = np.where(sentinel_ids != 0, sentinel_ids, input_ids)
        # input_ids tokens and sentinel tokens are >= 0, tokens < 0 are
        # masked tokens coming after sentinel tokens and should be removed
        input_ids = input_ids_full[input_ids_full >= 0].reshape((batch_size, -1))
        # Every resulting sequence is terminated with the tokenizer's EOS token.
        input_ids = np.concatenate(
            [
                input_ids,
                np.full((batch_size, 1), self.tokenizer.eos_token_id, dtype=np.int32),
            ],
            axis=-1,
        )
        return input_ids

    def random_spans_noise_mask(self, length):
        """This function is copy of `random_spans_helper <https://github.com/google-research/text-to-text-transfer-transformer/blob/84f8bcc14b5f2c03de51bd3587609ba8f6bbd1cd/t5/data/preprocessors.py#L2682>`__ .

        Noise mask consisting of random spans of noise tokens.
        The number of noise tokens and the number of noise spans and non-noise spans
        are determined deterministically as follows:
        num_noise_tokens = round(length * noise_density)
        num_nonnoise_spans = num_noise_spans = round(num_noise_tokens / mean_noise_span_length)
        Spans alternate between non-noise and noise, beginning with non-noise.
        Subject to the above restrictions, all masks are equally likely.

        Args:
            length: an int32 scalar (length of the incoming token sequence)
            noise_density: a float - approximate density of output mask
            mean_noise_span_length: a number

        Returns:
            a boolean tensor with shape [length]
        """

        orig_length = length

        num_noise_tokens = int(np.round(length * self.noise_density))
        # avoid degeneracy by ensuring positive numbers of noise and nonnoise tokens.
        num_noise_tokens = min(max(num_noise_tokens, 1), length - 1)
        num_noise_spans = int(np.round(num_noise_tokens / self.mean_noise_span_length))

        # avoid degeneracy by ensuring positive number of noise spans
        num_noise_spans = max(num_noise_spans, 1)
        num_nonnoise_tokens = length - num_noise_tokens

        # pick the lengths of the noise spans and the non-noise spans
        def _random_segmentation(num_items, num_segments):
            """Partition a sequence of items randomly into non-empty segments.
            Args:
                num_items: an integer scalar > 0
                num_segments: an integer scalar in [1, num_items]
            Returns:
                a Tensor with shape [num_segments] containing positive integers that add
                up to num_items
            """
            mask_indices = np.arange(num_items - 1) < (num_segments - 1)
            np.random.shuffle(mask_indices)
            first_in_segment = np.pad(mask_indices, [[1, 0]])
            segment_id = np.cumsum(first_in_segment)
            # count length of sub segments assuming that list is sorted
            _, segment_length = np.unique(segment_id, return_counts=True)
            return segment_length

        noise_span_lengths = _random_segmentation(num_noise_tokens, num_noise_spans)
        nonnoise_span_lengths = _random_segmentation(
            num_nonnoise_tokens, num_noise_spans
        )

        # Interleave so that spans alternate: non-noise, noise, non-noise, ...
        interleaved_span_lengths = np.reshape(
            np.stack([nonnoise_span_lengths, noise_span_lengths], axis=1),
            [num_noise_spans * 2],
        )
        span_starts = np.cumsum(interleaved_span_lengths)[:-1]
        span_start_indicator = np.zeros((length,), dtype=np.int8)
        span_start_indicator[span_starts] = True
        span_num = np.cumsum(span_start_indicator)
        # Odd-numbered spans are the noise spans (sequence begins with non-noise).
        is_noise = np.equal(span_num % 2, 1)

        return is_noise[:orig_length]
|
567 |
+
|
568 |
+
|
569 |
+
def generate_batch_splits(samples_idx: jnp.ndarray, batch_size: int) -> jnp.ndarray:
    """Split an array of sample indices into full batches of ``batch_size``.

    Any trailing remainder that does not fill a whole batch is dropped.

    Args:
        samples_idx: 1-D array of sample indices (e.g. a shuffled permutation).
        batch_size: number of indices per batch; must be positive.

    Returns:
        A list of arrays, each of length ``batch_size``. Empty list when there
        are fewer samples than one batch.
    """
    num_samples = len(samples_idx)
    samples_to_remove = num_samples % batch_size

    if samples_to_remove != 0:
        # Drop the incomplete final batch so every split is exactly batch_size.
        samples_idx = samples_idx[:-samples_to_remove]
    sections_split = num_samples // batch_size
    if sections_split == 0:
        # Robustness fix: fewer samples than one batch. np.split(..., 0) would
        # raise, so return no batches instead.
        return []
    batch_idx = np.split(samples_idx, sections_split)
    return batch_idx
|
578 |
+
|
579 |
+
|
580 |
+
def write_train_metric(summary_writer, train_metrics, train_time, step):
    """Log accumulated training metrics to TensorBoard.

    Writes the cumulative training wall time at ``step``, then stacks the
    per-step metric dicts (via ``get_metrics``) and logs every recorded value
    under a ``train_``-prefixed tag, back-dating steps so the final value
    lands on ``step``.
    """
    summary_writer.scalar("train_time", train_time, step)

    stacked_metrics = get_metrics(train_metrics)
    for metric_name, values in stacked_metrics.items():
        num_values = len(values)
        for offset, value in enumerate(values):
            summary_writer.scalar(
                f"train_{metric_name}", value, step - num_values + offset + 1
            )
|
588 |
+
|
589 |
+
|
590 |
+
def write_eval_metric(summary_writer, eval_metrics, step):
    """Log every evaluation metric to TensorBoard under an ``eval_`` prefix."""
    for metric_name in eval_metrics:
        summary_writer.scalar(f"eval_{metric_name}", eval_metrics[metric_name], step)
|
593 |
+
|
594 |
+
|
595 |
+
def main():
    """Run Flax T5 span-masked language-model pre-training end to end.

    Pipeline: parse CLI/JSON arguments -> load and tokenize the dataset ->
    group token streams into fixed-length chunks -> build the span-masking
    data collator -> train with pmap-parallel steps, periodically evaluating
    and saving the checkpoint whose eval loss beat the previous eval.
    """
    # See all possible arguments in src/transformers/training_args.py
    # or by passing the --help flag to this script.
    # We now keep distinct sets of args, for a cleaner separation of concerns.

    parser = HfArgumentParser(
        (ModelArguments, DataTrainingArguments, TrainingArguments)
    )
    if len(sys.argv) == 2 and sys.argv[1].endswith(".json"):
        # If we pass only one argument to the script and it's the path to a json file,
        # let's parse it to get our arguments.
        model_args, data_args, training_args = parser.parse_json_file(
            json_file=os.path.abspath(sys.argv[1])
        )
    else:
        model_args, data_args, training_args = parser.parse_args_into_dataclasses()

    # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
    # information sent is the one passed as arguments along with your Python/PyTorch versions.
    # send_example_telemetry("run_t5_mlm", model_args, data_args, framework="flax")

    # Refuse to clobber a non-empty output directory unless explicitly allowed.
    if (
        os.path.exists(training_args.output_dir)
        and os.listdir(training_args.output_dir)
        and training_args.do_train
        and not training_args.overwrite_output_dir
    ):
        raise ValueError(
            f"Output directory ({training_args.output_dir}) already exists and is not empty."
            "Use --overwrite_output_dir to overcome."
        )

    # Setup logging
    logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
        level=logging.INFO,
        datefmt="[%X]",
    )

    # Log on each process the small summary:
    logger = logging.getLogger(__name__)

    # Set the verbosity to info of the Transformers logger (on main process only):
    logger.info(f"Training/evaluation parameters {training_args}")

    # Set seed before initializing model.
    set_seed(training_args.seed)

    # Handle the repository creation
    # if training_args.push_to_hub:
    #     if training_args.hub_model_id is None:
    #         repo_name = get_full_repo_name(
    #             Path(training_args.output_dir).absolute().name, token=training_args.hub_token
    #         )
    #     else:
    #         repo_name = training_args.hub_model_id
    #     repo = Repository(training_args.output_dir, clone_from=repo_name)

    # Get the datasets: you can either provide your own CSV/JSON/TXT training and evaluation files (see below)
    # or just provide the name of one of the public datasets available on the hub at https://huggingface.co/datasets/
    # (the dataset will be downloaded automatically from the datasets Hub).
    #
    # For CSV/JSON files, this script will use the column called 'text' or the first column if no column called
    # 'text' is found. You can easily tweak this behavior (see below).
    if data_args.dataset_name is not None:
        # Downloading and loading a dataset from the hub.
        datasets = load_dataset(
            data_args.dataset_name,
            data_args.dataset_config_name,
            cache_dir=model_args.cache_dir,
            # use_auth_token=True if model_args.use_auth_token else None,
        )

        # When the hub dataset has no validation split, carve one out of the
        # head of the train split (validation_split_percentage controls size).
        if "validation" not in datasets.keys():
            datasets["validation"] = load_dataset(
                data_args.dataset_name,
                data_args.dataset_config_name,
                split=f"train[:{data_args.validation_split_percentage}%]",
                cache_dir=model_args.cache_dir,
                # use_auth_token=True if model_args.use_auth_token else None,
            )
            datasets["train"] = load_dataset(
                data_args.dataset_name,
                data_args.dataset_config_name,
                split=f"train[{data_args.validation_split_percentage}%:]",
                cache_dir=model_args.cache_dir,
                # use_auth_token=True if model_args.use_auth_token else None,
            )
    else:
        # Local file mode: infer the loader from the train file's extension.
        data_files = {}
        if data_args.train_file is not None:
            data_files["train"] = data_args.train_file
        if data_args.validation_file is not None:
            data_files["validation"] = data_args.validation_file
        extension = data_args.train_file.split(".")[-1]
        if extension == "txt":
            extension = "text"
        datasets = load_dataset(
            extension,
            data_files=data_files,
            cache_dir=model_args.cache_dir,
            # use_auth_token=True if model_args.use_auth_token else None,
        )

        if "validation" not in datasets.keys():
            datasets["validation"] = load_dataset(
                extension,
                data_files=data_files,
                split=f"train[:{data_args.validation_split_percentage}%]",
                cache_dir=model_args.cache_dir,
                # use_auth_token=True if model_args.use_auth_token else None,
            )
            datasets["train"] = load_dataset(
                extension,
                data_files=data_files,
                split=f"train[{data_args.validation_split_percentage}%:]",
                cache_dir=model_args.cache_dir,
                # use_auth_token=True if model_args.use_auth_token else None,
            )
    # See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at
    # https://huggingface.co/docs/datasets/loading_datasets.html.

    # Load pretrained model and tokenizer

    if model_args.tokenizer_name:
        tokenizer = AutoTokenizer.from_pretrained(
            model_args.tokenizer_name,
            cache_dir=model_args.cache_dir,
            use_fast=model_args.use_fast_tokenizer,
            # use_auth_token=True if model_args.use_auth_token else None,
        )
    elif model_args.model_name_or_path:
        tokenizer = AutoTokenizer.from_pretrained(
            model_args.model_name_or_path,
            cache_dir=model_args.cache_dir,
            use_fast=model_args.use_fast_tokenizer,
            # use_auth_token=True if model_args.use_auth_token else None,
        )
    else:
        raise ValueError(
            "You are instantiating a new tokenizer from scratch. This is not supported by this script."
            "You can do it from another script, save it, and load it from here, using --tokenizer_name."
        )

    if model_args.config_name:
        # vocab_size is overridden so the config matches the (possibly
        # extended) tokenizer vocabulary.
        config = T5Config.from_pretrained(
            model_args.config_name,
            cache_dir=model_args.cache_dir,
            vocab_size=len(tokenizer),
            # use_auth_token=True if model_args.use_auth_token else None,
        )
    elif model_args.model_name_or_path:
        config = T5Config.from_pretrained(
            model_args.model_name_or_path,
            cache_dir=model_args.cache_dir,
            # NOTE(review): unlike every other call site in this function, the
            # use_auth_token argument below is NOT commented out — confirm this
            # asymmetry is intentional.
            use_auth_token=True if model_args.use_auth_token else None,
        )
    else:
        config = CONFIG_MAPPING[model_args.model_type]()
        logger.warning("You are instantiating a new config instance from scratch.")

    # Preprocessing the datasets.
    # First we tokenize all the texts.
    if training_args.do_train:
        column_names = datasets["train"].column_names
    else:
        column_names = datasets["validation"].column_names
    text_column_name = "text" if "text" in column_names else column_names[0]

    max_seq_length = min(data_args.max_seq_length, tokenizer.model_max_length)

    # Otherwise, we tokenize every text, then concatenate them together before splitting them in smaller parts.
    # Since we make sure that all sequences are of the same length, no attention_mask is needed.
    def tokenize_function(examples):
        # Tokenize raw text; attention masks are unnecessary for fixed-length chunks.
        return tokenizer(examples[text_column_name], return_attention_mask=False)

    tokenized_datasets = datasets.map(
        tokenize_function,
        batched=True,
        num_proc=data_args.preprocessing_num_workers,
        remove_columns=column_names,
        load_from_cache_file=not data_args.overwrite_cache,
    )

    # T5-like span masked language modeling will fuse consecutively masked tokens to a single sentinel token.
    # To ensure that the input length is `max_seq_length`, we need to increase the maximum length
    # according to `mlm_probability` and `mean_noise_span_length`. We can also define the label length accordingly.
    expanded_inputs_length, targets_length = compute_input_and_target_lengths(
        inputs_length=max_seq_length,
        noise_density=data_args.mlm_probability,
        mean_noise_span_length=data_args.mean_noise_span_length,
    )

    # Main data processing function that will concatenate all texts from our dataset and generate chunks of expanded_inputs_length.
    def group_texts(examples):
        # Concatenate all texts.
        concatenated_examples = {k: list(chain(*examples[k])) for k in examples.keys()}
        total_length = len(concatenated_examples[list(examples.keys())[0]])
        # We drop the small remainder, we could add padding if the model supported it instead of this drop, you can
        # customize this part to your needs.
        if total_length >= expanded_inputs_length:
            total_length = (
                total_length // expanded_inputs_length
            ) * expanded_inputs_length
        # Split by chunks of max_len.
        result = {
            k: [
                t[i : i + expanded_inputs_length]
                for i in range(0, total_length, expanded_inputs_length)
            ]
            for k, t in concatenated_examples.items()
        }
        return result

    # Note that with `batched=True`, this map processes 1,000 texts together, so group_texts throws away a
    # remainder for each of those groups of 1,000 texts. You can adjust that batch_size here but a higher value
    # might be slower to preprocess.
    #
    # To speed up this part, we use multiprocessing. See the documentation of the map method for more information:
    # https://huggingface.co/docs/datasets/package_reference/main_classes.html#datasets.Dataset.map
    tokenized_datasets = tokenized_datasets.map(
        group_texts,
        batched=True,
        num_proc=data_args.preprocessing_num_workers,
        load_from_cache_file=not data_args.overwrite_cache,
    )

    # Enable tensorboard only on the master node
    has_tensorboard = is_tensorboard_available()
    if has_tensorboard and jax.process_index() == 0:
        try:
            from flax.metrics.tensorboard import SummaryWriter

            summary_writer = SummaryWriter(log_dir=Path(training_args.output_dir))
        except ImportError as ie:
            has_tensorboard = False
            logger.warning(
                f"Unable to display metrics through TensorBoard because some package are not installed: {ie}"
            )
    else:
        logger.warning(
            "Unable to display metrics through TensorBoard because the package is not installed: "
            "Please run pip install tensorboard to enable."
        )

    # Initialize our training
    rng = jax.random.PRNGKey(training_args.seed)
    # One dropout RNG per local device for the pmapped train step.
    dropout_rngs = jax.random.split(rng, jax.local_device_count())

    if model_args.model_name_or_path:
        model = FlaxT5ForConditionalGeneration.from_pretrained(
            model_args.model_name_or_path,
            config=config,
            seed=training_args.seed,
            dtype=getattr(jnp, model_args.dtype),
            # use_auth_token=True if model_args.use_auth_token else None,
        )
    else:
        # Training from scratch: sync the config's vocab size to the tokenizer.
        config.vocab_size = len(tokenizer)
        model = FlaxT5ForConditionalGeneration(
            config,
            seed=training_args.seed,
            dtype=getattr(jnp, model_args.dtype),
            # use_auth_token=True if model_args.use_auth_token else None,
        )

    # Data collator
    # This one will take care of randomly masking the tokens.
    data_collator = FlaxDataCollatorForT5MLM(
        tokenizer=tokenizer,
        noise_density=data_args.mlm_probability,
        mean_noise_span_length=data_args.mean_noise_span_length,
        input_length=max_seq_length,
        target_length=targets_length,
        pad_token_id=model.config.pad_token_id,
        decoder_start_token_id=model.config.decoder_start_token_id,
    )

    # Store some constant
    num_epochs = int(training_args.num_train_epochs)
    train_batch_size = (
        int(training_args.per_device_train_batch_size) * jax.device_count()
    )
    eval_batch_size = int(training_args.per_device_eval_batch_size) * jax.device_count()

    num_train_steps = len(tokenized_datasets["train"]) // train_batch_size * num_epochs

    num_of_hosts = jax.process_count()
    current_host_idx = jax.process_index()

    # Create learning rate schedule: linear warmup followed by linear decay to 0.
    warmup_fn = optax.linear_schedule(
        init_value=0.0,
        end_value=training_args.learning_rate,
        transition_steps=training_args.warmup_steps,
    )
    decay_fn = optax.linear_schedule(
        init_value=training_args.learning_rate,
        end_value=0,
        transition_steps=num_train_steps - training_args.warmup_steps,
    )
    linear_decay_lr_schedule_fn = optax.join_schedules(
        schedules=[warmup_fn, decay_fn], boundaries=[training_args.warmup_steps]
    )

    # We use Optax's "masking" functionality to not apply weight decay
    # to bias and LayerNorm scale parameters. decay_mask_fn returns a
    # mask boolean with the same structure as the parameters.
    # The mask is True for parameters that should be decayed.
    def decay_mask_fn(params):
        flat_params = traverse_util.flatten_dict(params)
        flat_mask = {
            path: (
                path[-1] != "bias"
                and path[-2:]
                not in [("layer_norm", "scale"), ("final_layer_norm", "scale")]
            )
            for path in flat_params
        }
        return traverse_util.unflatten_dict(flat_mask)

    # create adam optimizer
    if training_args.adafactor:
        # We use the default parameters here to initialize adafactor,
        # For more details about the parameters please check https://github.com/deepmind/optax/blob/ed02befef9bf81cbbf236be3d2b0e032e9ed4a40/optax/_src/alias.py#L74
        optimizer = optax.adafactor(
            learning_rate=linear_decay_lr_schedule_fn,
        )
    else:
        optimizer = optax.adamw(
            learning_rate=linear_decay_lr_schedule_fn,
            b1=training_args.adam_beta1,
            b2=training_args.adam_beta2,
            weight_decay=training_args.weight_decay,
            mask=decay_mask_fn,
        )

    # Setup train state
    state = train_state.TrainState.create(
        apply_fn=model.__call__, params=model.params, tx=optimizer
    )

    # Define gradient update step fn
    def train_step(state, batch, dropout_rng):
        dropout_rng, new_dropout_rng = jax.random.split(dropout_rng)

        def loss_fn(params):
            labels = batch.pop("labels")

            logits = state.apply_fn(
                **batch, params=params, dropout_rng=dropout_rng, train=True
            )[0]

            # compute loss
            loss = optax.softmax_cross_entropy(
                logits, onehot(labels, logits.shape[-1])
            ).mean()

            return loss

        grad_fn = jax.value_and_grad(loss_fn)
        loss, grad = grad_fn(state.params)
        # Average gradients across all devices before applying the update.
        grad = jax.lax.pmean(grad, "batch")
        new_state = state.apply_gradients(grads=grad)

        metrics = jax.lax.pmean(
            {"loss": loss, "learning_rate": linear_decay_lr_schedule_fn(state.step)},
            axis_name="batch",
        )

        return new_state, metrics, new_dropout_rng

    # Create parallel version of the train step
    p_train_step = jax.pmap(train_step, "batch", donate_argnums=(0,))

    # Define eval fn
    def eval_step(params, batch):
        labels = batch.pop("labels")

        logits = model(**batch, params=params, train=False)[0]

        # compute loss
        loss = optax.softmax_cross_entropy(logits, onehot(labels, logits.shape[-1]))

        # compute accuracy
        accuracy = jnp.equal(jnp.argmax(logits, axis=-1), labels)

        # summarize metrics
        metrics = {"loss": loss.mean(), "accuracy": accuracy.mean()}
        metrics = jax.lax.pmean(metrics, axis_name="batch")

        return metrics

    # NOTE(review): donate_argnums=(0,) donates the params buffers to the eval
    # computation — confirm the replicated state.params are not needed intact
    # after each eval call.
    p_eval_step = jax.pmap(eval_step, "batch", donate_argnums=(0,))

    # Replicate the train state on each device
    state = jax_utils.replicate(state)

    train_time = 0
    eval_loss = float("inf")
    epochs = tqdm(range(num_epochs), desc="Epoch ... ", position=0)
    for epoch in epochs:
        # ======================== Training ================================
        train_start = time.time()
        train_metrics = []

        # Create sampling rng
        rng, input_rng = jax.random.split(rng)

        # Generate an epoch by shuffling sampling indices from the train dataset
        num_train_samples = len(tokenized_datasets["train"])
        train_samples_idx = np.random.permutation(np.arange(num_train_samples))
        train_batch_idx = generate_batch_splits(train_samples_idx, train_batch_size)

        # Gather the indexes for creating the batch and do a training step
        for step, batch_idx in enumerate(
            tqdm(train_batch_idx, desc="Training...", position=1)
        ):
            samples = [tokenized_datasets["train"][int(idx)] for idx in batch_idx]
            model_inputs = data_collator(samples)

            # Keep only this host's shard of the global batch.
            local_host_model_inputs = {
                key: np.split(model_inputs.data[key], num_of_hosts, axis=0)[
                    current_host_idx
                ]
                for key, value in model_inputs.data.items()
            }

            # Model forward
            model_inputs = shard(local_host_model_inputs)
            state, train_metric, dropout_rngs = p_train_step(
                state, model_inputs, dropout_rngs
            )
            train_metrics.append(train_metric)

            cur_step = epoch * (num_train_samples // train_batch_size) + step

            if cur_step % training_args.logging_steps == 0 and cur_step > 0:
                # Save metrics
                train_metric = jax_utils.unreplicate(train_metric)
                train_time += time.time() - train_start
                if has_tensorboard and jax.process_index() == 0:
                    write_train_metric(
                        summary_writer, train_metrics, train_time, cur_step
                    )

                epochs.write(
                    f"Step... ({cur_step} | Loss: {train_metric['loss'].mean()}, Learning Rate:"
                    f" {train_metric['learning_rate'].mean()})"
                )

                train_metrics = []

            if cur_step % training_args.eval_steps == 0 and cur_step > 0:
                # ======================== Evaluating ==============================
                num_eval_samples = len(tokenized_datasets["validation"])
                eval_samples_idx = jnp.arange(num_eval_samples)
                eval_batch_idx = generate_batch_splits(
                    eval_samples_idx, eval_batch_size
                )

                eval_metrics = []
                for i, batch_idx in enumerate(
                    tqdm(eval_batch_idx, desc="Evaluating ...", position=2)
                ):
                    samples = [
                        tokenized_datasets["validation"][int(idx)] for idx in batch_idx
                    ]
                    model_inputs = data_collator(samples)

                    # Model forward
                    model_inputs = shard(model_inputs.data)
                    metrics = p_eval_step(state.params, model_inputs)
                    eval_metrics.append(metrics)

                # get eval metrics
                eval_metrics = get_metrics(eval_metrics)
                eval_metrics = jax.tree_map(jnp.mean, eval_metrics)

                # Update progress bar
                epochs.write(
                    f"Step... ({cur_step} | Loss: {eval_metrics['loss']}, Acc: {eval_metrics['accuracy']})"
                )

                # Save metrics
                if has_tensorboard and jax.process_index() == 0:
                    write_eval_metric(summary_writer, eval_metrics, cur_step)

                # Save model if eval_metrics['loss'] < eval_loss
                if eval_metrics["loss"] < eval_loss:
                    eval_loss = eval_metrics["loss"]
                    if jax.process_index() == 0:
                        params = jax.device_get(
                            jax.tree_map(lambda x: x[0], state.params)
                        )
                        model.save_pretrained(training_args.output_dir, params=params)
                        tokenizer.save_pretrained(training_args.output_dir)
                        print(
                            f"Step: {cur_step}, Current eval_loss is {eval_loss}, checkpoint is saved!!"
                        )
                else:
                    # NOTE(review): eval_loss is updated even when the loss did
                    # NOT improve, so the comparison above is against the
                    # *previous* eval loss rather than the best-so-far — confirm
                    # this is the intended checkpointing policy.
                    eval_loss = eval_metrics["loss"]
                    print(f"Step: {cur_step}, Current eval_loss is {eval_loss}")

            # if cur_step % training_args.save_steps == 0 and cur_step > 0:
            #     # save checkpoint after each epoch and push checkpoint to the hub
            #     if jax.process_index() == 0:
            #         params = jax.device_get(jax.tree_map(lambda x: x[0], state.params))
            #         model.save_pretrained(training_args.output_dir, params=params)
            #         tokenizer.save_pretrained(training_args.output_dir)
            #         if training_args.push_to_hub:
            #             repo.push_to_hub(commit_message=f"Saving weights and logs of step {cur_step}", blocking=False)

    # Eval after training
    if training_args.do_eval:
        num_eval_samples = len(tokenized_datasets["validation"])
        eval_samples_idx = jnp.arange(num_eval_samples)
        eval_batch_idx = generate_batch_splits(eval_samples_idx, eval_batch_size)

        eval_metrics = []
        for i, batch_idx in enumerate(
            tqdm(eval_batch_idx, desc="Evaluating ...", position=2)
        ):
            samples = [tokenized_datasets["validation"][int(idx)] for idx in batch_idx]
            model_inputs = data_collator(samples)

            # Model forward
            model_inputs = shard(model_inputs.data)
            metrics = p_eval_step(state.params, model_inputs)
            eval_metrics.append(metrics)

        # get eval metrics
        eval_metrics = get_metrics(eval_metrics)
        eval_metrics = jax.tree_map(
            lambda metric: jnp.mean(metric).item(), eval_metrics
        )

        if jax.process_index() == 0:
            eval_metrics = {
                f"eval_{metric_name}": value
                for metric_name, value in eval_metrics.items()
            }
            path = os.path.join(training_args.output_dir, "eval_results.json")
            with open(path, "w") as f:
                json.dump(eval_metrics, f, indent=4, sort_keys=True)


if __name__ == "__main__":
    main()
|
CompoundT5/CompoundT5/run.sh
ADDED
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
python ./new_run_t5_mlm_flax.py \
|
2 |
+
--output_dir="./CompoundT5-output" \
|
3 |
+
--model_type="t5" \
|
4 |
+
--config_name="./CompoundT5-config" \
|
5 |
+
--tokenizer_name="./CompoundT5-config" \
|
6 |
+
--dataset_name="sagawa/ZINC-canonicalized" \
|
7 |
+
--max_seq_length="512" \
|
8 |
+
--per_device_train_batch_size="5" \
|
9 |
+
--per_device_eval_batch_size="5" \
|
10 |
+
--adafactor \
|
11 |
+
--learning_rate="0.005" \
|
12 |
+
--weight_decay="0.001" \
|
13 |
+
--warmup_steps="2000" \
|
14 |
+
--overwrite_output_dir \
|
15 |
+
--logging_steps="500" \
|
16 |
+
--save_steps="100000" \
|
17 |
+
--num_train_epochs="30" \
|
18 |
+
--do_train \
|
19 |
+
--do_eval \
|
20 |
+
--eval_steps="100000"
|
CompoundT5/README.md
ADDED
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# CompoundT5
|
2 |
+
Here, we will explain how to do compound pre-training.
|
3 |
+
|
4 |
+
# Installation
|
5 |
+
To get started, you will first need to install the necessary libraries. You can use the requirements.yaml file for this purpose. If the versions of torch and jax do not match your environment, you can change and run the following command:
|
6 |
+
```
|
7 |
+
conda install -c conda-forge rdkit gdown scikit-learn
|
8 |
+
conda install pytorch torchvision torchaudio cudatoolkit=11.3 -c pytorch
|
9 |
+
pip install tokenizers==0.12.1 transformers==4.21.0 datasets sentencepiece==0.1.96
|
10 |
+
pip install "jax[cuda11_cudnn82]" -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html
|
11 |
+
pip install flax
|
12 |
+
```
|
13 |
+
This will install all the necessary libraries for the project.
|
14 |
+
|
15 |
+
The original data used for this study is uploaded to Google Drive and can be found at the following links:
|
16 |
+
・[ZINC](https://drive.google.com/drive/folders/1SgM35D14JUqgNILxaiRQYbZoyooFOF-3)
|
17 |
+
・[ORD](https://drive.google.com/file/d/1Qbsl8_CmdIK_iNNY8F6wATVnDQNSW9Tc/view?usp=drive_link)
|
18 |
+
The pre-processed data is also available on [Hugging Face Hub](https://huggingface.co/sagawa) and can be used directly.
|
19 |
+
|
20 |
+
To download the data, you can run the following command:
|
21 |
+
```
|
22 |
+
python preprocess_data.py
|
23 |
+
```
|
24 |
+
To complete the preparation for compound pre-training, run the following command:
|
25 |
+
```
|
26 |
+
python prepare_model.py
|
27 |
+
```
|
28 |
+
|
29 |
+
# Compound pre-training
|
30 |
+
Run the following command to conduct compound pre-training. In compound pre-training, T5 is trained on the ZINC dataset using span-masked language modeling. The pretrained model (CompoundT5) is uploaded to [Hugging Face Hub](https://huggingface.co/sagawa/CompoundT5).
|
31 |
+
```
|
32 |
+
cd CompoundT5
|
33 |
+
sh run.sh
|
34 |
+
```
|
35 |
+
Please note that if your GPU memory size is small, you may encounter an out-of-memory error during T5 pre-training. If this occurs, you can try reducing the batch size or you can try putting XLA_PYTHON_CLIENT_MEM_FRACTION=.8 before python ./new_run_t5_mlm_flax.py in run.sh file. This reduces GPU memory preallocation.
|
CompoundT5/prepare_model.py
ADDED
@@ -0,0 +1,208 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# https://github.com/huggingface/transformers/blob/main/examples/flax/language-modeling/t5_tokenizer_model.py
|
2 |
+
|
3 |
+
import argparse
|
4 |
+
import json
|
5 |
+
import os
|
6 |
+
import sys
|
7 |
+
from typing import Iterator, List, Union
|
8 |
+
|
9 |
+
import datasets
|
10 |
+
from datasets import load_dataset
|
11 |
+
from tokenizers import (
|
12 |
+
AddedToken,
|
13 |
+
Regex,
|
14 |
+
Tokenizer,
|
15 |
+
decoders,
|
16 |
+
normalizers,
|
17 |
+
pre_tokenizers,
|
18 |
+
trainers,
|
19 |
+
)
|
20 |
+
from tokenizers.implementations.base_tokenizer import BaseTokenizer
|
21 |
+
from tokenizers.models import Unigram
|
22 |
+
from tokenizers.processors import TemplateProcessing
|
23 |
+
from transformers import AutoTokenizer, T5Config
|
24 |
+
|
25 |
+
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
|
26 |
+
from utils import seed_everything
|
27 |
+
|
28 |
+
seed_everything(seed=42)
|
29 |
+
|
30 |
+
script_dir = os.path.abspath(os.path.dirname(__file__))
|
31 |
+
project_root = os.path.abspath(os.path.join(script_dir, ".."))
|
32 |
+
data_dir = os.path.join(project_root, "data")
|
33 |
+
|
34 |
+
|
35 |
+
class SentencePieceUnigramTokenizer(BaseTokenizer):
    """Custom SentencePiece-style Unigram tokenizer.

    This class is a copy of `DeDLOC's tokenizer implementation
    <https://github.com/yandex-research/DeDLOC/blob/main/sahajbert/tokenizer/tokenizer_model.py>`__ .

    It applies NMT/NFKC normalization with whitespace collapsing, then the
    pre-tokenization used by SentencePiece (Metaspace) plus per-digit and
    punctuation splitting, on top of a Unigram model.
    """

    def __init__(
        self,
        replacement: str = "▁",
        add_prefix_space: bool = True,
        unk_token: Union[str, AddedToken] = "<unk>",
        eos_token: Union[str, AddedToken] = "</s>",
        pad_token: Union[str, AddedToken] = "<pad>",
    ):
        # Fixed id assignment: pad=0, eos=1, unk=2.
        self.special_tokens = {
            "pad": {"id": 0, "token": pad_token},
            "eos": {"id": 1, "token": eos_token},
            "unk": {"id": 2, "token": unk_token},
        }

        # Flatten into a list ordered by token id, as the trainers expect.
        self.special_tokens_list = [None] * len(self.special_tokens)
        for entry in self.special_tokens.values():
            self.special_tokens_list[entry["id"]] = entry["token"]

        core = Tokenizer(Unigram())

        core.normalizer = normalizers.Sequence(
            [
                normalizers.Nmt(),
                normalizers.NFKC(),
                # Collapse runs of spaces into a single space.
                normalizers.Replace(Regex(" {2,}"), " "),
            ]
        )
        core.pre_tokenizer = pre_tokenizers.Sequence(
            [
                pre_tokenizers.Metaspace(
                    replacement=replacement, add_prefix_space=add_prefix_space
                ),
                pre_tokenizers.Digits(individual_digits=True),
                pre_tokenizers.Punctuation(),
            ]
        )
        core.decoder = decoders.Metaspace(
            replacement=replacement, add_prefix_space=add_prefix_space
        )

        # Append the EOS token to every encoded sequence.
        eos = self.special_tokens["eos"]
        core.post_processor = TemplateProcessing(
            single=f"$A {eos['token']}",
            special_tokens=[(eos["token"], eos["id"])],
        )

        params = {
            "model": "SentencePieceUnigram",
            "replacement": replacement,
            "add_prefix_space": add_prefix_space,
        }

        super().__init__(core, params)

    def train(
        self,
        files: Union[str, List[str]],
        vocab_size: int = 8000,
        show_progress: bool = True,
    ):
        """Train the model using the given files."""
        unigram_trainer = trainers.UnigramTrainer(
            vocab_size=vocab_size,
            special_tokens=self.special_tokens_list,
            show_progress=show_progress,
        )

        file_list = [files] if isinstance(files, str) else files
        self._tokenizer.train(file_list, trainer=unigram_trainer)

        self.add_unk_id()

    def train_from_iterator(
        self,
        iterator: Union[Iterator[str], Iterator[Iterator[str]]],
        vocab_size: int = 8000,
        show_progress: bool = True,
    ):
        """Train the model using the given iterator of texts."""
        unigram_trainer = trainers.UnigramTrainer(
            vocab_size=vocab_size,
            special_tokens=self.special_tokens_list,
            show_progress=show_progress,
        )

        self._tokenizer.train_from_iterator(iterator, trainer=unigram_trainer)

        self.add_unk_id()

    def add_unk_id(self):
        """Patch the serialized model so <unk> is the designated unknown id."""
        spec = json.loads(self._tokenizer.to_str())
        spec["model"]["unk_id"] = self.special_tokens["unk"]["id"]
        self._tokenizer = Tokenizer.from_str(json.dumps(spec))
|
143 |
+
|
144 |
+
|
145 |
+
def create_normal_tokenizer(dataset, model_name):
    """Train a fresh tokenizer on the ``smiles`` column of *dataset*.

    Args:
        dataset: either a ``datasets.DatasetDict`` (its ``"train"`` split is
            used) or a single ``datasets.Dataset`` with a ``smiles`` column.
        model_name: if it contains ``"deberta"``, retrain that model's
            tokenizer; if it contains ``"t5"``, train a SentencePiece
            Unigram tokenizer from scratch.

    Returns:
        The trained tokenizer.

    Raises:
        ValueError: if *model_name* matches neither supported family.
    """
    if isinstance(dataset, datasets.dataset_dict.DatasetDict):
        # BUG FIX: ranging over len(dataset) counted the number of *splits*
        # (e.g. 1), so only the first 1000 rows ever reached training.
        # The corpus must cover every row of the train split.
        training_corpus = (
            dataset["train"][i : i + 1000]["smiles"]
            for i in range(0, len(dataset["train"]), 1000)
        )
    else:
        training_corpus = (
            dataset[i : i + 1000]["smiles"] for i in range(0, len(dataset), 1000)
        )

    if "deberta" in model_name:
        # Reuse the pretrained tokenizer's pipeline, retrain its vocab.
        old_tokenizer = AutoTokenizer.from_pretrained(model_name)
        tokenizer = old_tokenizer.train_new_from_iterator(training_corpus, 1000)
    elif "t5" in model_name:
        tokenizer = SentencePieceUnigramTokenizer(
            unk_token="<unk>", eos_token="</s>", pad_token="<pad>"
        )
        tokenizer.train_from_iterator(training_corpus, 1000)
    else:
        # Previously this fell through and raised UnboundLocalError.
        raise ValueError(f"unsupported model_name: {model_name!r}")

    return tokenizer
|
167 |
+
|
168 |
+
|
169 |
+
def create_character_level_tokenizer(dataset, model_name):
    """Train a tokenizer after rewriting each SMILES as space-separated characters.

    Inserting a space between every character makes the downstream
    tokenizer treat each character as its own token.
    """
    frame = dataset["train"].to_pandas()
    frame["smiles"] = frame["smiles"].map(lambda s: " ".join(s))
    spaced_dataset = datasets.Dataset.from_pandas(frame)

    return create_normal_tokenizer(spaced_dataset, model_name)
|
177 |
+
|
178 |
+
|
179 |
+
def parse_args():
    """Parse command-line options for tokenizer preparation.

    Returns an argparse.Namespace with a single boolean flag,
    ``use_character_level_tokenizer`` (defaults to False).
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        "--use_character_level_tokenizer",
        action="store_true",
        default=False,
        required=False,
    )
    return arg_parser.parse_args()
|
188 |
+
|
189 |
+
|
190 |
+
CFG = parse_args()


# Initialize a dataset: the canonicalized ZINC SMILES corpus produced by
# preprocess_data.py.
dataset = load_dataset(
    "csv", data_files=os.path.join(data_dir, "ZINC-canonicalized.csv")
)

# Either one token per character, or a learned SentencePiece Unigram vocab.
if CFG.use_character_level_tokenizer:
    tokenizer = create_character_level_tokenizer(dataset, "t5")
else:
    tokenizer = create_normal_tokenizer(dataset, "t5")
# Save files to disk (tokenizer next to the model config).
tokenizer.save(os.path.join(script_dir, "CompoundT5/CompoundT5-config/tokenizer.json"))

# Start from google/t5-v1_1-base hyper-parameters, resized to our vocabulary.
config = T5Config.from_pretrained(
    "google/t5-v1_1-base", vocab_size=tokenizer.get_vocab_size()
)
config.save_pretrained(os.path.join(script_dir, "CompoundT5/CompoundT5-config/"))
|
CompoundT5/preprocess_data.py
ADDED
@@ -0,0 +1,168 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import subprocess
|
3 |
+
import sys
|
4 |
+
import warnings
|
5 |
+
|
6 |
+
import pandas as pd
|
7 |
+
from rdkit import Chem, RDLogger
|
8 |
+
from sklearn.model_selection import train_test_split
|
9 |
+
|
10 |
+
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
|
11 |
+
from utils import remove_atom_mapping, seed_everything
|
12 |
+
|
13 |
+
seed_everything(seed=42)
|
14 |
+
|
15 |
+
# Disable RDKit warnings and Python warnings
|
16 |
+
RDLogger.DisableLog("rdApp.*")
|
17 |
+
warnings.filterwarnings("ignore")
|
18 |
+
|
19 |
+
script_dir = os.path.abspath(os.path.dirname(__file__))
|
20 |
+
project_root = os.path.abspath(os.path.join(script_dir, ".."))
|
21 |
+
data_dir = os.path.join(project_root, "data")
|
22 |
+
|
23 |
+
import glob
import shutil

# Google Drive file IDs: the four raw ZINC .smi shards and the ORD
# reaction-dump .tsv.
files_to_download = [
    "1ZPsoUYb4HcxFzK_ac9rb_pQj7oO3Gagh",
    "1XwkxxHiaWFbSNhGyxnv6hAliutIMNrIp",
    "1yIwUH_OhER9nuMo9HjBhBmyc6zvmrSPA",
    "1skFRirstIUijhieshvJEScBD2aB3H1YU",
    "1fa2MyLdN1vcA7Rysk8kLQENE92YejS9B",
]

for file_id in files_to_download:
    # List-form argv (shell=False) avoids shell-quoting issues with the
    # '&' in the download URL.
    subprocess.run(
        ["gdown", f"https://drive.google.com/uc?export=download&id={file_id}"]
    )

# Move downloaded files to data directory (replaces `mv *.smi` / `mv *.tsv`;
# an explicit destination path makes shutil.move overwrite like mv does).
for pattern in ("*.smi", "*.tsv"):
    for downloaded in glob.glob(pattern):
        shutil.move(downloaded, os.path.join(data_dir, os.path.basename(downloaded)))
|
39 |
+
|
40 |
+
|
41 |
+
# Function to process SMILES files and save canonicalized versions
|
42 |
+
def process_smiles_files(file_paths):
    """Canonicalize SMILES from the given .smi files and write CSV outputs.

    Reads every molecule in *file_paths*, keeps the unique RDKit-canonical
    SMILES strings, and writes three files into ``data_dir``:
    ``ZINC-canonicalized.csv`` (full set) plus a 90/10 train/valid split.

    Args:
        file_paths: paths of SMILES (.smi) files to process.
    """
    unique_smiles = set()
    for file_path in file_paths:
        suppl = Chem.SmilesMolSupplier(file_path)
        for mol in suppl:
            if mol is not None:
                try:
                    sm = Chem.MolToSmiles(mol, canonical=True)
                    unique_smiles.add(sm)
                # BUG FIX: a bare `except:` also swallowed SystemExit and
                # KeyboardInterrupt; only skip ordinary RDKit failures.
                except Exception:
                    continue
    df = pd.DataFrame({"smiles": list(unique_smiles)})
    df.to_csv(os.path.join(data_dir, "ZINC-canonicalized.csv"), index=False)

    train, valid = train_test_split(df, test_size=0.1)
    # Save train and validation data
    train.to_csv(os.path.join(data_dir, "ZINC-canonicalized-train.csv"), index=False)
    valid.to_csv(os.path.join(data_dir, "ZINC-canonicalized-valid.csv"), index=False)
|
60 |
+
|
61 |
+
|
62 |
+
# Process 16_p files: canonicalize the four downloaded ZINC shards
# (16_p0.smi .. 16_p3.smi) into the CSV corpus used for pre-training.
process_smiles_files([os.path.join(data_dir, f"16_p{i}.smi") for i in range(4)])


# Load reaction data: the raw ORD export is tab-separated with no header
# row, so column names are assigned explicitly.
ord_df = pd.read_csv(
    os.path.join(data_dir, "all_ord_reaction_uniq_with_attr20240506_v1.tsv"),
    sep="\t",
    names=["id", "input", "product", "condition"],
)
|
72 |
+
|
73 |
+
|
74 |
+
def data_split(row):
    """Split one raw ORD record into per-role strings plus yield/temperature.

    *row* must expose the keys ``"input"``, ``"product"`` and ``"condition"``
    (a DataFrame row or a plain dict). Returns the parsed values as a list in
    fixed order: CATALYST, REACTANT, REAGENT, SOLVENT, INTERNAL_STANDARD,
    NoData, PRODUCT, YIELD, TEMP.
    """
    roles = [
        "CATALYST",
        "REACTANT",
        "REAGENT",
        "SOLVENT",
        "INTERNAL_STANDARD",
        "NoData",
    ]
    parsed = {role: [] for role in roles}

    raw_input = row["input"]
    if isinstance(raw_input, str):
        # Each '.'-separated chunk is tagged with its role; the first role
        # name found in the chunk wins, and the text after ':' is kept.
        for chunk in raw_input.split("."):
            for role in roles:
                if role in chunk:
                    parsed[role].append(chunk[chunk.find(":") + 1 :])
                    break

    for role in roles:
        parsed[role] = ".".join(parsed[role])

    raw_product = row["product"]
    if isinstance(raw_product, str):
        # Normalize separators, then gather every 'PRODUCT:'-tagged SMILES.
        normalized = raw_product.replace(".PRODUCT", "PRODUCT")
        products = [piece for piece in normalized.split("PRODUCT:") if piece != ""]
        parsed["PRODUCT"] = ".".join(products)
    else:
        parsed["PRODUCT"] = None

    raw_cond = row["condition"]
    if isinstance(raw_cond, str):
        # assumes YIELD, when present, is the first ':'-tagged value — the
        # original parser relied on the same layout.
        parsed["YIELD"] = (
            float(raw_cond.split(":")[1]) if "YIELD" in raw_cond else None
        )
        if "TEMP" in raw_cond:
            parsed["TEMP"] = float(raw_cond[raw_cond.find("TEMP") :].split(":")[1])
        else:
            parsed["TEMP"] = None
    else:
        parsed["YIELD"] = None
        parsed["TEMP"] = None

    return list(parsed.values())
|
123 |
+
|
124 |
+
|
125 |
+
# Split each raw ORD row into per-role columns and build the cleaned frame.
categories = [
    "CATALYST",
    "REACTANT",
    "REAGENT",
    "SOLVENT",
    "INTERNAL_STANDARD",
    "NoData",
    "PRODUCT",
    "YIELD",
    "TEMP",
]
cleaned_data = {cat: [] for cat in categories}

for _, row in ord_df.iterrows():
    for cat, value in zip(categories, data_split(row)):
        cleaned_data[cat].append(value)

cleaned_df = pd.DataFrame(cleaned_data)

# Strip atom-map numbers from every SMILES-bearing column (all but the
# numeric YIELD/TEMP columns).
for column in categories[:7]:
    cleaned_df[column] = cleaned_df[column].apply(
        lambda x: remove_atom_mapping(x) if isinstance(x, str) else None
    )

# Save cleaned DataFrame.
# NOTE(review): despite the .tsv extension this is written with to_csv's
# default comma separator — kept as-is so downstream readers stay valid.
cleaned_df.to_csv(os.path.join(data_dir, "preprocessed_ord.tsv"), index=False)

# Carve out ~10% validation and ~10% test, leaving ~80% for training.
train, valid = train_test_split(cleaned_df, test_size=int(len(cleaned_df) * 0.1))
train, test = train_test_split(train, test_size=int(len(cleaned_df) * 0.1))

train.to_csv(os.path.join(data_dir, "preprocessed_ord_train.csv"), index=False)
valid.to_csv(os.path.join(data_dir, "preprocessed_ord_valid.csv"), index=False)
test.to_csv(os.path.join(data_dir, "preprocessed_ord_test.csv"), index=False)
|
LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
MIT License
|
2 |
+
|
3 |
+
Copyright (c) 2023 Tatsuya Sagawa
|
4 |
+
|
5 |
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6 |
+
of this software and associated documentation files (the "Software"), to deal
|
7 |
+
in the Software without restriction, including without limitation the rights
|
8 |
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9 |
+
copies of the Software, and to permit persons to whom the Software is
|
10 |
+
furnished to do so, subject to the following conditions:
|
11 |
+
|
12 |
+
The above copyright notice and this permission notice shall be included in all
|
13 |
+
copies or substantial portions of the Software.
|
14 |
+
|
15 |
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16 |
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17 |
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18 |
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19 |
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20 |
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21 |
+
SOFTWARE.
|
data/additional_tokens.txt
ADDED
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
.
|
2 |
+
6
|
3 |
+
7
|
4 |
+
8
|
5 |
+
<
|
6 |
+
>
|
7 |
+
Ag
|
8 |
+
Al
|
9 |
+
Ar
|
10 |
+
As
|
11 |
+
Au
|
12 |
+
Ba
|
13 |
+
Bi
|
14 |
+
Ca
|
15 |
+
Cl
|
16 |
+
Cu
|
17 |
+
Fe
|
18 |
+
Ge
|
19 |
+
Hg
|
20 |
+
K
|
21 |
+
Li
|
22 |
+
Mg
|
23 |
+
Mn
|
24 |
+
Mo
|
25 |
+
Na
|
26 |
+
Nd
|
27 |
+
Ni
|
28 |
+
P
|
29 |
+
Pb
|
30 |
+
Pd
|
31 |
+
Pt
|
32 |
+
Re
|
33 |
+
Rh
|
34 |
+
Ru
|
35 |
+
Sb
|
36 |
+
Si
|
37 |
+
Sm
|
38 |
+
Ta
|
39 |
+
Ti
|
40 |
+
Tl
|
41 |
+
W
|
42 |
+
Yb
|
43 |
+
Zn
|
44 |
+
Zr
|
45 |
+
e
|
46 |
+
p
|
data/create_fig.ipynb
ADDED
The diff for this file is too large to render.
See raw diff
|
|
data/data_analysis.ipynb
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5c94d83bc9377afc96d9f0d8033b1bf4e4b81c5ec9a6227c8c44be494cfb52c0
|
3 |
+
size 14612076
|
data/demo_reaction_data.csv
ADDED
@@ -0,0 +1,113 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
,id,REACTANT,PRODUCT,REAGENT,SOLVENT,CATALYST,YIELD
|
2 |
+
0,ord-c2af606677024e008e8fb05d402e9b3b,B1C2CCCC1CCC2.C=CC1CCN(C(=O)OC(C)(C)C)C1.ClCCl.FC1(F)Oc2ccc(Br)cc2O1.N#N.O=C([O-])[O-].[K+].[K+].[Na+].[OH-],CC(C)(C)OC(=O)N1CCC(CCc2ccc3c(c2)OC(F)(F)O3)C1,Cl[Pd]Cl.[Fe+2].c1ccc(P(c2ccccc2)[c-]2cccc2)cc1.c1ccc(P(c2ccccc2)[c-]2cccc2)cc1,CN(C)C=O.O.O,Cl[Pd]Cl.[Fe+2].c1ccc(P(c2ccccc2)[c-]2cccc2)cc1.c1ccc(P(c2ccccc2)[c-]2cccc2)cc1,0.98
|
3 |
+
1,ord-96c71ebfff6c4ee8bb6a5aa2960c2ba3,Brc1cncc(I)c1.C#C[Si](C)(C)C,C[Si](C)(C)C#Cc1cncc(Br)c1,Cl[Pd](Cl)([P](c1ccccc1)(c1ccccc1)c1ccccc1)[P](c1ccccc1)(c1ccccc1)c1ccccc1,CC#N.CCN(CC)CC,Cl[Pd](Cl)([P](c1ccccc1)(c1ccccc1)c1ccccc1)[P](c1ccccc1)(c1ccccc1)c1ccccc1,0.99
|
4 |
+
2,ord-6e64400b2c7a4e9a8789b9e79cfcee25,Brc1ccc(-c2nc3ccccc3o2)nc1.CC1(C)OB(c2ccc(-c3ccc(N(c4ccccc4)c4ccccc4)cc3)nc2)OC1(C)C.O.O=C([O-])[O-].[Na+].[Na+],c1ccc(N(c2ccccc2)c2ccc(-c3ccc(-c4ccc(-c5nc6ccccc6o5)nc4)cn3)cc2)cc1,c1ccc([P](c2ccccc2)(c2ccccc2)[Pd]([P](c2ccccc2)(c2ccccc2)c2ccccc2)([P](c2ccccc2)(c2ccccc2)c2ccccc2)[P](c2ccccc2)(c2ccccc2)c2ccccc2)cc1,C1CCOC1.CC(C)=O.ClCCl.ClCCl,c1ccc([P](c2ccccc2)(c2ccccc2)[Pd]([P](c2ccccc2)(c2ccccc2)c2ccccc2)([P](c2ccccc2)(c2ccccc2)c2ccccc2)[P](c2ccccc2)(c2ccccc2)c2ccccc2)cc1,0.77
|
5 |
+
3,ord-b77f09a6bfcb449c8320355cea96234a,Brc1ccc2ncccc2c1.Cc1ccc2c(cnn2C2CCCCO2)c1[B-](F)(F)F.[K+],Cc1ccc2c(cnn2C2CCCCO2)c1-c1ccc2ncccc2c1,C1CCC(P(C2CCCCC2)C2CCCCC2)CC1.CC(=O)[O-].CC(=O)[O-].CCN(CC)CC.[Pd+2],C1CCOC1.CC#N.CCc1cc(CC)cc(CC)c1.CCc1cccc(CC)c1.CN(C)C=O.Cc1ccccc1.O,C1CCC(P(C2CCCCC2)C2CCCCC2)CC1.CC(=O)[O-].CC(=O)[O-].[Pd+2],0.13
|
6 |
+
4,ord-a78f05158222489f8516262e16c2e1d4,CC=CCOc1ccc(Cl)cc1I.CCCC[NH3+].O=C([O-])[O-].O=C[O-].[Cl-].[Na+].[Na+].[Na+],CCc1coc2ccc(Cl)cc12,CC(=O)[O-].CC(=O)[O-].[Pd+2],CN(C)C=O,CC(=O)[O-].CC(=O)[O-].[Pd+2],0.6
|
7 |
+
5,ord-5dee9b14e71f4696a55160a9b199f823,COC(=O)[C@@H]1CCCN1C[C@@H](CO)NC(=O)OCc1ccccc1.[H][H],O=C1N[C@H](CO)CN2CCC[C@@H]12,[Pd],CO.CO,[Pd],0.85
|
8 |
+
6,ord-414cb258bf5f49a68600ff46cab8a198,Cc1nc2ccccc2[nH]1.ClCCCCBr.[Na+].[OH-],Cc1nc2ccccc2n1CCCCCl,CCCC[N+](CCCC)(CCCC)CCCC.[Br-],ClCCl.ClCCl,CCCC[N+](CCCC)(CCCC)CCCC.[Br-],0.62
|
9 |
+
7,ord-89b9b02c63de42c69f3741b8d60df759,Brc1ccc(-c2ccc3c(-c4ccccc4)c4ccccc4c(-c4ccccc4)c3c2)cc1.CC(C)(C)P(C(C)(C)C)C(C)(C)C.CC(C)(C)[O-].[Na+].c1ccc2c(c1)[nH]c1ccc(-c3cccc4c3oc3ccccc34)cc12,c1ccc(-c2c3ccccc3c(-c3ccccc3)c3cc(-c4ccc(-n5c6ccccc6c6cc(-c7cccc8c7oc7ccccc78)ccc65)cc4)ccc23)cc1,O=C(/C=C/c1ccccc1)/C=C/c1ccccc1.O=C(/C=C/c1ccccc1)/C=C/c1ccccc1.[Pd],CCCCCC.Cc1ccccc1,O=C(/C=C/c1ccccc1)/C=C/c1ccccc1.O=C(/C=C/c1ccccc1)/C=C/c1ccccc1.[Pd],0.71
|
10 |
+
8,ord-79ba34c3a2a842aeac9844c9adfd81cd,C1COCCO1.CS(=O)(=O)c1ncc(OCC2CC2)c(Cl)n1.Cn1cc(B2OC(C)(C)C(C)(C)O2)c2c(c1=O)CCCC2.O.O=P([O-])([O-])[O-].[K+].[K+].[K+],Cn1cc(-c2nc(S(C)(=O)=O)ncc2OCC2CC2)c2c(c1=O)CCCC2,Cl[Pd]Cl.[Fe+2].c1ccc(P(c2ccccc2)[c-]2cccc2)cc1.c1ccc(P(c2ccccc2)[c-]2cccc2)cc1,CCOC(C)=O,Cl[Pd]Cl.[Fe+2].c1ccc(P(c2ccccc2)[c-]2cccc2)cc1.c1ccc(P(c2ccccc2)[c-]2cccc2)cc1,0.67
|
11 |
+
9,ord-ff39310cbf9b4ffeae75d7f1c5107b64,COC(=O)c1cc2c(cc1[N+](=O)[O-])OCCO2.NN.O,COC(=O)c1cc2c(cc1N)OCCO2,[Pd],CO.CO,[Pd],0.5
|
12 |
+
10,ord-3df4e020c7884aa982896480f6a21ef8,C1=COCCC1.CC(CCCCBr)(COC1CCCCO1)c1ccccc1.CC(CO)(CCCBr)c1ccccc1,CC(CCCBr)(COC1CCCCO1)c1ccccc1,Cc1ccc(S(=O)(=O)O)cc1.O,ClCCl,Cc1ccc(S(=O)(=O)O)cc1.O,0.95
|
13 |
+
11,ord-da92b8fd5ded4564a5da0bad090429ef,CCOC(=O)C1CCN(Cc2ccccc2)CC1=O.Cl,CCOC(=O)C1CCNCC1=O.Cl,[Pd],CCO,[Pd],0.84
|
14 |
+
12,ord-1dd27e38c75347908e354bc047bffa9d,COC(=O)c1cc2cc(C)cc([N+](=O)[O-])c2n1C(=O)OC(C)(C)C.O=C1CCC(=O)N1Br,COC(=O)c1cc2cc(CBr)cc([N+](=O)[O-])c2n1C(=O)OC(C)(C)C,CC(C)(C#N)N=NC(C)(C)C#N,ClC(Cl)(Cl)Cl,CC(C)(C#N)N=NC(C)(C)C#N,1.0
|
15 |
+
13,ord-eb1e3da77b1a47679dbc3f0f04feb804,CC(=O)[O-].N#CC(c1ccnc(Cl)n1)c1nc2ccccc2s1.[Na+],N#CC(c1ccncn1)c1nc2ccccc2s1,[Pd],CC(=O)O,[Pd],0.13
|
16 |
+
14,ord-652b7ae23fcd4311981e5f0157f7d978,Nc1cc2cc(Br)ccc2c(Br)n1.O=C[O-].O=C[O-].[NH4+].[NH4+],Nc1cc2cc(Br)ccc2cn1,O=C(/C=C/c1ccccc1)/C=C/c1ccccc1.O=C(/C=C/c1ccccc1)/C=C/c1ccccc1.O=C(/C=C/c1ccccc1)/C=C/c1ccccc1.O=C(/C=C/c1ccccc1)/C=C/c1ccccc1.O=C(/C=C/c1ccccc1)/C=C/c1ccccc1.O=C(/C=C/c1ccccc1)/C=C/c1ccccc1.[Pd].[Pd].[Pd].[Pd],CN(C)C=O,O=C(/C=C/c1ccccc1)/C=C/c1ccccc1.O=C(/C=C/c1ccccc1)/C=C/c1ccccc1.O=C(/C=C/c1ccccc1)/C=C/c1ccccc1.O=C(/C=C/c1ccccc1)/C=C/c1ccccc1.O=C(/C=C/c1ccccc1)/C=C/c1ccccc1.O=C(/C=C/c1ccccc1)/C=C/c1ccccc1.[Pd].[Pd].[Pd].[Pd],0.9
|
17 |
+
15,ord-e2070dae8ca14929bbd39eaf3388600c,CCCCOc1c(CNC(=O)OC(C)(C)C)n(CC(C)C)c(=O)c2ccc(OCc3ccccc3)cc12,CCCCOc1c(CNC(=O)OC(C)(C)C)n(CC(C)C)c(=O)c2ccc(O)cc12,[C].[Pd],C1CCOC1.CCO,[C].[Pd],0.97
|
18 |
+
16,ord-1c4c9374ded94ddd8102c9a833fd2784,Cn1cnc(C#N)c1.Fc1ccccc1Br,Cn1cnc(C#N)c1-c1ccccc1F.Cn1cnc(C#N)c1-c1ccccc1F,C=C[CH2-].C=C[CH2-].CC(C)(C)C(=O)[O-].CP(C)c1ccccc1.Cl[Pd+].Cl[Pd+].[K+],CCCC#N,C=C[CH2-].C=C[CH2-].CP(C)c1ccccc1.Cl[Pd+].Cl[Pd+],0.0
|
19 |
+
17,ord-c4f8232274154e2a8a24dae87cdf7ee4,COc1ccc(N(C(=O)c2ccc(C)s2)C(=O)c2ccc(C)s2)c(Br)c1,COc1ccc2[nH]c(=O)c3sc(C)cc3c2c1,CC(C)(C)[P]([Pd][P](C(C)(C)C)(C(C)(C)C)C(C)(C)C)(C(C)(C)C)C(C)(C)C,,CC(C)(C)[P]([Pd][P](C(C)(C)C)(C(C)(C)C)C(C)(C)C)(C(C)(C)C)C(C)(C)C,0.48
|
20 |
+
18,ord-3ec9d7738cee4b779ecd6978452b948a,CNCCNC.COc1cn(-c2cccc(Br)c2F)nc(-c2ccnn2-c2ccccc2)c1=O.O=C([O-])O.O=C1CCCCN1.O=P([O-])([O-])[O-].[K+].[K+].[K+].[Na+],COc1cn(-c2cccc(N3CCCCC3=O)c2F)nc(-c2ccnn2-c2ccccc2)c1=O,[Cu]I,C1COCCO1,[Cu]I,0.23
|
21 |
+
19,ord-8cdfa484669a4173a9374bd739e1b02e,C1=CCCCC1.O=C(Oc1ccccc1[C@@H]1O[C@@]2(COCc3ccccc3)CO[C@@H]1[C@@]2(O)Cc1ccccc1)c1ccccc1,O=C(Oc1ccccc1[C@@H]1O[C@@]2(CO)CO[C@@H]1[C@@H]2O)c1ccccc1,[C].[OH-].[OH-].[Pd+2],CCO,[C].[OH-].[OH-].[Pd+2],0.85
|
22 |
+
20,ord-a6fa804c926f474d9a4d3fb0992a554c,CCC(CC)(c1ccc(OCC(=O)C(C)(C)C)c(C)c1)c1ccc2sc(C(=O)O)cc2c1.COC(=O)CN.Cl.ClCCCl,CCC(CC)(c1ccc(OCC(=O)C(C)(C)C)c(C)c1)c1ccc2sc(C(=O)NCC(=O)O)cc2c1,CN(C)c1ccncc1,,CN(C)c1ccncc1,0.43
|
23 |
+
21,ord-b28e4256e2644f609f32cb5a12358d19,C=C(C)C[C@]1(c2ccccc2)CCN([C@@H](C)c2ccc(Br)cc2)C(=O)N1.CC(C)(C)OO.CCO.Cc1ccc(S(=O)(=O)C#N)cc1.[SiH3]c1ccccc1,C[C@@H](c1ccc(Br)cc1)N1CC[C@](CC(C)(C)C#N)(c2ccccc2)NC1=O,[Co],,[Co],0.04
|
24 |
+
22,ord-fed5b00f72254b7c8f95815d01fc8b4c,CC1(C)CC(=O)Oc2ccc(Br)cc21.Cc1ccccc1.[Na+].[OH-],C=C1CC(C)(C)c2cc(Br)ccc2O1,C[Al+]C.[CH3-].[Cl-].[Ti+3].c1cc[cH-]c1.c1cc[cH-]c1,C1CCOC1,C[Al+]C.[CH3-].[Cl-].[Ti+3].c1cc[cH-]c1.c1cc[cH-]c1,0.74
|
25 |
+
23,ord-c514bb3cd598458da799ed29437f4961,CC1(C)O/C(=C2/C(=O)Nc3ccc(F)cc32)C=C1Br.CCOC(C)=O.O=C([O-])[O-].O=Cc1ccc(B(O)O)cc1.[K+].[K+],CC1(C)O/C(=C2/C(=O)Nc3ccc(F)cc32)C=C1c1ccc(C=O)cc1,[Pd].c1ccc(P(c2ccccc2)c2ccccc2)cc1.c1ccc(P(c2ccccc2)c2ccccc2)cc1.c1ccc(P(c2ccccc2)c2ccccc2)cc1.c1ccc(P(c2ccccc2)c2ccccc2)cc1,C1CCOC1.O,[Pd].c1ccc(P(c2ccccc2)c2ccccc2)cc1.c1ccc(P(c2ccccc2)c2ccccc2)cc1.c1ccc(P(c2ccccc2)c2ccccc2)cc1.c1ccc(P(c2ccccc2)c2ccccc2)cc1,0.52
|
26 |
+
24,ord-97806c5415cd409eb317d696b2c1b264,C1CCOC1.O=C(Nc1cccc(Br)n1)C1(c2ccc3c(c2)OCO3)CC1.[Br-].[Zn+]CC1CCCCC1,O=C(Nc1cccc(CC2CCCCC2)n1)C1(c2ccc3c(c2)OCO3)CC1,Cl[Pd]Cl.[Fe+2].c1ccc(P(c2ccccc2)[c-]2cccc2)cc1.c1ccc(P(c2ccccc2)[c-]2cccc2)cc1,,Cl[Pd]Cl.[Fe+2].c1ccc(P(c2ccccc2)[c-]2cccc2)cc1.c1ccc(P(c2ccccc2)[c-]2cccc2)cc1,0.5
|
27 |
+
25,ord-a569e66c25e6439d8b2d02d4f793c445,CCN(CC)CC.Cl.N#CC(=O)c1ccc(Cl)cc1Cl.O=C1CCCC(=O)C1,O=C1CCCC(=O)C1C(=O)c1ccc(Cl)cc1Cl,CC(=O)[O-].CC(=O)[O-].[Cl-].[Cl-].[Cu+2].[Zn+2],CCOCC.ClCCl,CC(=O)[O-].CC(=O)[O-].[Cl-].[Cl-].[Cu+2].[Zn+2],0.78
|
28 |
+
26,ord-24ee85ae742841a4bfa0f009785a6cc5,O=C(c1ccc2[nH]c(C(=O)N3CCC(F)(F)CC3)cc2c1)N1CCC(N2CCOCC2)CC1.OB(O)c1cccc(Cl)c1.c1ccncc1,O=C(c1ccc2c(c1)cc(C(=O)N1CCC(F)(F)CC1)n2-c1cccc(Cl)c1)N1CCC(N2CCOCC2)CC1,CC(=O)[O-].CC(=O)[O-].[Cu+2],ClCCl,CC(=O)[O-].CC(=O)[O-].[Cu+2],0.64
|
29 |
+
27,ord-a95d6e9e35984f7790a1a55dc6292199,COC(=O)c1nc(Br)c(F)cc1N.OB(O)c1ccccc1F,COC(=O)c1nc(-c2ccccc2F)c(F)cc1N,ClCCl.Cl[Pd]Cl.[Fe+2].c1ccc(P(c2ccccc2)[c-]2cccc2)cc1.c1ccc(P(c2ccccc2)[c-]2cccc2)cc1,,ClCCl.Cl[Pd]Cl.[Fe+2].c1ccc(P(c2ccccc2)[c-]2cccc2)cc1.c1ccc(P(c2ccccc2)[c-]2cccc2)cc1,0.99
|
30 |
+
28,ord-b9347fbc2d1f439fb0b87974dc39c21e,CC1(C)c2cccc(P(c3ccccc3)c3ccccc3)c2Oc2c(P(c3ccccc3)c3ccccc3)cccc21.CCc1cc(N)ncn1.N#Cc1cc(Cl)c(-c2nc3ccnc(Br)c3s2)c(Cl)c1.O=C([O-])[O-].[Cs+].[Cs+],CCc1cc(Nc2nccc3nc(-c4c(Cl)cc(C#N)cc4Cl)sc23)ncn1,O=C(/C=C/c1ccccc1)/C=C/c1ccccc1.O=C(/C=C/c1ccccc1)/C=C/c1ccccc1.O=C(/C=C/c1ccccc1)/C=C/c1ccccc1.[Pd].[Pd],C1COCCO1,O=C(/C=C/c1ccccc1)/C=C/c1ccccc1.O=C(/C=C/c1ccccc1)/C=C/c1ccccc1.O=C(/C=C/c1ccccc1)/C=C/c1ccccc1.[Pd].[Pd],0.28
|
31 |
+
29,ord-669abf374db4492fa86becc5fb208313,CCc1nc(C)cc(=O)n1CCOc1ccc([N+](=O)[O-])cc1.[H][H],CCc1nc(C)cc(=O)n1CCOc1ccc(N)cc1,[Pd],C1COCCO1,[Pd],0.7
|
32 |
+
30,ord-79571893d7044aae86289e29886b41ff,C[C@@H]1CN(C(=O)OC(C)(C)C)[C@@H](C)CN1Cc1cc([N+](=O)[O-])cc2ccoc12.NN.O,C[C@@H]1CN(C(=O)OC(C)(C)C)[C@@H](C)CN1Cc1cc(N)cc2ccoc12,[Ni],C1CCOC1.CCO,[Ni],0.87
|
33 |
+
31,ord-b53374957af946358cd3c222fe4ba0bb,CC1(C)OB(c2ccc3c(c2)CCCO3)OC1(C)C.CCO.CCOC(=O)C(=O)c1c(C)sc(C)c1Br.O=C([O-])[O-].[Na+].[Na+],Cc1sc(C)c(-c2ccc3c(c2)CCCO3)c1C(=O)C(=O)O,[Pd].c1ccc(P(c2ccccc2)c2ccccc2)cc1.c1ccc(P(c2ccccc2)c2ccccc2)cc1.c1ccc(P(c2ccccc2)c2ccccc2)cc1.c1ccc(P(c2ccccc2)c2ccccc2)cc1,Cc1ccccc1.O.O,[Pd].c1ccc(P(c2ccccc2)c2ccccc2)cc1.c1ccc(P(c2ccccc2)c2ccccc2)cc1.c1ccc(P(c2ccccc2)c2ccccc2)cc1.c1ccc(P(c2ccccc2)c2ccccc2)cc1,1.0
|
34 |
+
32,ord-419341a196524ef49e44f08dda3c0e06,C=CCOc1ccc(COCCn2ccnn2)cc1.CN1C(=O)CC(=O)N(C)C1=O,Oc1ccc(COCCn2ccnn2)cc1,c1ccc([P](c2ccccc2)(c2ccccc2)[Pd]([P](c2ccccc2)(c2ccccc2)c2ccccc2)([P](c2ccccc2)(c2ccccc2)c2ccccc2)[P](c2ccccc2)(c2ccccc2)c2ccccc2)cc1,ClCCl.ClCCl,c1ccc([P](c2ccccc2)(c2ccccc2)[Pd]([P](c2ccccc2)(c2ccccc2)c2ccccc2)([P](c2ccccc2)(c2ccccc2)c2ccccc2)[P](c2ccccc2)(c2ccccc2)c2ccccc2)cc1,0.59
|
35 |
+
33,ord-c156b60e2b0d45b3b88f357c0838a8f3,Brc1ccc2ncccc2c1.Cc1ccc2c(cnn2C2CCCCO2)c1B(O)O,Cc1ccc2c(cnn2C2CCCCO2)c1-c1ccc2ncccc2c1,CC(=O)[O-].CC(=O)[O-].CC(C)(C)P(C(C)(C)C)C(C)(C)C.O=C([O-])O.[Na+].[Pd+2],CO,CC(=O)[O-].CC(=O)[O-].CC(C)(C)P(C(C)(C)C)C(C)(C)C.[Pd+2],0.39
|
36 |
+
34,ord-6a9fa314c63e49319d344313e3817ccb,Cc1ccc2c(cnn2C2CCCCO2)c1[B-](F)(F)F.O=S(=O)(Oc1ccc2ncccc2c1)C(F)(F)F.[K+],Cc1ccc2c(cnn2C2CCCCO2)c1-c1ccc2ncccc2c1,CC(=O)[O-].CC(=O)[O-].CC(C)(C)[O-].COc1cccc(OC)c1-c1ccccc1P(C1CCCCC1)C1CCCCC1.[Li+].[Pd+2],CCCCCC.CCc1cc(CC)cc(CC)c1.CCc1cccc(CC)c1.CN(C)C=O.CN(C)C=O.Cc1ccccc1.O,CC(=O)[O-].CC(=O)[O-].COc1cccc(OC)c1-c1ccccc1P(C1CCCCC1)C1CCCCC1.[Pd+2],0.11
|
37 |
+
35,ord-1f77b54dfacd4b91acfea3bec7fc20e1,CC1(O)CCNCC1.CCN(CC)CC.C[C@H](OC(=O)C(Br)c1ccccc1)c1ccccc1,C[C@H](OC(=O)[C@@H](c1ccccc1)N1CCC(C)(O)CC1)c1ccccc1,CCCC[N+](CCCC)(CCCC)CCCC.[I-],C1CCOC1.C1CCOC1.CCOC(C)=O,CCCC[N+](CCCC)(CCCC)CCCC.[I-],0.6
|
38 |
+
36,ord-ed45cb591b94415db1b32ac28a81a31d,COc1cc(C=O)c(F)cc1Cl.Cc1ccc(N)nc1.[C-]#[N+]C1CCCCC1,COc1cc(-c2nc3ccc(C)cn3c2NC2CCCCC2)c(F)cc1Cl,O=C(O)C(F)(F)F,CC(C)O.CC(C)O,O=C(O)C(F)(F)F,0.1
|
39 |
+
37,ord-0092383681d845dcba430ac49717792e,CC(=O)[O-].CCOC(=O)c1cc(Cl)n2nccc2n1.[Na+],CCOC(=O)c1ccn2nccc2n1,[Pd],CCO.CCOC(C)=O,[Pd],0.78
|
40 |
+
38,ord-bf17cc17249648a7a2825e5ddf7d6505,O=S(Cl)Cl.O=[N+]([O-])c1cc2c(O)ncnc2cc1F,O=[N+]([O-])c1cc2c(Cl)ncnc2cc1F,CN(C)C=O,,CN(C)C=O,0.94
|
41 |
+
39,ord-de48fac1d2d04dec84902e698edeb682,CC[SiH](CC)CC.Cc1cc(C2=NCC(c3cc(Cl)cc(Cl)c3)(C(F)(F)F)C2)ccc1Br.Cc1cc(C2=NCC(c3cc(Cl)cc(Cl)c3)(C(F)(F)F)C2)ccc1Br.O=C([O-])[O-].[Na+].[Na+],Cc1cc(C2=NCC(c3cc(Cl)cc(Cl)c3)(C(F)(F)F)C2)ccc1C=O,Cl[Pd]Cl.[Fe+2].c1ccc(P(c2ccccc2)[c-]2cccc2)cc1.c1ccc(P(c2ccccc2)[c-]2cccc2)cc1,CN(C)C=O,Cl[Pd]Cl.[Fe+2].c1ccc(P(c2ccccc2)[c-]2cccc2)cc1.c1ccc(P(c2ccccc2)[c-]2cccc2)cc1,0.74
|
42 |
+
40,ord-41d60ad3953644c4b04ae802bfc04e5a,C=CCO.CCN(CC)CC.O=[N+]([O-])c1ccc(Br)c(C(F)(F)F)c1,O=CCCc1ccc([N+](=O)[O-])cc1C(F)(F)F,CC(=O)[O-].CC(=O)[O-].CCCC[N+](CCCC)(CCCC)CCCC.[Cl-].[Pd+2],CN(C)C=O,CC(=O)[O-].CC(=O)[O-].CCCC[N+](CCCC)(CCCC)CCCC.[Cl-].[Pd+2],0.69
|
43 |
+
41,ord-8afc8beb1f7840b8aba6c03887d2ef67,COc1ccc(-c2cnc(N)c(Cc3ccccc3)n2)cc1.COc1ccc2c(C(=O)Cl)cccc2c1.O,COc1ccc(-c2cnc(N(C(=O)c3cccc4cc(OC)ccc34)C(=O)c3cccc4cc(OC)ccc34)c(Cc3ccccc3)n2)cc1,CN(C)c1ccncc1,c1ccncc1,CN(C)c1ccncc1,0.68
|
44 |
+
42,ord-a293a2555d6847cfb9346d2687aa4a50,C1CCOC1.CCN(CC)CC.COc1cc(C(=O)Nc2nc3c(OC)ccc(C4=CCN(C(=O)OC(C)(C)C)CC4)c3s2)cc(Cl)n1,COc1cc(C(=O)Nc2nc3c(OC)ccc(C4CCN(C(=O)OC(C)(C)C)CC4)c3s2)ccn1,[Pd],CO,[Pd],0.3
|
45 |
+
43,ord-56e75ad899ba4adea278567a03433405,Brc1ccc2ncccc2c1.Cc1ccc2c(cnn2C2CCCCO2)c1[B-](F)(F)F.[K+],Cc1ccc2c(cnn2C2CCCCO2)c1-c1ccc2ncccc2c1,CC(=O)[O-].CC(=O)[O-].[Na+].[OH-].[Pd+2],CCc1cc(CC)cc(CC)c1.CCc1cccc(CC)c1.CN(C)C=O.CN(C)C=O.Cc1ccccc1.O.O,CC(=O)[O-].CC(=O)[O-].[Pd+2],0.19
|
46 |
+
44,ord-421f24003cb04881a3e8f8edef479566,CC(=O)[Cu]C(C)=O.CCN(CC)CC.COC(=O)c1c(C)[nH]c2ccccc12.OB(O)c1ccccc1,COC(=O)c1c(C)n(-c2ccccc2)c2ccccc12,CN(C)c1ccncc1,ClCCl,CN(C)c1ccncc1,0.39
|
47 |
+
45,ord-f97af96ff3dc4293996dd87a9db42bb9,CC(C)c1nn(Cc2ccccc2Br)c(=O)c(C(=O)NCC(=O)O)c1O.Cl.O=C([O-])[O-].OB(O)c1ccc(C(F)(F)F)cc1.[K+].[K+],CC(C)c1nn(Cc2ccccc2-c2ccc(C(F)(F)F)cc2)c(=O)c(C(=O)NCC(=O)O)c1O,c1ccc([P](c2ccccc2)(c2ccccc2)[Pd]([P](c2ccccc2)(c2ccccc2)c2ccccc2)([P](c2ccccc2)(c2ccccc2)c2ccccc2)[P](c2ccccc2)(c2ccccc2)c2ccccc2)cc1,C1COCCO1.O.O,c1ccc([P](c2ccccc2)(c2ccccc2)[Pd]([P](c2ccccc2)(c2ccccc2)c2ccccc2)([P](c2ccccc2)(c2ccccc2)c2ccccc2)[P](c2ccccc2)(c2ccccc2)c2ccccc2)cc1,0.46
|
48 |
+
46,ord-952ab56552d14163b81797c4be7275fa,CC(C)(C)OC(=O)N1CCC(C)(C)c2ccc([N+](=O)[O-])cc21,CC(C)(C)OC(=O)N1CCC(C)(C)c2ccc(N)cc21,[Pd],CO,[Pd],0.95
|
49 |
+
47,ord-804093bffa5d43049c093723ec65aa6e,Cc1ccc(N)cc1.FC(F)(F)c1ccc(Cl)cc1,Cc1ccc(Nc2ccc(C(F)(F)F)cc2)cc1,CC(C)c1cc(C(C)C)c(-c2ccccc2P(C2CCCCC2)(C2CCCCC2)->[Pd]2(<-Nc3ccccc3-c3ccccc32)OS(=O)(=O)C(F)(F)F)c(C(C)C)c1.CN1CCCN2CCCN=C12.c1ccc2oncc2c1,CS(C)=O.CS(C)=O.CS(C)=O.CS(C)=O.CS(C)=O,CC(C)c1cc(C(C)C)c(-c2ccccc2P(C2CCCCC2)(C2CCCCC2)->[Pd]2(<-Nc3ccccc3-c3ccccc32)OS(=O)(=O)C(F)(F)F)c(C(C)C)c1,0.19
|
50 |
+
48,ord-3292ea938a434474adb4a63169f04be8,CCCC(=NOCC)C1=C(O)CC(c2c(C)c(C)c(OCc3ccccc3)c(C)c2C)CC1=O,CCCC(=NOCC)C1=C(O)CC(c2c(C)c(C)c(O)c(C)c2C)CC1=O,Cl.[Pd],CCOC(C)=O,Cl.[Pd],0.3
|
51 |
+
49,ord-45f62759dcd5421bb66c8fd1b768bbe7,FC1(F)C(F)(F)C(F)(F)C(F)(I)C(F)(F)C1(F)F.Nc1ccccc1.O=C([O-])O.O=S([O-])S(=O)[O-].[Na+].[Na+].[Na+],Nc1ccc(C2(F)C(F)(F)C(F)(F)C(F)(F)C(F)(F)C2(F)F)cc1,CCCC[N+](CCCC)(CCCC)CCCC.O=S(=O)([O-])O,COC(C)(C)C.O,CCCC[N+](CCCC)(CCCC)CCCC.O=S(=O)([O-])O,0.68
|
52 |
+
50,ord-7030178ac7694e1eace7144b87cfafa0,CCOC(=O)[C@H](C)Nc1ncccc1C#N.C[O-].Cl.[Na+],C[C@@H]1Nc2ncccc2CNC1=O,[Ni],CO,[Ni],0.23
|
53 |
+
51,ord-50297983814043f68e5ed0382c170795,O=C1NC(=O)c2c1c(-c1ccccc1)cc1[nH]c3ccc(OP(=O)(OCc4ccccc4)OCc4ccccc4)cc3c21,O=C1NC(=O)c2c1c(-c1ccccc1)cc1[nH]c3ccc(OP(=O)(O)O)cc3c21,[Pd],C1CCOC1.CO,[Pd],0.71
|
54 |
+
52,ord-72b281f722d64a2a86366b024fffcf4e,CCOCC.COC(=O)C(C)(SC)c1cccs1,COC(=O)C(C)c1cccs1,O=S(=O)([O-])[O-].[Cu+2].[Zn],CC(=O)O,O=S(=O)([O-])[O-].[Cu+2].[Zn],0.87
|
55 |
+
53,ord-09fd3b562efe4258b5377913c5a128a3,CC(C)(C)C(=O)O.CCCCP(C12CC3CC(CC(C3)C1)C2)C12CC3CC(CC(C3)C1)C2.COC(=O)c1ccc2c(c1)CCCC2(O)c1nccs1.COc1ccnc(Nc2cc(C)cc(Br)n2)c1.[Cs+].[F-],COC(=O)c1ccc2c(c1)CCCC2(O)c1ncc(-c2cc(C)cc(Nc3cc(OC)ccn3)n2)s1,CC(=O)[O-].CC(=O)[O-].[Pd+2],C1COCCO1.C1COCCO1,CC(=O)[O-].CC(=O)[O-].[Pd+2],0.52
|
56 |
+
54,ord-5a891173b2f14da481ad4b70dab850c7,C=CC(=O)NC1CCN(S(=O)(=O)c2ccc([N+](=O)[O-])cc2)CC1.CCO.[Cl-].[NH4+],C=CC(=O)NC1CCN(S(=O)(=O)c2ccc(N)cc2)CC1,[Fe],O,[Fe],0.51
|
57 |
+
55,ord-947db249f38c44959a558af280c17a6a,N#Cc1cc(B(O)O)ccc1F.O.O=C([O-])[O-].O=C1C(Cc2c(Cl)cc(OS(=O)(=O)C(F)(F)F)cc2Cl)CCN1C1CCCCC1.[Na+].[Na+],N#Cc1cc(-c2cc(Cl)c(CC3CCN(C4CCCCC4)C3=O)c(Cl)c2)ccc1F,c1ccc([P](c2ccccc2)(c2ccccc2)[Pd]([P](c2ccccc2)(c2ccccc2)c2ccccc2)([P](c2ccccc2)(c2ccccc2)c2ccccc2)[P](c2ccccc2)(c2ccccc2)c2ccccc2)cc1,C1CCOC1,c1ccc([P](c2ccccc2)(c2ccccc2)[Pd]([P](c2ccccc2)(c2ccccc2)c2ccccc2)([P](c2ccccc2)(c2ccccc2)c2ccccc2)[P](c2ccccc2)(c2ccccc2)c2ccccc2)cc1,0.88
|
58 |
+
56,ord-a5d240b1afd440d68f607db269eb8d83,CCN=C=NCCCN(C)C.COc1cccc(S(=O)(=O)N(CCCN(C)C)[C@@H](C(=O)OCc2ccccc2)C(C)(C)C)c1.Cl.Cl.NOC1CCCCO1.O.O.O.O.O.O.O.O.O.O.O=C([O-])O.On1nnc2cccnc21.[Na+],COc1cccc(S(=O)(=O)N(CCCN(C)C)[C@@H](C(=O)NO)C(C)(C)C)c1,[Pd],CN(C)C=O.CN(C)C=O.CO.CO,[Pd],1.0
|
59 |
+
57,ord-ea572bd72b7c41448346a54c8f0ce405,COc1ccc(Cl)cc1.Cc1cc(C)c(B(O)O)c(C)c1,COc1ccc(-c2c(C)cc(C)cc2C)cc1,C1=C\CC/C=C\CC/1.C1=C\CC/C=C\CC/1.CP(C)C.O.O=P([O-])([O-])[O-].[K+].[K+].[K+].[Ni],C1COCCO1.C1COCCO1.C1COCCO1.C1COCCO1,C1=C\CC/C=C\CC/1.C1=C\CC/C=C\CC/1.CP(C)C.[Ni],0.0
|
60 |
+
58,ord-4f2876eb72624720885d110d9afe8876,O=C(O)Cc1cccc(F)c1[N+](=O)[O-],O=C1Cc2cccc(F)c2N1,[Pd],CC(=O)O,[Pd],0.83
|
61 |
+
59,ord-f70beae1ac0f46688a2b14e378fea5de,CC(C)(C)[O-].CC(C)(C)[Si](C)(C)OCC(OS(C)(=O)=O)c1ccc(Cl)c(F)c1.CSc1nccc(-c2cc[nH]c(=O)n2)n1.[K+],CSc1nccc(-c2ccn(C(CO[Si](C)(C)C(C)(C)C)c3ccc(Cl)c(F)c3)c(=O)n2)n1,CCCC[N+](CCCC)(CCCC)CCCC.[I-],C1CCOC1.C1CCOC1,CCCC[N+](CCCC)(CCCC)CCCC.[I-],0.33
|
62 |
+
60,ord-dd4fc6ef7a154ea990b298e4c0e473d8,C[C@H]1CN(C(=O)OC(C)(C)C)CCN1c1ccc(N)nc1.Cn1nc(Cl)cc(Br)c1=O.O=C([O-])[O-].[Cs+].[Cs+],C[C@H]1CN(C(=O)OC(C)(C)C)CCN1c1ccc(Nc2cc(Cl)nn(C)c2=O)nc1,CC1(C)c2cccc(P(c3ccccc3)c3ccccc3)c2Oc2c(P(c3ccccc3)c3ccccc3)cccc21.O=C(/C=C/c1ccccc1)/C=C/c1ccccc1.O=C(/C=C/c1ccccc1)/C=C/c1ccccc1.O=C(/C=C/c1ccccc1)/C=C/c1ccccc1.[Pd].[Pd],C1COCCO1,CC1(C)c2cccc(P(c3ccccc3)c3ccccc3)c2Oc2c(P(c3ccccc3)c3ccccc3)cccc21.O=C(/C=C/c1ccccc1)/C=C/c1ccccc1.O=C(/C=C/c1ccccc1)/C=C/c1ccccc1.O=C(/C=C/c1ccccc1)/C=C/c1ccccc1.[Pd].[Pd],0.86
|
63 |
+
61,ord-c01ebf59211542e7b8b22c41350e1260,Cc1ccc(C2CN(C)Cc3cc(B4OC(C)(C)C(C)(C)O4)ccc32)cc1.Cc1ccc(Cl)nn1.O=C([O-])[O-].[Cs+].[Cs+],Cc1ccc(C2CN(C)Cc3cc(-c4ccc(C)nn4)ccc32)cc1,Cl[Pd]Cl.[Fe+2].c1ccc(P(c2ccccc2)[c-]2cccc2)cc1.c1ccc(P(c2ccccc2)[c-]2cccc2)cc1,CN(C)C=O.O,Cl[Pd]Cl.[Fe+2].c1ccc(P(c2ccccc2)[c-]2cccc2)cc1.c1ccc(P(c2ccccc2)[c-]2cccc2)cc1,0.21
|
64 |
+
62,ord-ad61602b94f64d8495ee716a39e6d2a1,CCCCCCc1csc2c(CCCCCC)c(C(=O)O)sc12.O=C=O.c1ccc2ncccc2c1,CCCCCCc1csc2c(CCCCCC)csc12,[Cu],CCCCCC,[Cu],0.68
|
65 |
+
63,ord-9c5bfa45fb954b7c9fcf1a95e13440f3,CC(C)(C)[Si](C)(C)Cl.COc1ccc(F)c(-c2ccc(CO)cc2C(O)C(C)(C)C)c1,COc1ccc(F)c(-c2ccc(CO[Si](C)(C)C(C)(C)C)cc2C(O)C(C)(C)C)c1,CN(C)c1ccncc1,ClCCl,CN(C)c1ccncc1,0.96
|
66 |
+
64,ord-490528e794064585840f9e8171a1c4ff,C1=COCCC1.Cc1cc(OC2CCCCO2)cc(CO)c1Br,Cc1cc(OC2CCCCO2)cc(COC2CCCCO2)c1Br,CC1(C)C2CCC1(CS(=O)(=O)O)C(=O)C2,ClCCl,CC1(C)C2CCC1(CS(=O)(=O)O)C(=O)C2,0.9
|
67 |
+
65,ord-42093bc147ea48df87675a44b529ea9e,COC(=O)/C=C/c1ccc(N(Cc2ccccc2)Cc2ccccc2)nc1C(=O)OC.[BH4-].[Na+],COC(=O)CCc1ccc(N(Cc2ccccc2)Cc2ccccc2)nc1C(=O)OC,Cl[Ni]Cl.O.O.O.O.O.O,CO.[Cl-].[NH4+],Cl[Ni]Cl.O.O.O.O.O.O,0.86
|
68 |
+
66,ord-8b9ed594e1bb490e8e63adf998a912d0,CC(C)(C)OC(=O)N1CC[C@H](O)[C@H]1CO.CC(C)(C)[Si](C)(C)Cl.CCN(CC)CC,CC(C)(C)OC(=O)N1CC[C@H](O)[C@H]1CO[Si](C)(C)C(C)(C)C,CN(C)c1ccncc1,ClCCl.ClCCl,CN(C)c1ccncc1,0.99
|
69 |
+
67,ord-20adf5fd2c4745b282d3719f1519f23a,Brc1ccc2ncccc2c1.Cc1ccc2c(cnn2C2CCCCO2)c1[B-](F)(F)F.[K+],Cc1ccc2c(cnn2C2CCCCO2)c1-c1ccc2ncccc2c1,CC(=O)[O-].CC(=O)[O-].CN(C)c1ccc(P(C(C)(C)C)C(C)(C)C)cc1.CN(C)c1ccc(P(C(C)(C)C)C(C)(C)C)cc1.Cl[Pd]Cl.O=C([O-])O.[Na+].[Pd+2],C1CCOC1.CCc1cc(CC)cc(CC)c1.CCc1cccc(CC)c1.CN(C)C=O.Cc1ccccc1.O.O,CC(=O)[O-].CC(=O)[O-].CN(C)c1ccc(P(C(C)(C)C)C(C)(C)C)cc1.CN(C)c1ccc(P(C(C)(C)C)C(C)(C)C)cc1.Cl[Pd]Cl.[Pd+2],0.21
|
70 |
+
68,ord-a84ce0c2384f46d9a1b1262447ac2d9e,CCOC(=O)CC(=O)OCC.COC1C=CC(OC)O1.O,CCOC(=O)C(C(=O)OCC)c1ccco1,[Cl-].[Cl-].[Zn+2],CC(=O)O,[Cl-].[Cl-].[Zn+2],0.29
|
71 |
+
69,ord-74094cf1745040d383c9f9e1048d6de7,Brc1ccc(-c2ccc3ccc4cccc5ccc2c3c45)cc1.CC1(C)C=C(B2OC(C)(C)C(C)(C)O2)C=C2C=c3cc4c5ccccc5c5ccccc5c4cc3=C21.CCO.O=C([O-])[O-].[Na+].[Na+],CC1(C)C=C(c2ccc(-c3ccc4ccc5cccc6ccc3c4c56)cc2)C=C2C=c3cc4c5ccccc5c5ccccc5c4cc3=C21,c1ccc([P](c2ccccc2)(c2ccccc2)[Pd]([P](c2ccccc2)(c2ccccc2)c2ccccc2)([P](c2ccccc2)(c2ccccc2)c2ccccc2)[P](c2ccccc2)(c2ccccc2)c2ccccc2)cc1,CO.Cc1ccccc1,c1ccc([P](c2ccccc2)(c2ccccc2)[Pd]([P](c2ccccc2)(c2ccccc2)c2ccccc2)([P](c2ccccc2)(c2ccccc2)c2ccccc2)[P](c2ccccc2)(c2ccccc2)c2ccccc2)cc1,0.57
|
72 |
+
70,ord-e2a772144e774571bc6c80d27ec8d125,C1CCOC1.C=CCO[C@H]1O[C@H](COCc2ccccc2)[C@@H](O[C@@H]2O[C@H](CF)[C@@H](OCc3ccccc3)[C@H](OCc3ccccc3)[C@H]2OCc2ccccc2)[C@H](OCc2ccccc2)[C@H]1OCc1ccccc1,C=CCOC1O[C@H](COCc2ccccc2)[C@@H](O[C@@H]2O[C@H](CF)[C@@H](OCc3ccccc3)[C@H](OCc3ccccc3)[C@H]2OCc2ccccc2)[C@H](OCc2ccccc2)[C@H]1OCc1ccccc1,Cl[Pd]Cl,CO,Cl[Pd]Cl,0.63
|
73 |
+
71,ord-38e4779d915e47c48c745aea7ba79a76,CCCc1c(Cc2ccc(-c3ccccc3C#N)cc2F)c(=O)n([C@H]2CC[C@H](O)CC2)c2ncnn12.CCOC(=O)C=[N+]=[N-].Cc1ccccc1,CCCc1c(Cc2ccc(-c3ccccc3C#N)cc2F)c(=O)n([C@H]2CC[C@H](OCC(C)(C)O)CC2)c2ncnn12,CC(=O)[O-].[Rh+],,CC(=O)[O-].[Rh+],0.22
|
74 |
+
72,ord-3d6ff4ad7dda45e294602d21def90e23,CC(=O)O.CCC=O.COC(=O)CC(=O)CCl,CCC=C(C(=O)CCl)C(=O)OC,C1CCNCC1,ClCCl.ClCCl,C1CCNCC1,0.92
|
75 |
+
73,ord-85018eaedb81478f8d3570187284f89c,CCOP(=O)(OCC)[C@@H]1SC[C@@H](CO[Si](C)(C)C(C)(C)C)S1,CCOP(=O)(OCC)[C@@H]1SC[C@@H](CO)S1,CC(=O)Cl.[Cl-].[NH4+],CO,CC(=O)Cl.[Cl-].[NH4+],0.86
|
76 |
+
74,ord-1489de592ed543aead9b9b3cc9eda428,CC(C)(C)OC(=O)OC(=O)OC(C)(C)C.CCOC(=O)c1sc(N)nc1C(F)(F)F,CCOC(=O)c1sc(NOC(=O)OC(C)(C)C)nc1C(F)(F)F,CN(C)c1ccncc1,ClCCl,CN(C)c1ccncc1,0.92
|
77 |
+
75,ord-cb7e6f31ffa444cfad91d75ff54f1c8b,CC(C)(C)OC(=O)N1C[C@@H](COc2c(F)cccc2[N+](=O)[O-])OC[C@H]1CO[Si](C)(C)C(C)(C)C.[H][H],CC(C)(C)OC(=O)N1C[C@@H](COc2c(N)cccc2F)OC[C@H]1CO[Si](C)(C)C(C)(C)C,[Pd],CCOC(C)=O,[Pd],1.0
|
78 |
+
76,ord-2686b5fe071e4571b88efe2f81c589cc,CC(C)(C)OC(=O)OC(=O)[O-].[N-]=[N+]=NC(=O)N=[N+]=[N-],CC(C)(C)OC(=O)C(N)=O,[OH-].[OH-].[Pd+2],CO,[OH-].[OH-].[Pd+2],0.55
|
79 |
+
77,ord-c1d83ba8cfd3462c853244d9e87f8ffa,CC(C)n1nc(I)c2c(N)ncnc21.COc1ccc(B(O)O)cc1.O=C([O-])[O-].[Na+].[Na+],COc1ccc(-c2nn(C(C)C)c3ncnc(N)c23)cc1,c1ccc([P](c2ccccc2)(c2ccccc2)[Pd]([P](c2ccccc2)(c2ccccc2)c2ccccc2)([P](c2ccccc2)(c2ccccc2)c2ccccc2)[P](c2ccccc2)(c2ccccc2)c2ccccc2)cc1,CCO.COCCOC,c1ccc([P](c2ccccc2)(c2ccccc2)[Pd]([P](c2ccccc2)(c2ccccc2)c2ccccc2)([P](c2ccccc2)(c2ccccc2)c2ccccc2)[P](c2ccccc2)(c2ccccc2)c2ccccc2)cc1,0.16
|
80 |
+
78,ord-dd35d66336ce4bc5b4c1d0e068dfa168,O=C(OCc1ccccc1)N1CCC2(CC1)C(=O)N(Cc1cccc([N+](=O)[O-])c1)CN2c1ccccc1.[Cl-].[NH4+],Nc1cccc(CN2CN(c3ccccc3)C3(CCN(C(=O)OCc4ccccc4)CC3)C2=O)c1,[Fe],CCO.O,[Fe],0.95
|
81 |
+
79,ord-b7d4ad900fd64b83ac3ac7883e5a67fb,Nc1c(-c2ccccn2)cccc1[N+](=O)[O-],Nc1cccc(-c2ccccn2)c1N,[Pd],CCOC(C)=O,[Pd],0.89
|
82 |
+
80,ord-992d3e706540481abd31c86e7f5a30a9,Cc1cc(Br)sc1CO.O=C([O-])[O-].OB(O)c1ccc(C(F)(F)F)cc1.[K+].[K+],Cc1cc(-c2ccc(C(F)(F)F)cc2)sc1CO,c1ccc([P](c2ccccc2)(c2ccccc2)[Pd]([P](c2ccccc2)(c2ccccc2)c2ccccc2)([P](c2ccccc2)(c2ccccc2)c2ccccc2)[P](c2ccccc2)(c2ccccc2)c2ccccc2)cc1,Cc1ccccc1,c1ccc([P](c2ccccc2)(c2ccccc2)[Pd]([P](c2ccccc2)(c2ccccc2)c2ccccc2)([P](c2ccccc2)(c2ccccc2)c2ccccc2)[P](c2ccccc2)(c2ccccc2)c2ccccc2)cc1,0.48
|
83 |
+
81,ord-9df5ea16a6b54f7287d0342b6c69c662,Cc1ccncc1N1CCNC1=O.Cn1cc(Br)c2c(C#N)cccc21.N[C@@H]1CCCC[C@H]1N.O=C([O-])[O-].[K+].[K+],Cc1ccncc1N1CCN(c2cn(C)c3cccc(C#N)c23)C1=O,I[Cu]I,C1COCCO1,I[Cu]I,0.24
|
84 |
+
82,ord-f64a42b092c94884876b2d8c6edce61d,CN1CC=C(c2ccc(N)nn2)CC1,CN1CCC(c2ccc(N)nn2)CC1,[Pd],CCO,[Pd],0.89
|
85 |
+
83,ord-0d04174b23454275a39535f812e1524d,CC1(C)OBOC1(C)C.CCN(CC)CC.CN(C)c1ccc(-c2cnc3c(c2)c(I)cn3S(=O)(=O)c2ccccc2)cc1.ClCCl,CN(C)c1ccc(-c2cnc3c(c2)c(B2OC(C)(C)C(C)(C)O2)cn3S(=O)(=O)c2ccccc2)cc1,Cl[Pd]Cl.[Fe+2].c1ccc(P(c2ccccc2)[c-]2cccc2)cc1.c1ccc(P(c2ccccc2)[c-]2cccc2)cc1,C1COCCO1,Cl[Pd]Cl.[Fe+2].c1ccc(P(c2ccccc2)[c-]2cccc2)cc1.c1ccc(P(c2ccccc2)[c-]2cccc2)cc1,1.0
|
86 |
+
84,ord-ad46416b40594ec5bdb903ea60cf5bf4,CCCCCO.Nc1ccccc1C(=O)O.O=C([O-])[O-].O=[N+]([O-])c1cc(Cl)ccc1Br.[K+].[K+],O=C(O)c1ccccc1Nc1ccc(Cl)cc1[N+](=O)[O-],[Cu],,[Cu],0.83
|
87 |
+
85,ord-dfe7f7782502427c94741fe658e539f4,CC1(C)c2cccc(P(c3ccccc3)c3ccccc3)c2Oc2c(P(c3ccccc3)c3ccccc3)cccc21.CN(C)CCN.O=C([O-])[O-].O=[N+]([O-])c1cc(I)c2occc2c1.[Cs+].[Cs+],CN(C)CCNc1cc([N+](=O)[O-])cc2ccoc12,O=C(/C=C/c1ccccc1)/C=C/c1ccccc1.O=C(/C=C/c1ccccc1)/C=C/c1ccccc1.O=C(/C=C/c1ccccc1)/C=C/c1ccccc1.[Pd].[Pd],Cc1ccccc1C,O=C(/C=C/c1ccccc1)/C=C/c1ccccc1.O=C(/C=C/c1ccccc1)/C=C/c1ccccc1.O=C(/C=C/c1ccccc1)/C=C/c1ccccc1.[Pd].[Pd],0.44
|
88 |
+
86,ord-b62d233adb304f30b5a36ca35fcd7b87,CC(=O)OC(c1cncc(Br)c1)C(F)(F)F.CS(=O)[O-].O=C(O)[C@@H]1CCCN1.O=C([O-])O.O=C([O-])[O-].[K+].[K+].[Na+].[Na+],CS(=O)(=O)c1cncc(C(O)C(F)(F)F)c1,I[Cu]I,CS(C)=O.O,I[Cu]I,0.17
|
89 |
+
87,ord-e53b2ec8d81d4a98b6f257b0907e0246,CC(C)(C)[Si](C)(C)Oc1cc(C#N)ccc1NC(=S)Nc1ccccc1Br.CCN(CC)CC.CS(=O)(=O)Cl,CC(C)(C)[Si](C)(C)Oc1cc(C#N)ccc1N=C=Nc1ccccc1Br,CN(C)c1ccncc1,ClCCl,CN(C)c1ccncc1,1.0
|
90 |
+
88,ord-097147ff6e314b5fba919dbc677dfb5a,COc1ccc(B(O)O)cn1.FC(F)(F)c1cccnc1Cl.O=C([O-])[O-].[K+].[K+],COc1ccc(-c2ncccc2C(F)(F)F)cn1,[Pd].c1ccc(P(c2ccccc2)c2ccccc2)cc1.c1ccc(P(c2ccccc2)c2ccccc2)cc1.c1ccc(P(c2ccccc2)c2ccccc2)cc1.c1ccc(P(c2ccccc2)c2ccccc2)cc1,Cc1ccccc1,[Pd].c1ccc(P(c2ccccc2)c2ccccc2)cc1.c1ccc(P(c2ccccc2)c2ccccc2)cc1.c1ccc(P(c2ccccc2)c2ccccc2)cc1.c1ccc(P(c2ccccc2)c2ccccc2)cc1,0.95
|
91 |
+
89,ord-9a7a82dd9a9145ada8a907e3038be509,C1COCCO1.CO/C=C(/I)C(=O)OC.COCCOc1ccc(OCc2cc(Cl)ccc2B(O)O)cc1.O=P([O-])([O-])[O-].[K+].[K+].[K+],CO/C=C(/C(=O)OC)c1ccc(Cl)cc1COc1ccc(OCCOC)cc1,[Pd].c1ccc(P(c2ccccc2)c2ccccc2)cc1.c1ccc(P(c2ccccc2)c2ccccc2)cc1.c1ccc(P(c2ccccc2)c2ccccc2)cc1.c1ccc(P(c2ccccc2)c2ccccc2)cc1,CCOC(C)=O.O,[Pd].c1ccc(P(c2ccccc2)c2ccccc2)cc1.c1ccc(P(c2ccccc2)c2ccccc2)cc1.c1ccc(P(c2ccccc2)c2ccccc2)cc1.c1ccc(P(c2ccccc2)c2ccccc2)cc1,0.71
|
92 |
+
90,ord-9f4ef94bf15a4ee19e832797fcbfc276,Brc1ccccc1-c1ccccc1.FC(F)n1ccnc1-c1ccccc1,FC(F)n1c(-c2ccccc2-c2ccccc2)cnc1-c1ccccc1,C=CC[Pd]Cl.C=CC[Pd]Cl.CC(C)(C)C(=O)[O-].CCCCP(CCCC)CCCC.[K+],CC(=O)N(C)C.CC(=O)N(C)C.CC(=O)N(C)C,C=CC[Pd]Cl.C=CC[Pd]Cl.CCCCP(CCCC)CCCC,0.06
|
93 |
+
91,ord-3fc705ee410b4deab1cf90242dd2b028,O=C(O)Cc1ccc(-c2ccccc2)cc1[N+](=O)[O-],O=C1Cc2ccc(-c3ccccc3)cc2N1,[Fe],CC(=O)O,[Fe],0.93
|
94 |
+
92,ord-e24b7b3e53074286a14395f6e069f883,O=[N+]([O-])c1nc[nH]n1.OB(O)c1cccc(C(F)(F)F)c1.c1ccncc1,O=[N+]([O-])c1ncn(-c2cccc(C(F)(F)F)c2)n1,CC(=O)[O-].CC(=O)[O-].[Cu+2],ClCCl,CC(=O)[O-].CC(=O)[O-].[Cu+2],0.49
|
95 |
+
93,ord-16b2582c09fe4682a6b4b6a8ce6f6f3b,C=C(C)CN(C(C)=O)c1cc([N+](=O)[O-])ccc1Br.CC(=O)[O-].O=C[O-].[Na+].[Na+],CC(=O)N1CC(C)(C)c2ccc([N+](=O)[O-])cc21,CC(=O)[O-].CC(=O)[O-].CC[N+](CC)(CC)CC.O.[Cl-].[Pd+2],CN(C)C=O,CC(=O)[O-].CC(=O)[O-].CC[N+](CC)(CC)CC.O.[Cl-].[Pd+2],0.88
|
96 |
+
94,ord-9385291980c14709b3314e8c4eac0620,CCN(C(C)C)C(C)C.CCN=C=NCCCN(C)C.Cl.O=C(O)/C=C/c1cnc2c(c1)CCC(=O)N2.O=C(O)C(F)(F)F.c1ccc2oc([C@H]3CCCN3)nc2c1,O=C1CCc2cc(/C=C/C(=O)N3CCCCC3c3nc4ccccc4o3)cnc2N1,CN(C)c1ccncc1,CN(C)C=O,CN(C)c1ccncc1,0.13
|
97 |
+
95,ord-6114c9b5157c4a06afa58a4a0bff2316,CN(C)CC(=O)O.COc1cccc(F)c1O.Cl.Fc1ccc(I)cc1.O=C([O-])[O-].[Cs+].[Cs+],COc1cccc(F)c1Oc1ccc(F)cc1,[Cu],C1COCCO1,[Cu],0.21
|
98 |
+
96,ord-3351aa586f3a40cc8b5aa2c4f0d2a2eb,Brc1ccc2nc(N3CCN(C4CC4)CC3)sc2c1.CC(N)=O.CC1(C)c2cccc(P(c3ccccc3)c3ccccc3)c2Oc2c(P(c3ccccc3)c3ccccc3)cccc21.O=C([O-])[O-].[Cs+].[Cs+],CC(=O)Nc1ccc2nc(N3CCN(C4CC4)CC3)sc2c1,CC(=O)[O-].CC(=O)[O-].[Pd+2],C1COCCO1.O,CC(=O)[O-].CC(=O)[O-].[Pd+2],0.24
|
99 |
+
97,ord-5b335e29ecd2454d93b69f994db21e79,CC(C)(Cc1cnc2c(Br)cccn12)[N+](=O)[O-].O=C([O-])[O-].OB(O)c1cccs1.[Na+].[Na+],CC(C)(Cc1cnc2c(-c3cccs3)cccn12)[N+](=O)[O-],c1ccc([P](c2ccccc2)(c2ccccc2)[Pd]([P](c2ccccc2)(c2ccccc2)c2ccccc2)([P](c2ccccc2)(c2ccccc2)c2ccccc2)[P](c2ccccc2)(c2ccccc2)c2ccccc2)cc1,C1COCCO1,c1ccc([P](c2ccccc2)(c2ccccc2)[Pd]([P](c2ccccc2)(c2ccccc2)c2ccccc2)([P](c2ccccc2)(c2ccccc2)c2ccccc2)[P](c2ccccc2)(c2ccccc2)c2ccccc2)cc1,1.0
|
100 |
+
98,ord-a1b4e2bb9232496f86833446bc1afce1,Brc1cccnc1.CC1(C)CN(c2ccc3ncsc3c2)C(=O)N1.N[C@@H]1CCCC[C@H]1N.O=P([O-])([O-])[O-].[K+].[K+].[K+],CC1(C)CN(c2ccc3ncsc3c2)C(=O)N1c1cccnc1,I[Cu]I,C1COCCO1,I[Cu]I,0.11
|
101 |
+
99,ord-ef50fb1edb404225bc2416be0fd42b65,O=C(NC1CN2CCC1CC2)c1cccc2oc(-c3ccc(I)cc3)nc12.O=C([O-])[O-].OB(O)c1ccccc1.[Na+].[Na+],O=C(NC1CN2CCC1CC2)c1cccc2oc(-c3ccc(-c4ccccc4)cc3)nc12,c1ccc([P](c2ccccc2)(c2ccccc2)[Pd]([P](c2ccccc2)(c2ccccc2)c2ccccc2)([P](c2ccccc2)(c2ccccc2)c2ccccc2)[P](c2ccccc2)(c2ccccc2)c2ccccc2)cc1,Cc1ccccc1,c1ccc([P](c2ccccc2)(c2ccccc2)[Pd]([P](c2ccccc2)(c2ccccc2)c2ccccc2)([P](c2ccccc2)(c2ccccc2)c2ccccc2)[P](c2ccccc2)(c2ccccc2)c2ccccc2)cc1,0.78
|
102 |
+
100,ord-d685ae2873e64b72a0784c5ddba6999a,C1=CCCCC1.CC(Cc1cccc(C(=O)OCc2ccccc2)c1O)C[Si](C)(O[Si](C)(C)C)O[Si](C)(C)C,CC(Cc1cccc(C(=O)O)c1O)C[Si](C)(O[Si](C)(C)C)O[Si](C)(C)C,[Pd],CCO,[Pd],0.78
|
103 |
+
101,ord-4c25b497c6ec4d309c397d2507c8f033,CC(C)(C)OC(=O)N1CCNCC1.CC(C)(C)[O-].CC(C)c1cc(C(C)C)c(-c2ccccc2P(C2CCCCC2)C2CCCCC2)c(C(C)C)c1.CC1C(=O)N(COCC[Si](C)(C)C)N=C2COc3ccc(Br)cc3N21.[Na+],CC1C(=O)N(COCC[Si](C)(C)C)N=C2COc3ccc(N4CCN(C(=O)OC(C)(C)C)CC4)cc3N21,CC(=O)O[Pd]OC(C)=O,Cc1ccccc1,CC(=O)O[Pd]OC(C)=O,0.33
|
104 |
+
102,ord-f94786e6a22c4c27900d6b1cbb159b5e,CC(C)c1cc(C(C)C)c(S(=O)(=O)Cl)c(C(C)C)c1.CCN(CC)CC.COc1ccc([N+](=O)[O-])c([C@@H](OCc2cn([C@H]3C[C@@](O)([Si](C)(C)C(C)(C)C)[C@@H](CO[Si](C)(C)C(C)(C)C)O3)c(=O)[nH]c2=O)C(C)(C)C)c1,COc1ccc([N+](=O)[O-])c([C@@H](OCc2cn([C@H]3C[C@@](O)([Si](C)(C)C(C)(C)C)[C@@H](CO[Si](C)(C)C(C)(C)C)O3)c(=O)nc2N)C(C)(C)C)c1,CN(C)c1ccncc1,ClCCl,CN(C)c1ccncc1,0.65
|
105 |
+
103,ord-7a092c57e7834c9a9cddcb7260af09a9,CC1(C)c2cccc(P(c3ccccc3)c3ccccc3)c2Oc2c(P(c3ccccc3)c3ccccc3)cccc21.CCS(=O)(=O)c1ccc(N)nc1.Cn1nc(Cl)cc(Br)c1=O.O=C([O-])[O-].[Cs+].[Cs+],CCS(=O)(=O)c1ccc(Nc2cc(Cl)nn(C)c2=O)nc1,O=C(/C=C/c1ccccc1)/C=C/c1ccccc1.O=C(/C=C/c1ccccc1)/C=C/c1ccccc1.O=C(/C=C/c1ccccc1)/C=C/c1ccccc1.[Pd].[Pd],C1COCCO1.ClCCl,O=C(/C=C/c1ccccc1)/C=C/c1ccccc1.O=C(/C=C/c1ccccc1)/C=C/c1ccccc1.O=C(/C=C/c1ccccc1)/C=C/c1ccccc1.[Pd].[Pd],0.25
|
106 |
+
104,ord-7ba08394993f4086bd0ff9b487d332b6,C[C@@H]1CCc2nc(S)nc(O)c21.[NH4+].[OH-],C[C@@H]1CCc2ncnc(O)c21,[Ni],O,[Ni],0.99
|
107 |
+
105,ord-37e1a8f0a8ab45e294a38778ad8848be,CC(=O)N(C)C.O=C1CCCCN1c1ccc(N2CCc3c(C(F)(F)F)nn(-c4ccc(F)c(Cl)c4)c3C2=O)cc1,N#Cc1cc(-n2nc(C(F)(F)F)c3c2C(=O)N(c2ccc(N4CCCCC4=O)cc2)CC3)ccc1F,O=C(/C=C/c1ccccc1)/C=C/c1ccccc1.O=C(/C=C/c1ccccc1)/C=C/c1ccccc1.O=C(/C=C/c1ccccc1)/C=C/c1ccccc1.[C-]#N.[C-]#N.[Fe+2].[Pd].[Pd].[Zn+2].[Zn].c1ccc(P(c2ccccc2)[c-]2cccc2)cc1.c1ccc(P(c2ccccc2)[c-]2cccc2)cc1,,O=C(/C=C/c1ccccc1)/C=C/c1ccccc1.O=C(/C=C/c1ccccc1)/C=C/c1ccccc1.O=C(/C=C/c1ccccc1)/C=C/c1ccccc1.[C-]#N.[C-]#N.[Fe+2].[Pd].[Pd].[Zn+2].[Zn].c1ccc(P(c2ccccc2)[c-]2cccc2)cc1.c1ccc(P(c2ccccc2)[c-]2cccc2)cc1,0.5
|
108 |
+
106,ord-d5751a6de9ce4bea980568f8bdfecde6,CC(=O)Cl.Cc1c(Br)c(F)c(O)c(N)c1C#N.[Cl-].[NH4+],Cc1nc2c(C#N)c(C)c(Br)c(F)c2o1,CCN(C(C)C)C(C)C,CCOC(C)=O,CCN(C(C)C)C(C)C,0.73
|
109 |
+
107,ord-3cac9ca75bf34bae9d8605b4ace7a1c5,CC(C)(C)[O-].CC(C)N1CCNCC1.COc1cc(Br)cc(C2OCCCO2)c1.Cl.[Na+].[Na+].[OH-].c1ccc(P(c2ccccc2)c2ccc3ccccc3c2-c2c(P(c3ccccc3)c3ccccc3)ccc3ccccc23)cc1,COc1cc(C=O)cc(N2CCN(C(C)C)CC2)c1,O=C(/C=C/c1ccccc1)/C=C/c1ccccc1.O=C(/C=C/c1ccccc1)/C=C/c1ccccc1.O=C(/C=C/c1ccccc1)/C=C/c1ccccc1.[Pd].[Pd],Cc1ccccc1,O=C(/C=C/c1ccccc1)/C=C/c1ccccc1.O=C(/C=C/c1ccccc1)/C=C/c1ccccc1.O=C(/C=C/c1ccccc1)/C=C/c1ccccc1.[Pd].[Pd],0.27
|
110 |
+
108,ord-b146ca9198c348cca63bcac068757137,COCOc1cc(OC)ccc1I.O=C1CCC(=O)N1Br,COCOc1cc(OC)c(Br)cc1I,Cc1c(C(C)(C)C)cc(O)cc1C(C)(C)C,CC#N,Cc1c(C(C)(C)C)cc(O)cc1C(C)(C)C,0.76
|
111 |
+
109,ord-50c61014210e4234b91331351ce47941,CCOCOCC.OCC(Cl)CCl,CCOCOCC(Cl)CCl,Cc1ccccc1S(=O)(=O)O.O,,Cc1ccccc1S(=O)(=O)O.O,0.69
|
112 |
+
110,ord-f252f2d4246c4459989daae36b6c6aae,CC1(C)OB(c2ccnc(N3CCOCC3)c2)OC1(C)C.Cc1nc(NC(=O)NC(=O)C(C)C)ccc1Oc1ccnc(Cl)c1.O=C([O-])[O-].[K+].[K+],Cc1nc(NC(=O)NC(=O)C(C)C)ccc1Oc1ccnc(-c2ccnc(N3CCOCC3)c2)c1,c1ccc([P](c2ccccc2)(c2ccccc2)[Pd]([P](c2ccccc2)(c2ccccc2)c2ccccc2)([P](c2ccccc2)(c2ccccc2)c2ccccc2)[P](c2ccccc2)(c2ccccc2)c2ccccc2)cc1,C1COCCO1.O,c1ccc([P](c2ccccc2)(c2ccccc2)[Pd]([P](c2ccccc2)(c2ccccc2)c2ccccc2)([P](c2ccccc2)(c2ccccc2)c2ccccc2)[P](c2ccccc2)(c2ccccc2)c2ccccc2)cc1,0.28
|
113 |
+
111,ord-e3950b01d3ad49e3a039c16486e0eb29,CC(C)(C)[O-].NC1CCOCC1.O=C(NCC(O)CN1CCc2ccccc2C1)c1cncc(Br)c1.[Na+].c1ccc(P(c2ccccc2)c2ccc3ccccc3c2-c2c(P(c3ccccc3)c3ccccc3)ccc3ccccc23)cc1,O=C(NCC(O)CN1CCc2ccccc2C1)c1cncc(NC2CCOCC2)c1,O=C(/C=C/c1ccccc1)/C=C/c1ccccc1.O=C(/C=C/c1ccccc1)/C=C/c1ccccc1.O=C(/C=C/c1ccccc1)/C=C/c1ccccc1.[Pd].[Pd],C1COCCO1,O=C(/C=C/c1ccccc1)/C=C/c1ccccc1.O=C(/C=C/c1ccccc1)/C=C/c1ccccc1.O=C(/C=C/c1ccccc1)/C=C/c1ccccc1.[Pd].[Pd],0.24
|
generation_utils.py
ADDED
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
import torch
|
3 |
+
from torch.utils.data import Dataset
|
4 |
+
|
5 |
+
|
6 |
+
def prepare_input(cfg, text):
    """Tokenize *text* and return the encoding as LongTensors.

    Every sample is padded/truncated to ``cfg.input_max_length`` so that
    items can be stacked into a batch without a collate function.

    Args:
        cfg: config object providing ``tokenizer`` and ``input_max_length``.
        text: the raw input string (e.g. a reaction SMILES).

    Returns:
        dict mapping each tokenizer output key (``input_ids``,
        ``attention_mask``) to a ``torch.long`` tensor.
    """
    encoded = cfg.tokenizer(
        text,
        add_special_tokens=True,
        max_length=cfg.input_max_length,
        padding="max_length",
        truncation=True,
        return_attention_mask=True,
    )
    tensors = {}
    for key, value in encoded.items():
        tensors[key] = torch.tensor(value, dtype=torch.long)
    return tensors
|
16 |
+
|
17 |
+
|
18 |
+
class ReactionT5Dataset(Dataset):
    """Torch dataset over the ``input`` column of a reactions dataframe.

    Each item is the tokenized encoding produced by ``prepare_input``;
    tokenization happens lazily, on access.
    """

    def __init__(self, cfg, df):
        # Keep the config (it carries the tokenizer and max length) and
        # the raw input strings as a plain array.
        self.cfg = cfg
        self.inputs = df["input"].values

    def __len__(self):
        """Number of reactions in the dataset."""
        return len(self.inputs)

    def __getitem__(self, idx):
        """Tokenize and return the idx-th input string."""
        return prepare_input(self.cfg, self.inputs[idx])
|
28 |
+
|
29 |
+
|
30 |
+
def decode_output(output, cfg):
    """Decode generated token sequences back into strings.

    Tokenizer-inserted spaces are stripped and one trailing ``.`` is
    removed from each decoded sequence.

    Returns:
        ``(sequences, scores)`` where ``scores`` is the per-sequence beam
        score list when beam search was used (``cfg.num_beams > 1``) and
        ``None`` otherwise.
    """
    decoded = []
    for seq in output["sequences"]:
        text = cfg.tokenizer.decode(seq, skip_special_tokens=True)
        decoded.append(text.replace(" ", "").rstrip("."))
    if cfg.num_beams <= 1:
        return decoded, None
    return decoded, output["sequences_scores"].tolist()
|
39 |
+
|
40 |
+
|
41 |
+
def save_multiple_predictions(input_data, sequences, scores, cfg):
    """Arrange flat generation results into a DataFrame, one row per input.

    Args:
        input_data: DataFrame with an ``input`` column, one row per query.
        sequences: flat list of predictions with
            ``cfg.num_return_sequences`` consecutive entries per input.
        scores: matching flat list of scores, or None/empty when the
            generation produced no scores (e.g. greedy decoding).
        cfg: config providing ``num_return_sequences``.

    Returns:
        DataFrame with columns ``input``, ``0th`` ... and — only when
        scores are present — ``0th score`` ...
    """
    n = cfg.num_return_sequences
    rows = []
    for i in range(0, len(sequences), n):
        row = [input_data.loc[i // n, "input"]] + sequences[i : i + n]
        # Bug fix: the original sliced `scores` unconditionally, raising
        # TypeError when decode_output returned scores=None (num_beams == 1)
        # even though the column list already guarded on `if scores`.
        if scores:
            row += scores[i : i + n]
        rows.append(row)
    columns = ["input"] + [f"{i}th" for i in range(n)]
    if scores:
        columns += [f"{i}th score" for i in range(n)]
    return pd.DataFrame(rows, columns=columns)
|
model-image.png
ADDED
![]() |
Git LFS Details
|
models.py
ADDED
@@ -0,0 +1,176 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import torch
|
2 |
+
import torch.nn as nn
|
3 |
+
from transformers import (
|
4 |
+
AutoConfig,
|
5 |
+
AutoModel,
|
6 |
+
PreTrainedModel,
|
7 |
+
T5ForConditionalGeneration,
|
8 |
+
)
|
9 |
+
|
10 |
+
|
11 |
+
class ReactionT5Yield(nn.Module):
    """Scalar-yield regression head on a T5-style encoder-decoder backbone.

    The encoder consumes the tokenized reaction; a single decoder step fed
    only the decoder start token produces a pooled state, which is combined
    with the encoder's first-token state and passed through a small MLP.

    Fix over the original: ``forward`` and ``generate_embedding`` duplicated
    the whole encoder + one-step-decoder invocation verbatim; it now lives
    in the private helper ``_encode_decode``. Submodule attribute names are
    unchanged, so state_dict keys (and saved checkpoints) stay compatible.
    """

    def __init__(self, cfg, config_path=None, pretrained=False):
        """Build the backbone and regression head.

        Args:
            cfg: config with ``pretrained_model_name_or_path``, ``tokenizer``
                and ``fc_dropout``.
            config_path: optional path to a torch-saved model config; when
                None the config is fetched from the pretrained name/path.
            pretrained: load pretrained backbone weights when True,
                otherwise initialize the backbone from the config alone.
        """
        super().__init__()
        self.cfg = cfg
        if config_path is None:
            self.config = AutoConfig.from_pretrained(
                self.cfg.pretrained_model_name_or_path, output_hidden_states=True
            )
        else:
            # The saved object is a full config instance, not a state dict.
            self.config = torch.load(config_path, weights_only=False)
        if pretrained:
            self.model = AutoModel.from_pretrained(
                self.cfg.pretrained_model_name_or_path
            )
        else:
            self.model = AutoModel.from_config(self.config)
        # The tokenizer may carry extra task tokens beyond the base vocab.
        self.model.resize_token_embeddings(len(self.cfg.tokenizer))
        self.fc_dropout1 = nn.Dropout(self.cfg.fc_dropout)
        self.fc1 = nn.Linear(self.config.hidden_size, self.config.hidden_size // 2)
        self.fc_dropout2 = nn.Dropout(self.cfg.fc_dropout)

        self.fc2 = nn.Linear(self.config.hidden_size, self.config.hidden_size // 2)
        self.fc3 = nn.Linear(self.config.hidden_size // 2 * 2, self.config.hidden_size)
        self.fc4 = nn.Linear(self.config.hidden_size, self.config.hidden_size)
        self.fc5 = nn.Linear(self.config.hidden_size, 1)

        self._init_weights(self.fc1)
        self._init_weights(self.fc2)
        self._init_weights(self.fc3)
        self._init_weights(self.fc4)
        self._init_weights(self.fc5)

    def _init_weights(self, module):
        """Initialize head layers with small normal weights / zero biases."""
        if isinstance(module, nn.Linear):
            module.weight.data.normal_(mean=0.0, std=0.01)
            if module.bias is not None:
                module.bias.data.zero_()
        elif isinstance(module, nn.Embedding):
            module.weight.data.normal_(mean=0.0, std=0.01)
            if module.padding_idx is not None:
                module.weight.data[module.padding_idx].zero_()
        elif isinstance(module, nn.LayerNorm):
            module.bias.data.zero_()
            module.weight.data.fill_(1.0)

    def _encode_decode(self, inputs):
        """Run the encoder and one decoder step.

        The decoder is fed only the start token, so its output is a single
        pooled hidden state per sample.

        Returns:
            (decoder_last_hidden_state, encoder_hidden_states)
        """
        encoder_outputs = self.model.encoder(**inputs)
        encoder_hidden_states = encoder_outputs[0]
        decoder_input_ids = torch.full(
            (inputs["input_ids"].size(0), 1),
            self.config.decoder_start_token_id,
            dtype=torch.long,
            device=inputs["input_ids"].device,
        )
        decoder_outputs = self.model.decoder(
            input_ids=decoder_input_ids,
            encoder_hidden_states=encoder_hidden_states,
        )
        return decoder_outputs[0], encoder_hidden_states

    def forward(self, inputs):
        """Predict a scalar yield for each tokenized reaction in *inputs*."""
        last_hidden_states, encoder_hidden_states = self._encode_decode(inputs)
        output1 = self.fc1(
            self.fc_dropout1(last_hidden_states).view(-1, self.config.hidden_size)
        )
        output2 = self.fc2(
            encoder_hidden_states[:, 0, :].view(-1, self.config.hidden_size)
        )
        output = self.fc3(self.fc_dropout2(torch.hstack((output1, output2))))
        output = self.fc4(output)
        output = self.fc5(output)
        return output

    def generate_embedding(self, inputs):
        """Return the concatenated pre-head embedding (no dropout applied
        to the concatenation, matching the original behavior)."""
        last_hidden_states, encoder_hidden_states = self._encode_decode(inputs)
        output1 = self.fc1(
            self.fc_dropout1(last_hidden_states).view(-1, self.config.hidden_size)
        )
        output2 = self.fc2(
            encoder_hidden_states[:, 0, :].view(-1, self.config.hidden_size)
        )
        return torch.hstack((output1, output2))
|
100 |
+
|
101 |
+
|
102 |
+
class ReactionT5Yield2(PreTrainedModel):
    """HF-compatible variant of the yield-regression model.

    Same architecture as ``ReactionT5Yield`` but built as a
    ``PreTrainedModel`` so it can be pushed to / loaded from the Hub.
    Differences kept from the original: no dropout layers, and ``forward``
    scales the regression output by 100 (percent yield).

    Fix over the original: ``forward`` and ``generate_embedding`` duplicated
    the encoder + one-step-decoder invocation verbatim; it now lives in the
    private helper ``_encode_decode``. Submodule attribute names are
    unchanged, so checkpoint keys stay compatible.
    """

    config_class = AutoConfig

    def __init__(self, config):
        super().__init__(config)
        self.config = config
        self.model = T5ForConditionalGeneration.from_pretrained(
            self.config._name_or_path
        )
        self.model.resize_token_embeddings(self.config.vocab_size)
        self.fc1 = nn.Linear(self.config.hidden_size, self.config.hidden_size // 2)
        self.fc2 = nn.Linear(self.config.hidden_size, self.config.hidden_size // 2)
        self.fc3 = nn.Linear(self.config.hidden_size // 2 * 2, self.config.hidden_size)
        self.fc4 = nn.Linear(self.config.hidden_size, self.config.hidden_size)
        self.fc5 = nn.Linear(self.config.hidden_size, 1)

        self._init_weights(self.fc1)
        self._init_weights(self.fc2)
        self._init_weights(self.fc3)
        self._init_weights(self.fc4)
        self._init_weights(self.fc5)

    def _init_weights(self, module):
        """Initialize head layers with small normal weights / zero biases."""
        if isinstance(module, nn.Linear):
            module.weight.data.normal_(mean=0.0, std=0.01)
            if module.bias is not None:
                module.bias.data.zero_()
        elif isinstance(module, nn.Embedding):
            module.weight.data.normal_(mean=0.0, std=0.01)
            if module.padding_idx is not None:
                module.weight.data[module.padding_idx].zero_()
        elif isinstance(module, nn.LayerNorm):
            module.bias.data.zero_()
            module.weight.data.fill_(1.0)

    def _encode_decode(self, inputs):
        """Run the encoder and one decoder step (start token only).

        Returns:
            (decoder_last_hidden_state, encoder_hidden_states)
        """
        encoder_outputs = self.model.encoder(**inputs)
        encoder_hidden_states = encoder_outputs[0]
        decoder_input_ids = torch.full(
            (inputs["input_ids"].size(0), 1),
            self.config.decoder_start_token_id,
            dtype=torch.long,
            device=inputs["input_ids"].device,
        )
        decoder_outputs = self.model.decoder(
            input_ids=decoder_input_ids,
            encoder_hidden_states=encoder_hidden_states,
        )
        return decoder_outputs[0], encoder_hidden_states

    def forward(self, inputs):
        """Predict percent yield (head output scaled by 100)."""
        last_hidden_states, encoder_hidden_states = self._encode_decode(inputs)
        output1 = self.fc1(last_hidden_states.view(-1, self.config.hidden_size))
        output2 = self.fc2(
            encoder_hidden_states[:, 0, :].view(-1, self.config.hidden_size)
        )
        output = self.fc3(torch.hstack((output1, output2)))
        output = self.fc4(output)
        output = self.fc5(output)
        return output * 100

    def generate_embedding(self, inputs):
        """Return the concatenated pre-head embedding."""
        last_hidden_states, encoder_hidden_states = self._encode_decode(inputs)
        output1 = self.fc1(last_hidden_states.view(-1, self.config.hidden_size))
        output2 = self.fc2(
            encoder_hidden_states[:, 0, :].view(-1, self.config.hidden_size)
        )
        return torch.hstack((output1, output2))
|
task_forward/accuracy-and-invalidity-check.ipynb
ADDED
@@ -0,0 +1,217 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cells": [
|
3 |
+
{
|
4 |
+
"cell_type": "code",
|
5 |
+
"execution_count": null,
|
6 |
+
"id": "92432099",
|
7 |
+
"metadata": {},
|
8 |
+
"outputs": [],
|
9 |
+
"source": [
|
10 |
+
"prediction: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1112/1112 [3:05:38<00:00, 10.02s/it]\n",
|
11 |
+
"Top-1: 0.5% || Invalid 16.69%\n",
|
12 |
+
"Top-2: 1.0% || Invalid 23.80%\n",
|
13 |
+
"Top-3: 1.6% || Invalid 28.18%\n",
|
14 |
+
"Top-4: 2.1% || Invalid 31.25%\n",
|
15 |
+
"Top-5: 2.5% || Invalid 33.73%\n",
|
16 |
+
"prediction: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1112/1112 [3:05:18<00:00, 10.00s/it]\n",
|
17 |
+
"Top-1: 0.2% || Invalid 22.41%\n",
|
18 |
+
"Top-2: 0.7% || Invalid 28.65%\n",
|
19 |
+
"Top-3: 1.0% || Invalid 32.95%\n",
|
20 |
+
"Top-4: 1.3% || Invalid 36.12%\n",
|
21 |
+
"Top-5: 1.6% || Invalid 38.94%\n",
|
22 |
+
"prediction: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1112/1112 [3:07:23<00:00, 10.11s/it]\n",
|
23 |
+
"Top-1: 0.2% || Invalid 31.81%\n",
|
24 |
+
"Top-2: 0.6% || Invalid 36.80%\n",
|
25 |
+
"Top-3: 0.8% || Invalid 40.56%\n",
|
26 |
+
"Top-4: 1.0% || Invalid 43.56%\n",
|
27 |
+
"Top-5: 1.1% || Invalid 46.23%\n",
|
28 |
+
"prediction: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1112/1112 [3:04:23<00:00, 9.95s/it]\n",
|
29 |
+
"Top-1: 0.1% || Invalid 57.28%\n",
|
30 |
+
"Top-2: 0.3% || Invalid 61.50%\n",
|
31 |
+
"Top-3: 0.3% || Invalid 64.65%\n",
|
32 |
+
"Top-4: 0.4% || Invalid 67.02%\n",
|
33 |
+
"Top-5: 0.4% || Invalid 69.05%\n",
|
34 |
+
"prediction: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1112/1112 [3:07:16<00:00, 10.10s/it]\n",
|
35 |
+
"Top-1: 0.4% || Invalid 64.24%\n",
|
36 |
+
"Top-2: 0.6% || Invalid 67.45%\n",
|
37 |
+
"Top-3: 0.7% || Invalid 69.89%\n",
|
38 |
+
"Top-4: 0.7% || Invalid 71.78%\n",
|
39 |
+
"Top-5: 0.8% || Invalid 73.41%\n",
|
40 |
+
"\n",
|
41 |
+
"\n"
|
42 |
+
]
|
43 |
+
},
|
44 |
+
{
|
45 |
+
"cell_type": "code",
|
46 |
+
"execution_count": 5,
|
47 |
+
"id": "6a089a12",
|
48 |
+
"metadata": {},
|
49 |
+
"outputs": [
|
50 |
+
{
|
51 |
+
"name": "stderr",
|
52 |
+
"output_type": "stream",
|
53 |
+
"text": [
|
54 |
+
"/tmp/ipykernel_2056154/465102246.py:21: UserWarning: set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator.\n",
|
55 |
+
" ax.set_yticklabels([int(i) for i in ax.get_yticks()], fontsize=12)\n"
|
56 |
+
]
|
57 |
+
},
|
58 |
+
{
|
59 |
+
"data": {
|
60 |
+
"text/plain": [
|
61 |
+
"<matplotlib.legend.Legend at 0x7f69ce998510>"
|
62 |
+
]
|
63 |
+
},
|
64 |
+
"execution_count": 5,
|
65 |
+
"metadata": {},
|
66 |
+
"output_type": "execute_result"
|
67 |
+
},
|
68 |
+
{
|
69 |
+
"data": {
|
70 |
+
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAqkAAAG3CAYAAACXEsyxAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABX/klEQVR4nO3deZyNdf/H8feZ7cxmDmOGGcvYQoSQVG5LpYWKViq5SYlSSdLdrU1UKEvqp/WOFOlOypIlEW6EEApZx9LMGGafM/t2rt8fx5xxzFgGM3MNr+fjcR6c7/W5rvO9zlzL+1zLORbDMAwBAAAAJuJR0R0AAAAATkVIBQAAgOkQUgEAAGA6hFQAAACYDiEVAAAApkNIBQAAgOkQUgEAAGA6hFQAAACYDiEVAAAApkNIBQAAgOmUOqSmp6dr1KhR6tatm4KDg2WxWDRjxowSa3fv3q1u3bopMDBQwcHB+uc//6n4+PhidQ6HQ++++64aNGggX19ftWrVSt98802pZwYAAACXhlKH1ISEBI0ZM0a7d+/W1Vdffdq66Ohode7cWQcOHNDYsWM1YsQILV68WLfeeqtyc3Pdal955RW99NJLuvXWW/V///d/ioiIUJ8+ffTf//639HMEAACASs9iGIZRmhFycnKUnJyssLAwbdmyRddee62++OILPfroo251Q4YM0YwZM7Rnzx5FRERIklasWKFbb71Vn376qQYNGiRJiomJUYMGDTRo0CBNnTpVkmQYhrp06aJDhw7p8OHD8vT0vAizCgAAgMqi1EdSrVarwsLCzlr3/fff66677nIFVEm65ZZb1KRJE82ZM8fVtmDBAuXl5WnIkCGuNovFoqeeekrR0dHasGFDabsIAACASs6rLCYaExOjuLg4tWvXrtiw9u3ba8mSJa7n27ZtU0BAgJo1a1asrnB4x44di00nJydHOTk5rucOh0NJSUmqXr26LBbLxZoVAAAAXCSGYSgtLU21atWSh8eZj5WWSUiNjY2VJIWHhxcbFh4erqSkJOXk5MhqtSo2NlY1a9YsFiwLxz169GiJrzFu3DiNHj36IvccAAAAZS0qKkp16tQ5Y02ZhNSsrCxJzksDTuXr6+uqsVqtrn/PVFeSkSNHavjw4a7nqampioiIUFRUlIKCgi54HgAAAHBx2e121a1bV1WqVDlrbZmEVD8/P0lyOx1fKDs7263Gz8/vnOpOZbVaSwy3QUFBhFQAAAATO5dLM8vky/wLT9UXnvY/WWxsrIKDg10BMzw8XMeOHdOpXzJQOG6tWrXKoosAAAAwsTIJqbVr11ZoaKi2bNlSbNimTZvUunVr1/PWrVsrMzNTu3fvdqv77bffXMMBAABweSmzn0W9//77tWjRIkVFRbnafvnlF+3bt0+9evVytd19993y9vbWRx995GozDEOffPKJateurQ4dOpRVFwEAAGBS53VN6tSpU5WSkuK68/7HH39UdHS0JOnZZ5+VzWbTyy+/rO+++0433XSTnnvuOaWnp2vChAlq2bKlBgwY4JpWnTp1NGzYME2YMEF5eXm69tprNX/+fK1du1Zff/01X+QPAABwGSr1L05JUv369XXkyJEShx06dEj169eXJO3atUvDhw/XunXr5OPjozvvvFOTJk1SzZo13cZxOBx655139Omnnyo2NlaNGzfWyJEj9cgjj5xzn+x2u2w2m1JTU7lxCgAAwIRKk9fOK6SaESEVAADA3EqT18rsmlQAAADgfBFSAQAAYDqEVAAAAJgOIRUAAACmQ0gFAACA6RBSAQAAYDqEVAAAAJgOIRUAAACmQ0gFAACA6RBSAQAAYDqEVAAAAJiOV0V3AAAAABXA4ZB27ZKSk6Vq1aSrrpI8zHP8kpAKnMzkKywqIZYpAGa0fr00daq0e7eUkyNZrVKzZtIzz0gdOlR07yQRUs/f5bTjuVzmtRKssKhkWKYAmNH69dKIEVJSkhQeLvn5SVlZ0rZtzvaJE0
2xjbIYhmFUdCcuBrvdLpvNptTUVAUFBZXti11OO57LZV5Pt8IeO+YM5iZZYVGJsEwBMCOHQ+rbV9q6VbriCsliKRpmGFJkpNS2rTRzZpkckCpNXiOkltbltOO5XOa1gldYXCQnb8oMo+j5qe1nqi2p5ky1pxvf4ZAGDZL++ENq2LBomSqsPXxYatVK+vBD5zJ1utc/l76fqa2sakua9/IY/0zv/fn8nc71tUrz3p1t/NL260zjl/UyUVIfL/bfqTxfy6zLxNnGv9jvXUKCtGKF86CT10kn1K+4wrmfT0+XUlKkr76SWrbUxVaavMbp/tJwOJxHFZOSpFq1nIEtO9s5zGaTYmKk11+XRo6s/GHG4ZDGjpWio53zmp1dNK9BQc72V191hliLpWjlcDiK/l/Sw+Eomn7hv6eru9BpnGk6J497/Li0cqVz5dy3z9l+8gqem+tcoQcNkkJDi9orasdhtp1MaTbGZbHhNiO7Xdq+3bkDsNuLD8/Pl5Yvl3r3dq5PAFBeEhOd+3OLRcrLK2ov3C/6+Tn3i8nJFdO/kxBSS2PXLudp7/Bw5x/v6FH34fn50ubN0pgxlX/Hc/JO9tCh4sPz86Xff5feeafyz2tiopSR4VxBTz6KWsgwnB9I/vpLql69/PuHyic/37k8eXmVvEx5eTk//BiG5OPjPsxicR+n8P+ntp+t9mzjn632Yo5fVrUX67XOZfzSvvdnGr807/OFjn8pLRMXY/zS/J3O9lqVdZk4cMB5kKlKFcnfv6jd19f5b1aW8yhrtWqqaITU0khOdl6X6efn/BRis7kPdzik1FSpQQOpXr2K6ePFcuSIM5TZbEVHhU9eyAvntXFjqVEjZ5uHR9HKUNKjcDol1Z087EzTOtvwc6k5dfjhw86wHRjo/NueujHIynKe/hg8uGheC98PM+xkSnqN8tzwl+dO4tTxzPg+S9KOHVK/flLVqs7l6lSFp9M+/LBMTqcBwGm1aiX98IPzJqmaNd23XYbhvKSvbVvnTdIVjJBaGtWqOT9dZGVJISHOx8nS052fTF54ofLveHbscF5Pd6adbGCg9NxzlX9eHQ5p1SrnClunTvEVNjJSat9e+uc/K/9lHCgfV13lvMFw2zbnBxsT7wQAXGY8PJw3P48Y4dy/hYUVv+fk6adNsb+r+B5UJoU7nmPHSr627tgxqXnzS2PHcznNa+EKW62ac4VNT5cKCpz/RkaaaoVFJcEyBcDMOnRw3vzcpo3zrM7hw85/27Y11U3R3N1fWoV3vCcnl/zpw0R/3At2Oc2rVPLXbTVv7gwTl9J8ovywTAEwswr4HnS+gqoivif1Ut3xXE7zKl0+P1yA8sMyBQAuhNTyuNv8ctrxXE7zCgAAygzfk1oePDwq/w1D5+pymlcAAGAKHA4DAACA6RBSAQAAYDqEVAAAAJgOIRUAAACmQ0gFAACA6RBSAQAAYDqEVAAAAJgOIRUAAACmQ0gFAACA6RBSAQAAYDqEVAAAAJgOIRUAAACmQ0gFAACA6RBSAQAAYDqEVAAAAJgOIRUAAACmQ0gFAACA6RBSAQAAYDqEVAAAAJgOIRUAAACmQ0gFAACA6RBSAQAAYDqEVAAAAJgOIRUAAACmQ0gFAACA6RBSAQAAYDqEVAAAAJgOIRUAAACmQ0gFAACA6RBSAQAAYDqEVAAAAJgOIRUAAACmQ0gFAACA6RBSAQAAYDplGlL379+vhx56SHXq1JG/v7+uvPJKjRkzRpmZmW5169evV8eOHeXv76+wsDANHTpU6enpZdk1AAAAmJhXWU04KipK7du3l81m0zPPPKPg4GBt2LBBo0aN0u+//64FCxZIkrZv366uXbuqWbNmmjx5sqKjozVx4kTt379fS5cuLavuAQAAwMTKLKTOnDlTKSkpWrduna666ipJ0qBBg+RwOPTVV18pOTlZ1apV08svv6xq1app9erVCgoKkiTVr19fTzzxhH7++WfddtttZdVFAAAAmFSZne632+
2SpJo1a7q1h4eHy8PDQz4+PrLb7Vq+fLn69u3rCqiS1K9fPwUGBmrOnDll1T0AAACYWJmF1BtvvFGS9Pjjj2v79u2KiorSt99+q48//lhDhw5VQECAduzYofz8fLVr185tXB8fH7Vu3Vrbtm0rq+4BAADAxMrsdH+3bt305ptvauzYsVq4cKGr/ZVXXtFbb70lSYqNjZXkPLp6qvDwcK1du/a008/JyVFOTo7reeGRWwAAAFR+ZRZSJee1pZ07d9b999+v6tWra/HixRo7dqzCwsL0zDPPKCsrS5JktVqLjevr6+saXpJx48Zp9OjRZdZ3AAAAVJwyC6n//e9/NWjQIO3bt0916tSRJN13331yOBx66aWX9PDDD8vPz0+S3I6IFsrOznYNL8nIkSM1fPhw13O73a66dete5LkAAABARSiza1I/+ugjtWnTxhVQC/Xs2VOZmZnatm2b6zR/4Wn/k8XGxqpWrVqnnb7ValVQUJDbAwAAAJeGMgupx48fV0FBQbH2vLw8SVJ+fr5atGghLy8vbdmyxa0mNzdX27dvV+vWrcuqewAAADCxMgupTZo00bZt27Rv3z639m+++UYeHh5q1aqVbDabbrnlFs2aNUtpaWmumpkzZyo9PV29evUqq+4BAADAxCyGYRhlMeE1a9bo5ptvVvXq1fXMM8+oevXqWrRokZYuXaqBAwfqP//5jyRp69at6tChg5o3b65BgwYpOjpakyZNUufOnbVs2bJzfj273S6bzabU1FRO/QMAAJhQafJamYVUSdq0aZPeeOMNbdu2TYmJiWrQoIH69++vf/3rX/LyKrpna926dXrppZe0detWValSRb1799a4ceNUpUqVc34tQioAAIC5mSaklidCKgAAgLmVJq+V2TWpAAAAwPkipAIAAMB0CKkAAAAwHUIqAAAATIeQCgAAANMhpAIAAMB0CKkAAAAwHUIqAAAATIeQCgAAANMhpAIAAMB0CKkAAAAwHUIqAAAATIeQCgAAANMhpAIAAMB0CKkAAAAwHUIqAAAATIeQCgAAANMhpAIAAMB0CKkAAAAwHUIqAAAATIeQCgAAANMhpAIAAMB0CKkAAAAwHUIqAAAATIeQCgAAANMhpAIAAMB0CKkAAAAwHUIqAAAATIeQCgAAANMhpAIAAMB0CKkAAAAwHUIqAAAATIeQCgAAANMhpAIAAMB0CKkAAAAwHUIqAAAATIeQCgAAANMhpAIAAMB0CKkAAAAwHUIqAAAATIeQCgAAANMhpAIAAMB0CKkAAAAwHUIqAAAATIeQCgAAANMhpAIAAMB0CKkAAAAwHUIqAAAATIeQCgAAANMhpAIAAMB0CKkAAAAwHUIqAAAATIeQCgAAANMhpAIAAMB0CKkAAAAwHUIqAAAATIeQCgAAANMhpAIAAMB0CKkAAAAwHUIqAAAATKfMQ+rWrVvVs2dPBQcHy9/fXy1atNAHH3zgVrN+/Xp17NhR/v7+CgsL09ChQ5Wenl7WXQMAAIBJeZXlxH/++Wf16NFDbdq00WuvvabAwEBFRkYqOjraVbN9+3Z17dpVzZo10+TJkxUdHa2JEydq//79Wrp0aVl2DwAAACZVZiHVbrerX79+uvPOOzV37lx5eJR80Pbll19WtWrVtHr1agUFBUmS6tevryeeeEI///yzbrvttrLqIgAAAEyqzE73z549W8ePH9fbb78tDw8PZWRkyOFwuNXY7XYtX75cffv2dQVUSerXr58CAwM1Z86csuoeAAAATKzMQuqKFSsUFBSkmJgYNW3aVIGBgQoKCtJTTz2l7OxsSdKOHTuUn5+vdu3auY3r4+Oj1q1ba9u2baedfk5Ojux2u9sDAAAAl4YyC6n79+9Xfn6+7r77bt1+++36/vvv9dhjj+mTTz7RgAEDJEmxsbGSpPDw8GLjh4eH6+jRo6ed/rhx42Sz2VyPunXrls2MAAAAoNyV2TWp6enpyszM1JNPPum6m/++++5Tbm6uPv30U4
0ZM0ZZWVmSJKvVWmx8X19f1/CSjBw5UsOHD3c9t9vtBFUAAIBLRJkdSfXz85MkPfzww27tffr0kSRt2LDBVZOTk1Ns/OzsbNfwklitVgUFBbk9AAAAcGkosyOptWrV0q5du1SzZk239ho1akiSkpOT1ahRI0lFp/1PFhsbq1q1apVV9wAAqJQMw1BBQYHy8/MruiuAG29vb3l6el606ZVZSL3mmmu0fPly141ThQqvMw0NDVWLFi3k5eWlLVu2qHfv3q6a3Nxcbd++3a0NAIDLmWEYSklJUXx8vAoKCiq6O0CJqlatqrCwMFkslgueVpmF1N69e2v8+PGaNm2abr75Zlf7559/Li8vL914442y2Wy65ZZbNGvWLL322muqUqWKJGnmzJlKT09Xr169yqp7AABUKseOHVNKSorrEjcvL6+LEgSAi8EwDGVmZiouLk5SyTfFl1aZhdQ2bdroscce0/Tp05Wfn68uXbpo9erV+u677zRy5EjXqfy3335bHTp0UJcuXTRo0CBFR0dr0qRJuu2229StW7ey6h4AAJVGQUGBUlNTFRoaqpCQkIruDlCiwnuJ4uLiVKNGjQs+9V+mP4v6ySefKCIiQl988YXmzZunevXq6b333tOwYcNcNW3bttWKFSv00ksv6fnnn1eVKlX0+OOPa9y4cWXZNQAAKo28vDwZhqGAgICK7gpwRv7+/pKcy+yFhlSLYRjGxehURbPb7bLZbEpNTeVOfwDAJSU7O1uHDh1SgwYN5OvrW9HdAU7rbMtqafJamX0FFQAAAHC+CKkAAAAwHUIqAADAeXjjjTf4hoUyREgFAAAVasaMGbJYLK6Hl5eXateurUcffVQxMTEV2rfMzEy98cYbWr16dbm/dv369d3el9M9ZsyYccb6J598stz7fjGU6d39AAAA52rMmDFq0KCBsrOztXHjRs2YMUPr1q3Tzp07K+yGsczMTI0ePVqSdOONN7oNe/XVV/Xvf/+7zF57ypQpSk9Pdz1fsmSJvvnmG7333ntuX0XWoUMH1/9bt26tF154wW06TZo0KbM+liVCKgAAlzOHQ9q1S0pOlqpVk666SvKomBOt3bt3V7t27SRJAwcOVEhIiN555x0tXLjQlL9C6eXlJS+vsotS99xzj9vzY8eO6ZtvvtE999yj+vXrlzhO7dq11bdv3zLrU3nidD8AAJer9eulvn2lfv2kJ590/tu3r7PdBDp16iRJioyMdLXt2bNHDzzwgIKDg+Xr66t27dpp4cKFbuMlJSVpxIgRatmypQIDAxUUFKTu3bvrjz/+KPYa2dnZeuONN9SkSRP5+voqPDxc9913nyIjI3X48GGFhoZKkkaPHu06ff7GG29IKvma1Pz8fL355ptq1KiRrFar6tevr5dfflk5OTludfXr19ddd92ldevWqX379vL19VXDhg311VdfXfD7lpubq4yMjAueTkUjpAIAcDlav14aMULaulWqWlWqX9/577ZtznYTBNXDhw9LkqpVqyZJ2rVrl66//nrt3r1b//73vzVp0iQFBATonnvu0bx581zjHTx4UPPnz9ddd92lyZMn68UXX9SOHTvUpUsXHT161FVXUFCgu+66S6NHj9Y111yjSZMm6bnnnlNqaqp27typ0NBQffzxx5Kke++9VzNnztTMmTN13333nbbPAwcO1Ouvv662bdvqvffeU5cuXTRu3Dg99NBDxWoPHDigBx54QLfeeqsmTZqkatWq6dFHH9WuXbvO+z1buXKl/P39FRgYqPr16+v9998/72lVOOMSkZqaakgyUlNTK7orAABcVFlZWcZff/1lZGVluQ9wOAwjK6v0j4wMw+jd2zCaNDGM7t0N4447ih7duzvbH3zQWVea6Toc5zV/X3zxhSHJWLFihREfH29ERUUZc+fONUJDQw2r1WpERUUZhmEYXbt2NVq2bGlkZ2ef9BY4jA4dOhiNGzd2tWVnZxsFBQVur3Ho0CHDarUaY8aMcbVNnz
7dkGRMnjy5WJ8cJ+YlPj7ekGSMGjWqWM2oUaOMk6PU9u3bDUnGwIED3epGjBhhSDJWrlzpaqtXr54hyVizZo2rLS4uzrBarcYLL7xQ4vs0YcIEQ5Jx6NChEof36NHDeOedd4z58+cb06ZNMzp16mRIMv71r3+VWF8WTrusnlCavMY1qQAAVFY5OVKvXqUfz26Xtm+XvLyklJTiw/PzpaVLpe7dpdL8iuN330kXcIPTLbfc4va8fv36mjVrlurUqaOkpCStXLlSY8aMUVpamtLS0lx1t99+u0aNGqWYmBjVrl1bVqvVNaygoEApKSkKDAxU06ZNtXXrVtew77//XiEhIXr22WeL9eV8vlpqyZIlkqThw4e7tb/wwguaOHGiFi9erJtuusnV3rx5c9clDZIUGhqqpk2b6uDBg6V+bUnFLnsYMGCAunfvrsmTJ+vZZ59VnTp1zmu6FYXT/QAAXG7y8pw3TJ3ut9U9PZ3D8/LKtVsffvihli9frrlz5+qOO+5QQkKCK3AeOHBAhmHotddeU2hoqNtj1KhRkqS4uDhJksPh0HvvvafGjRvLarUqJCREoaGh+vPPP5Wamup6vcjISDVt2vSi3fx05MgReXh46IorrnBrDwsLU9WqVXXkyBG39oiIiGLTqFatmpKTky9KfywWi55//nnl5+dXyFdoXSiOpAIAUFlZrc6jl6W1c6c0cKBks0mBgcWHp6dLqanSBx9ILVqUrj8XoH379q67+++55x517NhRffr00d69e+VwOCRJI0aM0O23317i+IXhcOzYsXrttdf02GOP6c0331RwcLA8PDw0bNgw13TK0rkehfU8zYcEwzAuWl/q1q0ryXkzWWVDSAUAoLKyWM7v9HrbtlLz5s6bpKpUcU6nkGFIcXHOmrZtK+zrqDw9PTVu3DjddNNNmjp1qh577DFJkre3d7HLAk41d+5c3XTTTZo2bZpbe0pKitv3izZq1Ei//fab8vLy5O3tXeK0SnPav169enI4HNq/f7+aNWvmaj9+/LhSUlJUr169c57WxVJ46UDhtxRUJpzuBwDgcuPhIT3zjPN7USMjnUdOCwqc/0ZGOtuffrrCAmqhG2+8Ue3bt9eUKVMUFBSkG2+8UZ9++qliY2OL1cbHx7v+7+npWexo5HfffVfs16vuv/9+JSQkaOrUqcWmVzi+v7+/JGfAPZs77rhDkvNL+E82efJkSdKdd9551mmcr6SkJBUUFLi15eXlafz48fLx8XG7Fray4EgqAACXow4dpIkTpalTpd27pePHnafr27Z1BtSTfsWoIr344ovq1auXZsyYoQ8//FAdO3ZUy5Yt9cQTT6hhw4Y6fvy4NmzYoOjoaNf3oN51110aM2aMBgwYoA4dOmjHjh36+uuv1bBhQ7dp9+vXT1999ZWGDx+uTZs2qVOnTsrIyNCKFSs0ZMgQ3X333fLz81Pz5s317bffqkmTJgoODlaLFi3UooTLIK6++mr1799fn332mVJSUtSlSxdt2rRJX375pe65554yDYoLFy7UW2+9pQceeEANGjRQUlKSZs+erZ07d2rs2LEKCwsrs9cuK4RUAAAuVx06SNdfb5pfnCrJfffdp0aNGmnixIl64okntGXLFo0ePVozZsxQYmKiatSooTZt2uj11193jfPyyy8rIyNDs2fP1rfffqu2bdtq8eLFxX7C1NPTU0uWLNHbb7+t2bNn6/vvv1f16tVdQbjQ559/rmeffVbPP/+8cnNzNWrUqBJDamFtw4YNNWPGDM2bN09hYWEaOXKk6+austKyZUs1b95cs2bNUnx8vHx8fNS6dWvNmTNHvc7nGyBMwGJczKtzK5DdbpfNZlNqaqqCSvN1GQAAmFx2drYOHTqkBg0aVNhv2APn4mzLamnymnk+KgEAAAAnEFIBAABgOoRUAAAAmA4hFQAAAKZDSAUAAIDpEFIBAABgOoRUAAAAmA4hFQAAAKZDSAUAAIDpEFIBAABgOoRUAAAAmA
4hFQAA4BJjsVj0xhtvVHQ3LgghFQAAmEJkZKQGDx6shg0bytfXV0FBQfrHP/6h999/X1lZWRXdvUrLYrGc02P16tVnrB8/fny59turXF8NAACgBIsXL1avXr1ktVrVr18/tWjRQrm5uVq3bp1efPFF7dq1S5999llFd7NSmjlzptvzr776SsuXLy/W3qxZM9f/b731VvXr189teJs2bcqukyUgpAIAcBmKj5fs9tMPDwqSQkPLpy+HDh3SQw89pHr16mnlypUKDw93DXv66ad14MABLV68uHw6cwnq27ev2/ONGzdq+fLlxdpP1qRJkzMOLw+c7gcA4DITHy/16SP16nX6R58+zrry8O677yo9PV3Tpk1zC6iFrrjiCj333HOSpPz8fL355ptq1KiRrFar6tevr5dfflk5OTlu49SvX1933XWXVq9erXbt2snPz08tW7Z0ndL+4Ycf1LJlS/n6+uqaa67Rtm3b3MZ/9NFHFRgYqIMHD+r2229XQECAatWqpTFjxsgwDLfajIwMvfDCC6pbt66sVquaNm2qiRMnutUdPnxYFotFM2bMKDZ/p14/+sYbb8hisejAgQN69NFHVbVqVdlsNg0YMECZmZlu4+bk5Oj5559XaGioqlSpop49eyo6Ovqs7/m5yMrKUnZ29kWZ1vkgpAIAcJmx26XERMlqlapWLf6wWp3Dz3Sk9WL68ccf1bBhQ3Xo0OGstQMHDtTrr7+utm3b6r333lOXLl00btw4PfTQQ8VqDxw4oD59+qhHjx4aN26ckpOT1aNHD3399dd6/vnn1bdvX40ePVqRkZHq3bu3HA6H2/gFBQXq1q2batasqXfffVfXXHONRo0apVGjRrlqDMNQz5499d5776lbt26aPHmymjZtqhdffFHDhw+/oPeld+/eSktL07hx49S7d2/NmDFDo0ePLvZ+TJkyRbfddpvGjx8vb29v3XnnnRf0upI0Y8YMBQQEyM/PT82bN9fs2bMveJqlxel+AAAquTMd7PLwkHx83GtzciSHwxlG/fyK10vOmpyc00/71OmeL7vdrpiYGN19991nrf3jjz/05ZdfauDAgfrPf/4jSRoyZIhq1KihiRMnatWqVbrppptc9Xv37tX69et1ww03SJKaN2+u22+/XU888YT27NmjiIgISVK1atU0ePBgrVmzRjfeeKNr/OzsbHXr1k0ffPCB67V69Oihd955R0OHDlVISIgWLlyolStX6q233tIrr7wiyXmJQq9evfT+++/rmWeeUaNGjc7rvWnTpo2mTZvmep6YmKhp06bpnXfecb0fs2bN0pAhQ/Thhx+6XvuRRx7Rn3/+eV6vKUkdOnRQ79691aBBAx09elQffvihHnnkEaWmpuqpp5467+mWFiEVAIBKrlev0w9r10466cCf+vaVkpKkQ4ckLy/no1CVKtJJ987o3/+WTjmz7dK4sTR58oX1W3KGVOdrVzlr7ZIlSySp2BHKF154QRMnTtTixYvdQmrz5s1dAVWSrrvuOknSzTff7AqoJ7cfPHjQLaRK0jPPPOP6v8Vi0TPPPKPFixdrxYoVeuihh7RkyRJ5enpq6NChxfo0d+5cLV261G0apfHkk0+6Pe/UqZPmzZsnu92uoKAg1/tx6msPGzbsgo58/vrrr27PH3vsMV1zzTV6+eWX9eijj8rv1E82ZYTT/QAAoMIEBQVJktLS0s5ae+TIEXl4eOiKK65waw8LC1PVqlV15MgRt/aTg6gk2Ww2SVLdunVLbE9OTnZr9/DwUMOGDd3amjRpIsl5jWlhn2rVqlUsZBfeKX9qn0rj1P5Xq1bNrZ+F78epR2qbNm163q9ZEh8fHz3zzDNKSUnR77//flGnfSYcSQUAoJL77rvTD/M45XDUrFnSwYPOG6OqVpUCAk4/7vjx0ikZ7bTTPV9BQUGqVauWdu7cec7jWCyWc6rz9PQsVfupN0RdTKfrc0FBwWnHqYh+nk5hsE9KSiq31+RIKgAAlZ
yv7+kfp1436uvrvBbVw6Pkx8ms1nOf7oW46667FBkZqQ0bNpyxrl69enI4HNq/f79b+/Hjx5WSkqJ69epdvE5JcjgcOnjwoFvbvn37JDm/PaCwT0ePHi12JHjPnj2u4VLRUdCUlBS3ugs50lr4fkRGRrq1792797yneTqF70NoeX0vmQipAABctrKypIyM4o/y/nGnf/3rXwoICNDAgQN1/PjxYsMjIyP1/vvv64477pAkTZkyxW345BMXx16Mu9pPNXXqVNf/DcPQ1KlT5e3tra5du0qS7rjjDhUUFLjVSdJ7770ni8Wi7t27S3IeMQ4JCdGaNWvc6j766KPz7lvhtAtv7Cp06vtTGvElfO9YWlqapkyZopCQEF1zzTXnPe3S4nQ/AACXmaAgqXp159dMnfL1oi7VqzvrykOjRo00e/ZsPfjgg2rWrJnbL06tX79e3333nR599FE999xz6t+/vz777DOlpKSoS5cu2rRpk7788kvdc889bjdNXQy+vr766aef1L9/f1133XVaunSpFi9erJdfftl1RLFHjx666aab9Morr+jw4cO6+uqr9fPPP2vBggUaNmyY2/WiAwcO1Pjx4zVw4EC1a9dOa9ascR2ZPR+tW7fWww8/rI8++kipqanq0KGDfvnlFx04cOC8p/nhhx9q/vz56tGjhyIiIhQbG6vp06fr77//1syZM+VzMQ+hnwUhFQCAy0xoqDR7tnl+cUqSevbsqT///FMTJkzQggUL9PHHH8tqtapVq1aaNGmSnnjiCUnS559/roYNG2rGjBmaN2+ewsLCNHLkSLfvLr1YPD099dNPP+mpp57Siy++qCpVqmjUqFF6/fXXXTUeHh5auHChXn/9dX377bf64osvVL9+fU2YMEEvvPCC2/Ref/11xcfHa+7cuZozZ466d++upUuXqkaNGufdx+nTpys0NFRff/215s+fr5tvvlmLFy8udnPYufrHP/6h9evX6/PPP1diYqICAgLUvn17TZ8+XTfffPN59/N8WIyKuPq2DNjtdtlsNqWmprruFAQA4FKQnZ2tQ4cOqUGDBvL19a3o7lwWHn30Uc2dO1fp6ekV3ZVK5WzLamnyGtekAgAAwHQIqQAAADAdQioAAABMh5AKAABwihkzZnA9agUjpAIAAMB0CKkAAAAwHUIqAAAATIeQCgAAANMhpAIAAMB0CKkAAAAwHUIqAAAATIeQCgAAANMhpAIAgMva6tWrZbFYNHfu3IruCk5CSAUAABXGYrGc02P16tVnrB8/fnyJ01+9erXuu+8+hYWFycfHRzVq1FCPHj30ww8/lONc4nx4VXQHAABA+UvITFBqdupph9t8bQrxDynzfsycOdPt+VdffaXly5cXa2/WrJnr/7feeqv69evnNrxNmzbFpj1q1CiNGTNGjRs31uDBg1WvXj0lJiZqyZIluv/++/X111+rT58+F3FucDERUgEAuMzkFeTpyUVP6mDywdPWNKzWUN/c/428Pb3LtC99+/Z1e75x40YtX768WPvJmjRpcsbhkjR37lyNGTNGDzzwgGbPni1v76L5ePHFF7Vs2TLl5eVdWOdRpsrtdP/bb78ti8WiFi1aFBu2fv16dezYUf7+/goLC9PQoUOVnp5eXl0DAOCy4uXhpdpVais1J1VVfasWe6TmpKp2ldry8jDvsaysrCxlZ2efdvhrr72m4OBgTZ8+3S2gFrr99tt11113ubU5HA69/fbbqlOnjnx9fdW1a1cdOHCg2Li//fabunXrJpvNJn9/f3Xp0kW//vqrW80bb7whi8Wiffv2qW/fvrLZbAoNDdVrr70mwzAUFRWlu+++W0FBQQoLC9OkSZPO8524dJVLSI2OjtbYsWMVEBBQbNj27dvVtWtXZWZmavLkyRo4cKA+++wz9erVqzy6BgBApZedn63s/GwZhuFqy3fkKzs/W3kFecVqcwpy1O/qfrJZbcp35Mvf219+Xn7y8/ZTviNfNqtN/Vv3V0
5Bzmmnm1uQW27zd6oZM2YoICBAfn5+at68uWbPnu02fP/+/dqzZ4/uueceValS5ZynO378eM2bN08jRozQyJEjtXHjRj3yyCNuNStXrlTnzp1lt9s1atQojR07VikpKbr55pu1adOmYtN88MEH5XA4NH78eF133XV66623NGXKFN16662qXbu23nnnHV1xxRUaMWKE1qxZc35vyCWqXD4ijRgxQtdff70KCgqUkJDgNuzll19WtWrVtHr1agUFBUmS6tevryeeeEI///yzbrvttvLoIgAAlVav75wHdmbdO0s2X5sk6YfdP2jmnzN1W8Pb9Ox1z7pq+/7QVzkFOfq8x+fqFNFJyyKXKTMvU1H2KAX7BsuQodsb3a42YW3Ud15f2XPs+vCODxVhi5Ak/XLwF03dPFXX1b5Or3Z+tdzntUOHDurdu7caNGigo0eP6sMPP9Qjjzyi1NRUPfXUU5Kk3bt3S5JatmxZqmlnZ2dr+/bt8vHxkSRVq1ZNzz33nHbu3KkWLVrIMAw9+eSTuummm7R06VJZLBZJ0uDBg3XVVVfp1Vdf1c8//+w2zfbt2+vTTz+VJA0aNEj169fXCy+8oHHjxumll16SJD388MOqVauWpk+frs6dO5//m3OJKfMjqWvWrNHcuXM1ZcqUYsPsdrvrupPCgCpJ/fr1U2BgoObMmVPW3QMA4LJksVjUv3V/+Xv7KzMvU5KUXZAtf29/9W/d3xXAzObXX3/Vc889p549e+rJJ5/U77//rhYtWujll19WVlaWJGe+kFSqo6iSNGDAAFdAlaROnTpJkg4edF67u337du3fv199+vRRYmKiEhISlJCQoIyMDHXt2lVr1qyRw+Fwm+bAgQNd//f09FS7du1kGIYef/xxV3vVqlXVtGlT1+vAqUyPpBYUFOjZZ5/VwIEDS/w0s2PHDuXn56tdu3Zu7T4+PmrdurW2bdt22mnn5OQoJyfH9bxwgQQA4HLzXa/vJElWT6ur7b5m96ln057ytHi61c66b5arNtQ/VJ0iOumnAz/pmrBrFJkcqU4RndQmzHmn/LSe04pNt2vDrupSv4s8LOb4FksfHx8988wzrsDasWNH14GvtLS0Uk0rIiLC7Xm1atUkScnJyZKclxFIUv/+/U87jdTUVNd4JU3TZrPJ19dXISEhxdoTExNL1d9LXZmG1E8++URHjhzRihUrShweGxsrSQoPDy82LDw8XGvXrj3ttMeNG6fRo0dfnI4CAFCJ+Xr5Fmvz8vAq8canU2v7t+6vtX+vVUx6jAJ8AtyOopZmuhWpbt26kqSkpCRJ0pVXXinJeTCsNDw9PUtsL7wmt/Ao6YQJE9S6desSawMDA886zbO9DpzKbClLTEzU66+/rtdee02hoaEl1hQelrdarcWG+fr6uoaXZOTIkRo+fLjrud1udy2kAADg3LQJa6NOEZ00b8883Xvlva6jqJVJ4WnywrzRpEkTNW3aVAsWLND7779fLDier0aNGkmSgoKCdMstt1yUaeL0yuxY/auvvqrg4GA9++yzp63x8/OTJLfT9oWys7Ndw0titVoVFBTk9gAAAKVjsVg0oM0A3dboNg1oM8C016JKUnx8fLG2tLQ0TZkyRSEhIbrmmmtc7aNHj1ZiYqIGDhyo/Pz8YuP9/PPPWrRoUale/5prrlGjRo00ceLEEr8qs6T+4fyVyZHU/fv367PPPtOUKVN09OhRV3t2drby8vJ0+PBhBQUFuU7zF572P1lsbKxq1apVFt0DAAAnaR3WWp/3/Lyiu3FWH374oebPn68ePXooIiJCsbGxmj59uv7++2/NnDnT7aanBx98UDt27NDbb7+tbdu26eGHH3b94tRPP/2kX375pdhXV52Nh4eHPv/8c3Xv3l1XXXWVBgwYoNq1aysmJkarVq1SUFCQfvzxx4s925etMgmpMTExcjgcGjp0qIYOHVpseIMGDfTcc89p9OjR8vLy0pYtW9S7d2/X8NzcXG3fvt2tDQ
AAXN7+8Y9/aP369fr888+VmJiogIAAtW/fXtOnT9fNN99crP6tt97SzTffrA8++EAff/yxkpKSVK1aNV1//fVasGCBevbsWeo+3HjjjdqwYYPefPNNTZ06Venp6QoLC9N1112nwYMHX4zZxAkWowyu0k1ISNC6deuKtb/66qtKS0vT+++/r0aNGqlly5bq3r27/vjjD+3du9f1VRHTpk3TwIEDtXTpUnXr1u2cXtNut8tmsyk1NZVT/wCAS0p2drYOHTqkBg0ayNe3+M1MgFmcbVktTV4rkyOpISEhuueee4q1F35X6snD3n77bXXo0EFdunTRoEGDFB0drUmTJum2224754AKAACAS0uFf8lZ27ZttWLFCvn5+en555/XZ599pscff1xz586t6K4BAACggpTrF52tXr26xPaOHTvq119/Lc+uAAAAwMQq/EgqAAAAcCpCKgAAAEyHkAoAQCXBz2bC7C7mMkpIBQDA5Ap/6z0vL6+CewKcWeGve3l5XfhtT4RUAABMztvbW1arVampqRxNhanZ7XZ5enq6PlhdiHK9ux8AAJyfkJAQxcTEKDo6WjabTd7e3rJYLBXdLUCS8zR/RkaG7Ha7wsPDL8qySUgFAKASKPx1noSEBMXExFRwb4DiLBaLqlatKpvNdlGmR0gFAKCSCAoKUlBQkPLy8lRQUFDR3QHceHt7X5TT/IUIqQAAVDLe3t7y9vau6G4AZYobpwAAAGA6hFQAAACYDiEVAAAApkNIBQAAgOkQUgEAAGA6hFQAAACYDiEVAAAApkNIBQAAgOkQUgEAAGA6hFQAAACYDiEVAAAApkNIBQAAgOkQUgEAAGA6hFQAAACYDiEVAAAApkNIBQAAgOkQUgEAAGA6hFQAAACYDiEVAAAApkNIBQAAgOkQUgEAAGA6hFQAAACYDiEVAAAApkNIBQAAgOkQUgEAAGA6hFQAAACYDiEVAAAApkNIBQAAgOkQUgEAAGA6hFQAAACYDiEVAAAApkNIBQAAgOkQUgEAAGA6hFQAAACYDiEVAAAApkNIBQAAgOkQUgEAAGA6hFQAAACYDiEVAAAApkNIBQAAgOkQUgEAAGA6hFQAAACYDiEVAAAApkNIBQAAgOkQUgEAAGA6hFQAAACYDiEVAAAApkNIBQAAgOkQUgEAAGA6hFQAAACYTpmF1M2bN+uZZ57RVVddpYCAAEVERKh3797at29fsdrdu3erW7duCgwMVHBwsP75z38qPj6+rLoGAAAAk/Mqqwm/8847+vXXX9WrVy+1atVKx44d09SpU9W2bVtt3LhRLVq0kCRFR0erc+fOstlsGjt2rNLT0zVx4kTt2LFDmzZtko+PT1l1EQAAACZVZiF1+PDhmj17tlvIfPDBB9WyZUuNHz9es2bNkiSNHTtWGRkZ+v333xURESFJat++vW699VbNmDFDgwYNKqsuAgAAwKQshmEY5fmC11xzjSTp999/lyTVrFlTXbp00Zw5c9zqmjZtqrp162rFihXnNF273S6bzabU1FQFBQVd3E4DAADggpUmr5XrjVOGYej48eMKCQmRJMXExCguLk7t2rUrVtu+fXtt27atPLsHAAAAkyjXkPr1118rJiZGDz74oCQpNjZWkhQeHl6sNjw8XElJScrJySlxWjk5ObLb7W4PAAAAXBrKLaTu2bNHTz/9tG644Qb1799fkpSVlSVJslqtxep9fX3dak41btw42Ww216Nu3bpl1HMAAACUt3IJqceOHdOdd94pm82muXPnytPTU5Lk5+cnSSUeLc3OznarOdXIkSOVmprqekRFRZVR7wEAAFDeyuzu/kKpqanq3r27UlJStHbtWtWqVcs1rPA0f+Fp/5PFxsYqODi4xKOskvPo6+mGAQAAoHIr05CanZ2tHj16aN++fVqxYoWaN2/uNrx27doKDQ3Vli1bio27adMmtW7duiy7BwAAAJMqs9P9BQUFevDBB7VhwwZ99913uuGGG0qsu//++7
Vo0SK30/W//PKL9u3bp169epVV9wAAAGBiZfY9qcOGDdP777+vHj16qHfv3sWG9+3bV5IUFRWlNm3aqGrVqnruueeUnp6uCRMmqE6dOtq8efM5n9Lne1IBAADMrTR5rcxC6o033qj//e9/px1+8svu2rVLw4cP17p16+Tj46M777xTkyZNUs2aNc/59QipAAAA5maKkFreCKkAAADmZtpfnAIAAADOBSEVAAAApkNIBQAAgOkQUgEAAGA6hFQAAACYDiEVAAAApkNIBQAAgOkQUgEAAGA6hFQAAACYDiEVAAAApkNIBQAAgOkQUgEAAGA6hFQAAACYDiEVAAAApkNIBQAAgOkQUgEAAGA6hFQAAACYDiEVAAAApkNIBQAAgOkQUgEAAGA6hFQAAACYDiEVAAAApkNIBQAAgOkQUgEAAGA6hFQAAACYDiEVAAAApkNIBQAAgOkQUgEAAGA6hFQAAACYDiEVAAAApkNIBQAAgOkQUgEAAGA6hFQAAACYDiEVAAAApkNIBQAAgOkQUgEAAGA6hFQAAACYDiEVAAAApkNIBQAAgOkQUgEAAGA6hFQAAACYDiEVAAAApkNIBQAAgOkQUgEAAGA6hFQAAACYDiEVAAAApkNIBQAAgOkQUgEAAGA6hFQAAACYDiEVAAAApkNIBQAAgOkQUgEAAGA6hFQAAACYDiEVAAAApkNIBQAAgOkQUgEAAGA6hFQAAACYDiEVAAAApuNV0R2obBIyE5SanXra4TZfm0L8Q8qxRwAAAOeusmQZQmop5BXk6clFT+pg8sHT1jSs1lDf3P+NvD29y7FnuFCVZYVF5cEyBcCMKlOWMUVIzcnJ0euvv66ZM2cqOTlZrVq10ltvvaVbb721orvmxsvDS9W8aishfZtqBdST5aRhhqSjGUd0TWhteXmY4m29YHv+TtDRpNPvZGsF23RlROXfyeYV5GnA92dfYX/oU/ErLCoHlikAZlWZskzF90DSo48+qrlz52rYsGFq3LixZsyYoTvuuEOrVq1Sx44dK7p7LgkJFv35VX8lNFirpPx8eebbXMMKvFLl8LLpz7X9ldDJotDQCuzoRXD0WJ46v/uk0r1Pv5MNzGuo7a9+o1phlXsnm5zopT/W1tZx2zb5ZNUrNjzX74jSdtZW8u1eqlGjAjqISodlCoBZVaYsU+E3Tm3atEn//e9/NW7cOE2YMEGDBg3SypUrVa9ePf3rX/+q6O65sdul3L/bKCilkxz+cZJvqgoCo2T4JcrhH6eglE7K/buN5v31oxbsWaCM3AzXuDH2GK05skZ7Eva4TXN3/G79Ff+XsvOzXW0ZuRk6mnZUKdkpbrU5+TnKK8iTYRhlOp+SlJnuJcNeWw6fVFmNqsUeDp9UGfbaykw3xeecC5KWZpH//v7yKrDJ0ztfVo8A18PTO19eBTb57++vtDTL2ScG6NJbphyGQwWOArdtj8NwKLcgV7kFuW61Ofk5yszLVL4j39WW78iXPceu9Nx0t9r03HQlZSUpJz/HrTY+I16JmYlutSnZKYpNi3XbruYV5CkqNUox9hi32oTMBB1OOex2uUW+I18Hkg4oMinSrfZ4+nHtTdirhMwEt9qdcTu1M26n2zwfTTuqP479odi0WFebYRjaGrtVvx/9XQWOAld7jD1Gm2I26XDKYbfX2xC1Qeuj1ru9b1GpUVpzZI32Jux1q117ZK1WHVqlrLwst9rlkcv15/E/3WpXHVqlnw78pLScNLfaH/f+qE0xm9xql0cu1/w985WUleRqi7ZHa+5fc7XmyBq32mUHlunbnd/qePpxt3n7+s+v9dOBn9xql+5fqi+3f6mo1ChXW2xarKZvm655u+e51S7Zv0Sf/f6ZDiQdcLUdTz+ujzZ/pFl/znKrXbRvkT747QPtitvlakvITNB7G97TZ79/Vqx2wq8T9PvR311tKdkpGrd2nCatn1Ss9q01b2l91HpXW3puut5Y/YZGrx5drL+vrXxNvxz8xdWWk5
+jkStGauSKkcoryHOrffHnF/Xj3h9dbYZhaPiy4Rq+bLjbMrzswDINXTpU3+781u31nlv6nIYsHuL2N1pxcIUG/ThI07dNL1b7+ILH3ZbLVYdWqf/8/vrgtw+K1Q5c0ldpCVVdWcbwS5SnT7a8vA23LGO3q8JVeEidO3euPD09NWjQIFebr6+vHn/8cW3YsEFRUVFnGLv8WWRR3cT+8jb8VWCNV55PrPJ9jsvb8Fft+P4yHBZ9f+ArfbblcyWmF20stsZu1TtrJ+j7XQuVnS3XY8zqt/Xispf0d9IxV+36qPV6YsFgTf71A7faJ398Wvd8c592HtvnVtt7zkMas2qsW+2oX97Ukwuf1o6jRRu93fG79e/lr+rj3z53q52xdbYmrJ2i3ceLjprGZx2TV06wLBbJ4pkvHwXIRwGSV5byfGPl6fBT9aj+slgsisuI0xe/z9R3Oxa4TXfR7p/1yW/TtPPoftd0EzIT9H8bP9b0LTPdaufvWqKJ697X5r+LNrxJWUkat2aCJq1zfx/m7lioN1a+pf9FblTh/iM1O1Wv/jJKr/8yxq12zp/z9eKykVq65xdXbWZepoYtHa6hi4crM8uhnBzJmtxGvnm1lBG4Q3nWWHl6Sh6ehvKtxyXPfMVH/EepmVmu6f6wc5EGLxyimdu+1cmfGZ5e/KwGLxyi46mprtrFu5dr8MIhmr5lphyOotrnf3pBgxcO0d+Jca7a5fv+p8ELh+jj3z53q31p+UgNXjhEB+KiXbWrIzdo8MIhen/9Ryoo2jfq9ZVvaPDCIdp97KCrdv3h3zV44RBNWDvFrfat/43V4IVDtD16t6v296idGrxwiN5a/Y7yi3KG3l03UYMXDtHmv/901f4Zs1eDFw7RqF/edKt9f+MHGrxwiNYd2uyq3Xv8sAYvHKKRP7/mVvvR5k80eOEQ/XJgnav2UEKsBi8cohd+esmtdtrW6Rq8cIiW7V3lqo1OStDghUP07OLn3Wpn/fm1Bi8cooV//eSqjU9N0+CFQzR44RDlFe1TNGfXdxq8cIjm7ihaP1PTc1y1GdlFgWL+ngUavHCIZm+f66rNyjJctYlpzvXeL7WN/HIjlBG4Q7m+0fL0kDw8DOVb4ySPAsXV+0RxaUU7oOWRy/XoD48XWz+f+vFZ/fP7R3U48airduWhlXr4u76asHZKsdpe3z6kPSety2uOrNH9/+2lN1a+5VY7ZNFQ9Zx9r7ZFF+3410et152zeuiFn15yqx26eLh6zr5HG//e6qrdfmy77v3v/Xp+6Ytuta+sGKVe3z6oNQd/c9XuTdirh+c+omFLXnCrHb9mov75fX+tPLDOVXsk5Ygenf+Yhi11r/2/DR9r4IJBWr7/f67auIw4PbloSLHaaVu+0tOLntVP+1a61s+U7BQ9t/R5Pf/TCLfa//75vYb/NEKL9yx31WblZeml5SP10s8jlZnlcNUu/GupXl7xqn7c/ZOrNt+Rr9dXjtLrK99QSnq2q/bn/as1etWbbrWSNHbtOL39v3FKsKe7atce+k3vrJ2ghbuXuq33H/z2f5r462QdS01x1W6K2q4pGz4oVjtt2xf6v40fKiY5oWj9PLpHn2z+TIv2/OS23v935xz9Z8s0/Z143FW75/ghfbH1y2K1P+5dpK+2z9LhxFhX7cGEGM3+879asvdnt3Vu+cFfNGfnXB1KPFq0fibH6/td87Rs/wq32nVHftWC3T/qUEKMq/Z4aooW712qFQdWudVuPbpNy/Yv16HEotrEtHStiFypVQfXuNXuivtLqw+tcatNSc/WuiPr9evfG91qI5MOasPfv7nVpmXmaXP079oUvcVtGxGVGq2tR7frSFLRvGVmOfTnsZ3689hO5eYV/ZHjMuL1V9weRaccd1vW9sbv1974/crKKXqDk7OTFZl4SLH2BLfawylROpIcpczsog6n56YrJjVWcWnJbrXH0uJ1LC1OmTlFtdn52UpIT1JyRppbbXKmXak5qZIKXFnG8E
6Xp6chw8cub8NfdRP7yyJzfICu8MNg27ZtU5MmTRQUFOTW3r59e0nS9u3bVbdu3WLj5eTkKCen6NO3vRwjf2B6GwWnd9LxqguVbw9Tvk+ydKiTDm1oo4J8KXN1J3lZc/RxpJ/GjXKOU92/uiI3tNKxn+tp48Siaf1VJ1z5ngH6v11W/d84Z5uHxUM7t/vp4P98tW1KUe32+gXK9ZImb/fQFyemkZOfo41bM7Q9OVubJxfV7og4qiyfaCUvz9O3/+dsS85O1n9X/SHP5FwtiS6q3VV3szKsB7Quu6MWfNxQkhSXFatEv80qSK+iPK84eaQGySKL8qrFyfA6Lp+Y7vJLbSPJGTzfXjBHeYm19dXfd7umu7fWeqX6/66mSfW15svGkiR7jl1Tly1RZkJ1zTv8T1ftgbA/lBS4XnPiG2vTrFaSnDuKr/63RvaEAK1+Z6ir9mDNSCVU+U2LEq7Suv9Ivr5SniNP8zZuVWK8l9t7djg0VnG2nfolqaU6fCDZbM4jQL9s36+4OCnyE4eyMjx0+JBFubWuUX7rtUrPs8vHCFeBt12eDn/lZlaR3YjW8887FOTnnG50cKqOBkdpfWqSurwhRUQ423/bHaWomAIdnl4gnxPbodiqdkWFRGmdvYk6/Vtq7HwrtGlPtA4cydTBGfnyPbExPG5L15HQKK1Jr6eO/lLLls72LXuPamdkkg58mSf/E3kpvkqGDtWM0uqMmurgLV17rbP9931HtWVvrPbOzFGVEwfokwKzdCAsSkFZVXWDRSq8imbbgVit2xml3bNyZDtxsCbFP0v7akUpINtXNxRIXbueWKYOHtfP26K0a3aWqp04EGD3y9Ge2lHyy5XaZ0t33uls/+twvOb/FqUd32Sq+omDZ+nWXP1VN0rWvBxdmy7dd5+zfX90guavidL2bzNU48RqnOWTpx0RUfIusOmaJKlPH2d7ZGyi5q+K0tY5aQo7cZAsx6tAf9SPkofDqjbHpccec7b/HZ+k+auitGWuXf9JdrbleTq0rYHzg2/rGOmpp078jZKTNX9VlDZ9n6ovT+TGAouh3xs5axtHGhrxvLM9MT1V81dFaWNKir45cfDNkLT5Cmdt+C6H/tnb+YHWlna9EoNWKSMvTTmpksPHLuX7S9lVleuI1fQZ+brhxPKakZehZb/GaVNqipYUHbTS1gaJyvdMU9CWAn14YhuRW5CrNZtStS01Q2veLardXj9duV4ZemdzgWsbUeAo0Obt2dqRnKPf3yuq3RGRpyyffI3fYLi2EZK06y9pT7KhfR+e1FZXyrBK4381tODjovbdu6W/kqS/Pz2prbZFaX7S+FXSzdOcbRaLRXv3SjuTpF7Timr3hXsoNcCiccul7l8W1R484K0d8d7qNaOoNrKmj1ICfDVuqafu+drZ5mHxUPTBQCUfD1Cvkw6+HQkJUFIVmyb9ZNXdnzu3ER4WDyUcCVHsUU/1+qaoNjrYpsQqNRX3U4Du+dC5jfCweMgeXVt//y09+K1cu+vYqsFKCIrQ8WVVdc8kqUYNZ3+zjzVUZKT0zzke8jyRVeKCqishqImOL6uhu2sXbSMsCc21Z7dDj831lPeJbURiYA3F21rq2M91dE9Y0TbCO+lq7d+Woyfn+ri2Jyn+YYqzXatj2Y10d/WibYR/yjXasjFdQ7/3l/VEVrH71VScraOO5TRSj6CibUTV9Ou0YXWqRvwQJL8T2550a03F2brqWF4dbQgo2kaEZP1Dq1c21Ss/VHfVZvrUUJztDh3Pq6H/+RZtI2rnddYvv1yp0fNqurZT2d6hig+6T3EF1bTMs2gbUV83aunyKzV+XoQ+OFGb6xmieNvDiisI1EIVbSMae9+kBT831eR5V+jTE7V5HsFKCBqgeMNXc/KKthFX+d+sucua6qN5LfTlidoCjyAlVHlSFsNTX2UWbSNa227W7KVNNW1eY/33RK3DEqCkwGGSpM/TirYR14bcpBlLmujrefU070StIR+lBL4kSf
o03lPDT2wjOtS6SZ+Pb6K5eeFa4qqV7P6vO2uj/PXKSGd7l3pd9NGYplqUH6JfTgrFdr/Rkgx9GllVb57IEZ0iOumjN5toRZZNv55Um2EdI0MOfbqrpt49sY34R8Q/9MnbTfVraoB6nbTeZ3m/qdysAsUfCFfN5t4KTu+kuKClshg+yvWKVqj9dgWmt9Hp70YpXxUeUmNjYxUeHl6svbDt6NGjxYZJ0rhx4zR69OgSh5W1wqOpSYFrlasCWXJC5Lu36JNHnZihCgiQfE/K1h3qdlCL4x10Uq6WJDWPniBJCmxR1Na1YVd1jO1a7FB7qyOfymHJk62Rr6vt+jrXq0PSJ0qK93GrbXB8uBweWQoKre9qa1K9iVpnjFBSUhW32popPZXrlSh/ax1XW7BvqKyH7lGBd7LyrvxGho9dllybPAqsMrKryXffI675DfYLVmNHD8Wl2dymG5z+D/nn1JdfXoSrrapvVbXQQzqS4u9WWz3tRvnnNFZgdlNXm83Xpms9ntDeBPd5C7F3VWBWM7faKj5VdKP1eW2Ncz85EGq/TUFZreSXU9QHXy9f3RHwutYflSzydLVbdz4ho9p+5dXcJKPAUK5XnELtt8nY+rgS4i3yqOd70nRvUVBWS3nnV3d7vQeqv6mlmwx5OYre4+D0zgrIaSzv/Gputb1qvKZ56x1u06iWcZ38cuvKq8D9vewV9m/lr8mTNa9oXbFlttWVMW/Lq8D9A16v2i8ofWWO/HKL5rlKVktdGfO2PAsC3Wrvr/usEpZlyz+ngastILuprox5Wx4OP7faeyMG6+9FmfLLLbrG0j+n4Ylaq1vtPfUHaM8P6W598Mur46w13P+ePev31dZveso3t2j588mroStj3pbFcN9E9WjwoH6NuV2+ebVcbd4F1XRlzNuS4SFdXVR7R4N7tSymi6x5Ya42z4IAZ60kNSmqvb3BXfoh5nr55BVdIOph+LhqPSKKrru+uf6tujKmtXzy3W8aLKz1rhHgags7NkAx+X8qr+ZvMmTI8IuTz9+3y/vPwSookKw9q7pqO9frrFaxzeTIDio2XcPiUEDjmq62f9T9h9rFXaksu/8ptWNlyKGg+kUXkl1f53r9I/E/xbYRTWPeliwOVQ0vWtba1Wqnrqlf6ehRr+K1cig4pGiZuLrm1borc44ORLuvc02PjpFkyFalaBrNQprpgbwF2nnE/ehMk1jnTtt60uLTsFpDPaQftOWIW6kaHX9BpwqvEq6Hvb/Rr6fU1ksYrHoJg93agv2C9bD/F/rllNo6SY+oTtIjbm0BPgHqE/SJlvx9yuul3KvwlHvd2rw8vNSn2vuad8pJvxr2bqph71aszw+FjNc30e5t1dM7q3p652K1D9R4RWmn7AKrZl6rqpnXFqu9u+Zzio11bwvKaqWgrFbFau8IG6h9x93bAnOaKDCuSbHaW8Ie0u/x7m3+ufVVP/6pYrWda/bU6gT3Nt+8cNVNHFCs9oaat+rHJPc2n4Lqqp3Up1htuxodVSvZvc3bEaTwlPuK1bYKaaewlHZubZ4Of9VMvbNYbbPqLVTD3sKtzcPwUUha12K1jao1VkhaY7c2izwVnO5M8x4nLdoRQRGqlhFxSq3F9XfzPKk2vEq4bFnFM1BQlvPTx8lrYnX/6grKqa6cPPfagJwrnNM9eXxrkALzgnTSVTeSnNthR45kcTg/fBVmmWyfGHk6zHUUVZIsRnlc4HgGjRo1UtOmTbVkyRK39oMHD6pRo0Z67733NGzYsGLjlXQktW7dukpNTS12VPZiiYyUevWSqlaV/AMM7ao7TMeqzlPN5HvV/O8pysywKCVFmj1bathQ8vCQfE7aL2Rnn37aF1KbkyOd7q9osbjvAM61NjJSeuABKchm6EizYUqwLZN/9hXK9D2gkNTbVW/3FKXZLfruO6lRIyk3V26nnk7lW5TvLmqt1erstyTl5cntNNW51h486PwUXrWqlB+6VTsaDJRhKZDF8NTVRz6XZ1
xbJScX/V3PNN38fLmdTjqVj4/z73exa729JU/P0tcWFMjtlNapvLycj9LWOhzOv93FrjUMFfugd761np7O9+Ji13p4SFFRRduK3OonlikVyCJPtTz0ubzi2yolRfrmG6lZs6JxK9M2Qqq49b68txFnqz3bOsc2onjt5b6NKI9scLragwelRx6RqlVzzzJhKffqqqiiLFO4f7/Y7Ha7bDbbOeW1Cj+S6ufn5xY2C2WfeHf9/PyKDZMkq9Uqq9Va4rDyYJFFdRMGqMAjQxGJA+TpYZGHh3MhsVrdN6KFSmo7ndLUluZtKE2txSJ5elgUkdRfyVXWKtsaJU+HvyKS+svTw/2Tlo/PaSZSgrKq9fYu2kiUptZqletvVzXbeSlH4QoblNlGmZYz/11PdvIG8GzMUOvpWbQzupi1Hh7nvgyXptZiqVy1UvFlqmp2G2V6FN+hSJVvG2GG9b48thFnY4Z1mW2EkxnW+9JuI8q79uQPTSdnmboJA0x1FFUyQUgNDw9XTExMsfbYE+ctatWqVWxYRcs6ce2eV0ZrXZHwuSQp46T2S0lWluQb30ZBQZ0UHzJPoQn3yjO+zSU7r5JFIVEDlOPIUEjMAGVmWC7JeUX5YJkCYFaVIctUeEht3bq1Vq1aJbvd7nbY97fffnMNN4ugIKl6dSkx8fSH8qtXd9ZVdu7zalHA/gHKzM9QwKEBSrU7P2ldmvMqKaW1wmI+V4GklBM1l8q8onywTAEwq8qUZSr8mtTffvtN119/vSZMmKARI0ZIcl5v2qJFC1WvXl0bN248p+mU5hqHCxEfrzN+d1hQkCr8y28vFua1yKU0rygfLFMAzKoit0+V6prU6667Tr169dLIkSMVFxenK664Ql9++aUOHz6sadOmnX0C5Sw09PLZsTCvwPljmQJgVpVl+1ThIVWSvvrqK7322muaOXOmkpOT1apVKy1atEidOxf/Sg4AAABc+ir8dP/FUl6n+wEAAHB+SpPXKvxnUQEAAIBTEVIBAABgOoRUAAAAmA4hFQAAAKZDSAUAAIDpEFIBAABgOoRUAAAAmA4hFQAAAKZjil+cuhgKf5PAfqYfowUAAECFKcxp5/JbUpdMSE1LS5Mk1a1bt4J7AgAAgDNJS0uTzWY7Y80l87OoDodDR48eVZUqVWSxWMrlNe12u+rWrauoqKhL/qdYmVfg/LFMATCr8t4+GYahtLQ01apVSx4eZ77q9JI5kurh4aE6depUyGsHBQVdNjse5hU4fyxTAMyqPLdPZzuCWogbpwAAAGA6hFQAAACYDiH1AlitVo0aNUpWq7Wiu1LmmFfg/LFMATArM2+fLpkbpwAAAHDp4EgqAAAATIeQCgAAANMhpAIAAMB0CKkAAAAwHUIqAAAATIeQegbp6ekaNWqUunXrpuDgYFksFs2YMaPE2t27d6tbt24KDAxUcHCw/vnPfyo+Pr58O3wBdu3apV69eqlhw4by9/dXSEiIOnfurB9//LFYbWWf19WrV8tisZT42Lhxo1vt+vXr1bFjR/n7+yssLExDhw5Venp6BfUcZlAW2wWHw6F3331XDRo0kK+vr1q1aqVvvvmmjOcEwKVm8+bNeuaZZ3TVVVcpICBAERER6t27t/bt21estjJsny6Zn0UtCwkJCRozZowiIiJ09dVXa/Xq1SXWRUdHq3PnzrLZbBo7dqzS09M1ceJE7dixQ5s2bZKPj0/5dvw8HDlyRGlpaerfv79q1aqlzMxMff/99+rZs6c+/fRTDRo0SNKlMa+Fhg4dqmuvvdat7YorrnD9f/v27eratauaNWumyZMnKzo6WhMnTtT+/fu1dOnS8u4uTKIstguvvPKKxo8fryeeeELXXnutFixYoD59+shiseihhx4qpzkDUNm98847+vXXX9WrVy+1atVKx44d09SpU9W2bVtt3LhRLVq0kFSJtk8GTis7O9uIjY01DMMwNm/ebEgyvvjii2J1Tz31lOHn52ccOXLE1bZ8+XJDkv
Hpp5+WV3cvuvz8fOPqq682mjZt6mq7FOZ11apVhiTju+++O2Nd9+7djfDwcCM1NdXV9p///MeQZCxbtqysuwmTutjbhejoaMPb29t4+umnXW0Oh8Po1KmTUadOHSM/P7/sZgbAJeXXX381cnJy3Nr27dtnWK1W45FHHnG1VZbtE6f7z8BqtSosLOysdd9//73uuusuRUREuNpuueUWNWnSRHPmzCnLLpYpT09P1a1bVykpKa62S21e09LSlJ+fX6zdbrdr+fLl6tu3r4KCglzt/fr1U2BgYKWcV1wcF3u7sGDBAuXl5WnIkCGuNovFoqeeekrR0dHasGHDxZ0BAJesDh06FDuj2bhxY1111VXavXu3q62ybJ8IqRcoJiZGcXFxateuXbFh7du317Zt2yqgV+cvIyNDCQkJioyM1HvvvaelS5eqa9euki69eR0wYICCgoLk6+urm266SVu2bHEN27Fjh/Lz84vNq4+Pj1q3bl3p5hXlqzTryrZt2xQQEKBmzZoVqyscDgDnyzAMHT9+XCEhIZIq1/aJa1IvUGxsrCQpPDy82LDw8HAlJSUpJyfHlL+JW5IXXnhBn376qSTJw8ND9913n6ZOnSrp0plXHx8f3X///brjjjsUEhKiv/76SxMnTlSnTp20fv16tWnT5qzzunbt2vLuNiqR0qwrsbGxqlmzpiwWS7E6STp69GjZdxjAJevrr79WTEyMxowZI6lybZ8IqRcoKytLkkoMZr6+vq4aswe3QsOGDdMDDzygo0ePas6cOSooKFBubq6kS2deO3TooA4dOrie9+zZUw888IBatWqlkSNH6qeffjrrvBYOB0pSmnXldOvMyXUAcD727Nmjp59+WjfccIP69+8vqXJtnzjdf4H8/PwkSTk5OcWGZWdnu9VUBldeeaVuueUW9evXT4sWLVJ6erp69OghwzAuuXk92RVXXKG7775bq1atUkFBwVnntbLOJ8pHadYVPz+/S3KdAlCxjh07pjvvvFM2m01z586Vp6enpMq1fSKkXqDCQ96Fh89PFhsbq+DgYNMfWTyTBx54QJs3b9a+ffsu+XmtW7eucnNzlZGRcdZ5rVWrVnl3D5VIadaV8PBwHTt2TIZhFKuTxLIGoNRSU1PVvXt3paSk6KeffnLbjlSm7RMh9QLVrl1boaGhbjfdFNq0aZNat25d/p26iAoP5aempl7y83rw4EH5+voqMDBQLVq0kJeXV7F5zc3N1fbt2yv9vKJslWZdad26tTIzM93uvJWk3377zTUcAM5Vdna2evTooX379mnRokVq3ry52/DKtH0ipF4E999/vxYtWqSoqChX2y+//KJ9+/apV69eFdizcxcXF1esLS8vT1999ZX8/PxcC/mlMK8l/aLGH3/8oYULF+q2226Th4eHbDabbrnlFs2aNUtpaWmuupkzZyo9Pb3SzCsqzrmuK3fffbe8vb310UcfudoMw9Ann3yi2rVru10/DQBnUlBQoAcffFAbNmzQd999pxtuuKHEusqyfbIYpx7DhZupU6cqJSVFR48e1ccff6z77rtPbdq0kSQ9++yzstlsioqKUps2bVS1alU999xzSk9P14QJE1SnTh1t3ry5UpwCv/fee2W329W5c2fVrl1bx44d09dff609e/Zo0qRJGj58uCRdEvN68803y8/PTx06dFCNGjX0119/6bPPPpO3t7c2bNjg+qqNrVu3qkOHDmrevLkGDRqk6OhoTZo0SZ07d9ayZcsqeC5QkS72duFf//qXJkyYoEGDBunaa6/V/PnztXjxYn399dfq06dPRc0mgEpm2LBhev/999WjRw/17t272PC+fftKKt2+vEK3T2X6UwGXgHr16hmSSnwcOnTIVbdz507jtttuM/z9/Y2qVasajzzyiHHs2LGK63gpffPNN8Ytt9xi1KxZ0/Dy8jKqVatm3HLLLcaCBQuK1Vb2eX3//feN9u3bG8HBwYaXl5cRHh5u9O3b19i/f3+x2r
Vr1xodOnQwfH19jdDQUOPpp5827HZ7BfQaZnKxtwsFBQXG2LFjjXr16hk+Pj7GVVddZcyaNasc5wjApaBLly6n3TadGvkqw/aJI6kAAAAwHa5JBQAAgOkQUgEAAGA6hFQAAACYDiEVAAAApkNIBQAAgOkQUgEAAGA6hFQAAACYDiEVAAAApkNIBQAAgOkQUgEAAGA6hFQAAACYDiEVAAAApvP/VBMLpeFdQ3wAAAAASUVORK5CYII=",
|
71 |
+
"text/plain": [
|
72 |
+
"<Figure size 800x500 with 1 Axes>"
|
73 |
+
]
|
74 |
+
},
|
75 |
+
"metadata": {},
|
76 |
+
"output_type": "display_data"
|
77 |
+
}
|
78 |
+
],
|
79 |
+
"source": [
|
80 |
+
"# top1 accuracy\n",
|
81 |
+
"CompoundT5 = [0, 0, 0, 0, 0]\n",
|
82 |
+
"ReactionT5 = [92.8, 92.8, 92.9, 93.0, 93.2]\n",
|
83 |
+
"T5Chem = [0.5, 0.2, 0.2, 0.1, 0.4][::-1]\n",
|
84 |
+
"\n",
|
85 |
+
"\n",
|
86 |
+
"# plot\n",
|
87 |
+
"import matplotlib.pyplot as plt\n",
|
88 |
+
"fig, ax = plt.subplots(1, figsize=(8, 5))\n",
|
89 |
+
"\n",
|
90 |
+
"\n",
|
91 |
+
"ax.plot([10,30,50,100,200], ReactionT5, \"o-\", label='ReactionT5', color='red', alpha=0.7)\n",
|
92 |
+
"ax.plot([10,30,50,100,200], CompoundT5, \"s--\", label='CompoundT5', color='blue', alpha=0.7)\n",
|
93 |
+
"ax.plot([10,30,50,100,200], T5Chem, \"v:\", label='T5Chem', color='green', alpha=0.7)\n",
|
94 |
+
"\n",
|
95 |
+
"\n",
|
96 |
+
"plt.ylim(-5, 100)\n",
|
97 |
+
"ax.set_xticks([10,30,50,100,200])\n",
|
98 |
+
"ax.set_xticklabels([10,30,50,100,200], fontsize=12)\n",
|
99 |
+
"# ax.set_yticks([10,20,30,40,50,60])\n",
|
100 |
+
"ax.set_yticklabels([int(i) for i in ax.get_yticks()], fontsize=12)\n",
|
101 |
+
"# plt.tight_layout()\n",
|
102 |
+
"ax.legend(loc=\"best\", fontsize=12)\n"
|
103 |
+
]
|
104 |
+
},
|
105 |
+
{
|
106 |
+
"cell_type": "code",
|
107 |
+
"execution_count": 6,
|
108 |
+
"id": "818bcb61",
|
109 |
+
"metadata": {},
|
110 |
+
"outputs": [
|
111 |
+
{
|
112 |
+
"name": "stderr",
|
113 |
+
"output_type": "stream",
|
114 |
+
"text": [
|
115 |
+
"/tmp/ipykernel_2056154/1623126519.py:21: UserWarning: set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator.\n",
|
116 |
+
" ax.set_yticklabels([int(i) for i in ax.get_yticks()], fontsize=12)\n"
|
117 |
+
]
|
118 |
+
},
|
119 |
+
{
|
120 |
+
"data": {
|
121 |
+
"text/plain": [
|
122 |
+
"<matplotlib.legend.Legend at 0x7f69c90f7810>"
|
123 |
+
]
|
124 |
+
},
|
125 |
+
"execution_count": 6,
|
126 |
+
"metadata": {},
|
127 |
+
"output_type": "execute_result"
|
128 |
+
},
|
129 |
+
{
|
130 |
+
"data": {
|
131 |
+
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAp4AAAGwCAYAAAAAOGVIAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAAB4vklEQVR4nO3dd3xT5f4H8M9J2ySd6aaDltKyKdiyFGTIFJnK0ovIUEQcgKBehSsyVEAZghcHXESUoQIORESm/ECGyJIhCJRZWuiiSVfapjm/Px6TNqSFziRtP+/XKy+S5zw5+Z7SJt88U5JlWQYRERERURVT2DsAIiIiIqodmHgSERERkU0w8SQiIiIim2DiSUREREQ2wcSTiIiIiGyCiScRERER2QQTTyIiIiKyCWd7B3AvRqMRCQkJ8PT0hCRJ9g6HiIiIiO4gyzIyMjIQEhIChaLkdk2HTzwTEhIQFhZm7zCIiIiI6B6uX7+OunXrlnjc4RNPT09PAOJCvLy87BwNEREREd1Jp9MhLCzMnLeVxOETT1P3upeXFxNPIiIiIgd2r2GRnFxERERERDbBxJOIiIiIbIKJJxERERHZBBNPIiIiIrIJJp5EREREZBMOP6udiIioNigoKEB+fr69wyCy4OzsDCcnp0rbxIeJJxERkR3JsoybN28iPT3d3qEQFcvJyQmBgYHQaDQVTkCZeBIREdmRKekMDAyEm5sbt4cmhyHLMgwGA3Q6HRITE5GTk4Pg4OAKnZOJJxERkZ0UFBSYk04/Pz97h0NULE9PT6hUKqSkpCAwMBBOTk7lPhcnFxEREdmJaUynm5ubnSMhujt3d3fIslzhcchs8QSQkp0CrV5b4nGNWgN/N38bRkRERLUJu9fJ0XFyUSXJL8jH+J/G49LtSyXWifSJxFeDv4KLk4sNIyMiIiKqWWp9V7uzwhmhnqHQ5mrhrfa2umlztQj1DIWzotbn6EREREQVUusTT0mSMCpmFDQqDQxGA9yV7lA7q+GudIfBaIBGpcGomFHsBiEiIqolZs6cyc/9KlLrE08AiA2KRafwTkjKSoJslHEm6Qz+SvoLCRkJ6BTeCbFBsfYOkYiIqNpZtWoVJEky35ydnREaGorRo0fjxo0bdo0tOzsbM2fOxJ49e2z+2hERERY/l5Juq1atumv98ePH2zz2imL/MQpbPfdd24ekrCTkFuQiOz8boV6h5tbOAmMBnBTlXz6AiIjIZoxG4MwZ4PZtwMcHaN4cUNivrWn27NmoX78+9Ho9Dh06hFWrVuG3337D6dOnoVar7RJTdnY2Zs2aBQB46KGHLI69+eabeOONN6rstRcvXozMzEzz459//hlfffUVPvjgA/j7F05m7tChg/l+TEwMXnnlFYvzNGrUqMpirCpMPP9havXcFrcNLeu0xPnU8+hSr4u5tfPdfe8CAEbHjEa4JtyeoRIREZXswAFg6VLg7FkgNxdQqYCmTYGXXgKKJDK29Mgjj6BNmzYAgLFjx8Lf3x/vvfcefvzxRwwbNswuMd2Ns7MznJ2rLkV69NFHLR7fvHkTX331FR599FFEREQU+5zQ0FCMGDGiymKyFXa1/8PU6unm4oZbWbfg6+prbu1My0nDkYQj+CPhD7goOLOdiIgc1IEDwKuvAseOAd7eQESE+Pf4cVF+4ICdAxQ6deoEAIiLizOXnTt3DkOGDIGvry/UajXatGmDH3/80eJ5aWlpePXVV9GiRQt4eHjAy8sLjzzyCP7880+r19Dr9Zg5cyYaNWoEtVqN4OBgDBo0CHFxcbhy5QoCAgIAALNmzTJ3Xc+cORNA8WM8DQYD3n77bURFRUGlUiEiIgLTpk1Dbm6uRb2IiAj069cPv/32G9q1awe1Wo3IyEh8+eWXFf655eXlISsrq8LnsScmnkWYWj1v59y2GNvp6+qLT/p+ghfavIBgz8Ktoj4//jnm75+PK+lX7BQxERHVOLIM6PVlv2VnA0uWAKmpQG
Qk4OYGSJL4t359Uf7hh6JeWc8ty5V6iVeuXAEA+Pj4AADOnDmDBx54AGfPnsUbb7yBhQsXwt3dHY8++ii+//578/MuXbqEH374Af369cOiRYvw2muv4dSpU+jSpQsSEhLM9QoKCtCvXz/MmjULrVu3xsKFCzFp0iRotVqcPn0aAQEB+OSTTwAAjz32GFavXo3Vq1dj0KBBJcY8duxYvPXWW2jVqhU++OADdOnSBXPnzsUTTzxhVffixYsYMmQIevbsiYULF8LHxwejR4/GmTNnyv0z2717N9zc3ODh4YGIiAgsWbKk3OeyJ3a1FyFJEsbEjkFWfhbGxI6x+LYT6hWKUK9Q8+O8gjxsi9uGrPwsdI/sjgjvCDtETERENU5uLjB0aNmfp9MBJ04Azs5Aerr1cYMB2LoVeOQRwMurbOfesAGowFhMrVaLlJQU6PV6/P7775g1axZUKhX69esHAJg0aRLCw8Pxxx9/QKVSAQBeeOEFdOzYEa+//joee+wxAECLFi1w/vx5KIqMV33qqafQpEkTfPbZZ5g+fToA4Msvv8SuXbuwaNEiTJ482Vz3jTfegCzLkCQJQ4YMwfPPP4+WLVveswv7zz//xBdffIGxY8fif//7nzm+wMBALFiwAL/++iu6du1qrv/3339j79695pbdYcOGISwsDJ9//jkWLFhQ5p9fy5Yt0bFjRzRu3BipqalYtWoVXn75ZSQkJOC9994r8/nsiYnnHWKCYrBiwIp71lM6KTGn+xzsvbrXYtb7jrgd+Dv1b/Rr1I/JKBER2U5+vphUVNI+2k5OQF6eqGdjPXr0sHgcERGBNWvWoG7dukhLS8Pu3bsxe/ZsZGRkICMjw1zv4YcfxowZM3Djxg2Ehoaak1KgcJ97Dw8PNG7cGMeOHTMf+/bbb+Hv748JEyZYxVKeZZJ+/vlnAMCUKVMsyl955RUsWLAAW7ZssUg8mzVrZk46ASAgIACNGzfGpUslb1ZzN3cOORgzZgweeeQRLFq0CBMmTEDdunXLdV57YOJZAZE+kYj0iTQ/lmUZP5z7Add011BPU4+JJxERlZ1KJVoYy+r0aWDsWECjATw8rI9nZgJarehuj44ue0wV8NFHH6FRo0bQarVYuXIl9u7da04iL168CFmWMX36dHOL5Z2SkpIQGhoKo9GIJUuW4OOPP8bly5dRUFBgruPn52e+HxcXh8aNG1faBKGrV69CoVCgQYMGFuVBQUHw9vbG1atXLcrDw60nIfv4+OD27duVEo8kSZg8eTK2bduGPXv2VKtJR0w8K9kLbV/Atrht6Fa/m7ns1K1T+P3G73ikwSMW3fVERERWJKl83dqtWgHNmomJRJ6e4jwmsgwkJYk6rVrZfGmldu3amWe1P/roo+jYsSOGDx+Ov//+G0ajEQDw6quv4uGHHy72+aaEb86cOZg+fTqefvppvP322/D19YVCocDLL79sPk9VKm1rqVMJrc5yJY6VDQsLAyAmXFUnTDwrkSRJaB7YHM0Dm1uU//j3jzh04xCMshHjWo+zU3RERFSjKRRiyaRXXwXi4oCgIMDVFcjJAW7eFOt5vviiXdfzBERSNnfuXHTt2hVLly7F008/DQBwcXGx6pK/08aNG9G1a1d89tlnFuXp6ekW619GRUXh999/R35+Plxcil+Npixd7vXq1YPRaMSFCxfQtGlTc/mtW7eQnp6OevXqlfpclcXUbW+anV9dcFa7DfRu0BttQ9rikQaPmMtuZt7E2pNrkZKdYsfIiIioRunQAViwAIiNFROMrlwR/7ZqJcrttI7nnR566CG0a9cOixcvhpeXFx566CEsW7YMiYmJVnWTk5PN952cnKxaDTds2GC1C9LgwYORkpKCpUuXWp3P9Hw3NzcAImm9lz59+gAQC78XtWjRIgBA375973mO8kpLS7MYUgAA+fn5mDdvHpRKpcXY0uqALZ420DqkNVqHtLYo++XiL/j27Le4kHYBMx+aaZ/AiIio5unQAX
jgAYfauag4r732GoYOHYpVq1bho48+QseOHdGiRQs8++yziIyMxK1bt3Dw4EHEx8eb1+ns168fZs+ejTFjxqBDhw44deoU1q5di8jISItzjxw5El9++SWmTJmCw4cPo1OnTsjKysLOnTvxwgsvYODAgXB1dUWzZs3wzTffoFGjRvD19UV0dDSiixn/et9992HUqFFYvnw50tPT0aVLFxw+fBhffPEFHn300SpN/n788Ue88847GDJkCOrXr4+0tDSsW7cOp0+fxpw5cxAUFFRlr10VmHjaSbOAZjifet6iFVRv0GPrha3oHtkdXqoyLnVBRERkolAALVrYO4q7GjRoEKKiorBgwQI8++yzOHLkCGbNmoVVq1YhNTUVgYGBiI2NxVtvvWV+zrRp05CVlYV169bhm2++QatWrbBlyxar7S2dnJzw888/491338W6devw7bffws/Pz5zcmqxYsQITJkzA5MmTkZeXhxkzZhSbeJrqRkZGYtWqVfj+++8RFBSEqVOnYsaMGVXzA/pHixYt0KxZM6xZswbJyclQKpWIiYnB+vXrMbQ8y27ZmSRX5kjXKqDT6aDRaKDVauFV1nXHqpkdcTvw4eEPEe4VjqV9lpZryQciIqo+9Ho9Ll++jPr169ttz3Ki0rjX72pp8zXHanev5bzV3mjo2xA9InuYk05ZlvHr5V+RnZ9t5+iIiIiIKqbMiefo0aPNe5oWdys6wPfAgQPo2LEj3NzcEBQUhIkTJyIzM7NSL6AmaRvaFoseXoSBTQaay84kn8GiQ4sw/qfxMBgNdoyOiIiIqGLKPMbzueees1ruQJZljB8/HhEREQgNFetUnjhxAt27d0fTpk2xaNEixMfHY8GCBbhw4QK2bt1aOdHXUAqp8PtAriEXdT3ronlgczgrCv+7Tt46iSb+TaB0UtojRCIiIqIyK3Pi2b59e7Rv396i7LfffkN2djaefPJJc9m0adPg4+ODPXv2mPv6IyIi8Oyzz2L79u3o1atXBUOvHVqHtEar4FbILcg1l6Vkp+DN3W/CQ+mBZf2WwVPlaccIiYiIiEqnUsZ4rlu3DpIkYfjw4QDEANMdO3ZgxIgRFgNMR44cCQ8PD6xfv74yXrbWkCQJaufCgbw3M2/Cz80P9TT1LJLO69rrKDAWFHcKIiIiIrur8HJK+fn5WL9+PTp06ICIiAgAwKlTp2AwGMzbY5mYlgA4fvx4iefLzc1Fbm5h655Op6toiDVOdGA0PhvwGdL16eayvII8vL7zdSidlHi327vcmpOIiIgcToUTz23btiE1NdWim92080BwcLBV/eDgYOzbt6/E882dOxezZs2qaFg1nkJSwNfV1/w4XhcPhaSAJEkI8ihcTPZ2zm14q73Ns+RTslOg1WtLPK9GrYG/m3+Jx4mIiIjKq8KJ57p16+Di4oJhw4aZy3JycgAAKpXKqr5arTYfL87UqVMxZcoU82OdToewsLCKhlnjRfpE4vOBnyMxMxFOCicAYtLXzD0zoTfo8WqHVxHhHYHxP43HpduX7nqerwZ/BRen4ve2JSIiIiqvCiWemZmZ2LRpEx5++GH4+fmZy11dXQHAosvcRK/Xm48XR6VSFZuw0r25OLkgXBNufpyak4rEzEQYjAYEeQTBWeGMUM9QHEs8hghNBHDH+vRXtVcR6hlqMXueiIiIqLJUaHLRDz/8YDWbHSjsYjd1uReVmJiIkJCQirwslZK/mz++fOxLzHxoJjxVnpAkCaNiRiG3IBfnUs/BKBvhrnSHu9IdBqMBGpUGo2JGccckIiIiqhIVSjzXrl0LDw8PDBgwwKI8Ojoazs7OOHLkiEV5Xl4eTpw4gZiYmIq8LJWB2lmNlnVamh839W8Kdxd36HJ1UPzz3y/LMpKyktApvBNig2LtFSoRERHVcOVOPJOTk7Fz50489thjcHNzszim0WjQo0cPrFmzBhkZGeby1atXIzMzs1pual9TuLq44rMBnyHYIxgGWeyEpMvVwc
3Fja2dREREDk6SJMycOdPeYZRbuRPPb775BgaDwaqb3eTdd99FWloaunTpgk8//RRvvvkmXnrpJfTq1Qu9e/cud8BUcR3DO6JPwz5IykqCLMtIyEhA6+DWbO0kIqIqERcXh+eeew6RkZFQq9Xw8vLCgw8+iCVLltx1wjHd3d22MC9627Nnz13rz5s3z2Yxl3sWydq1axEYGGi1faZJq1atsHPnTrz++uuYPHkyPD098cwzz2Du3LnlDpYqh2ms575r+3Am6Qxu62+bx4ASERFVpi1btmDo0KFQqVQYOXIkoqOjkZeXh99++w2vvfYazpw5g+XLl9s7zGpp9erVFo+//PJL7Nixw6q8adOm5vs9e/bEyJEjLY7Hxtqu4anciefBgwfvWadjx47Yv39/eV+CqlBsUCw6hXfChr82wMfVBx4uHjDKRot94omIqHpJTgbutu+KlxcQEGC7eC5fvownnngC9erVw+7duy3W937xxRdx8eJFbNmyxXYB1TAjRoyweHzo0CHzzpEladSo0V2PVzVmGbWUJEkYEzsGfRr2wad9P8XsbrOZdBIRVWPJycDw4cDQoSXfhg8X9Wzl/fffR2ZmJj777LNiN5Vp0KABJk2aBAAwGAx4++23ERUVBZVKhYiICEybNs1qacaIiAj069cPe/bsQZs2beDq6ooWLVqYu5O/++47tGjRAmq1Gq1bt7baLXH06NHw8PDApUuX8PDDD8Pd3R0hISGYPXs2ZFm2qJuVlYVXXnkFYWFhUKlUaNy4MRYsWGBR78qVK5AkCatWrbK6vjvHY86cOROSJOHixYsYPXo0vL29odFoMGbMGGRnZ1s8Nzc3F5MnT0ZAQAA8PT0xYMAAxMfH3/NnXho5OTnQ6/WVcq6yYqZRi8UExWDFgBV4uMHD9g6FiIgqSKcDUlMBlQrw9ra+qVTiuC13ot68eTMiIyPRoUOHe9YdO3Ys3nrrLbRq1QoffPABunTpgrlz5+KJJ56wqnvx4kUMHz4c/fv3x9y5c3H79m30798fa9euxeTJkzFixAjMmjULcXFxGDZsGIxGo8XzCwoK0Lt3b9SpUwfvv/8+WrdujRkzZmDGjBnmOrIsY8CAAfjggw/Qu3dvLFq0CI0bN8Zrr71msdFNeQwbNgwZGRmYO3cuhg0bhlWrVlnt2jh27FgsXrwYvXr1wrx58+Di4oK+fftW6HUBYNWqVXB3d4erqyuaNWuGdevWVficZSI7OK1WKwOQtVqtvUOp8XINufL5lPP2DoOIqNbIycmR//rrLzknJ6eYYyXfcnOt6545I8v33SfLXbrIcp8+lrd+/WS5a1dZjo0V9Up7Xr2+/Ndm+vweOHDgPeueOHFCBiCPHTvWovzVV1+VAci7d+82l9WrV08GIB84cMBctm3bNhmA7OrqKl+9etVcvmzZMhmA/Ouvv5rLRo0aJQOQJ0yYYC4zGo1y3759ZaVSKScnJ8uyLMs//PCDDEB+5513LGIaMmSILEmSfPHiRVmWZfny5csyAPnzzz+3ui4A8owZM8yPZ8yYIQOQn376aYt6jz32mOzn52f183jhhRcs6g0fPtzqnEW9+OKL8t1Suw4dOsiLFy+WN23aJH/yySdydHS0DED++OOPS3yOyd1+V2W59Pkat6ghAMAN3Q28sesNFBgLsHLgSqid1fYOiYioVrvbyoNt2gBFGucwYgSQlgZcvgw4O4ubiacnUGRuCd54A7ijR9msYUNg0aLCxy+8AHz2Wfni1/3TtOrp6XnPuj///DMAWLUkvvLKK1iwYAG2bNmCrl27msubNWuG9u3bmx/ff//9AIBu3bohPDzcqvzSpUt46KGHLM790ksvme9LkoSXXnoJW7Zswc6dO/HEE0/g559/hpOTEyZOnGgV08aNG7F161aLc5TF+PHjLR536tQJ33//PXQ6Hby8vMw/jztf++WXX65QC+Wd826efvpptG7dGtOmTcPo0aPvurNkZWFXOwEAgj2D4ersCr
WzGgkZCfYOh4iIqjkvLy8AsFjPuyRXr16FQqFAgwYNLMqDgoLg7e2Nq1evWpQXTS4BsX44AISFhRVbfvv2bYtyhUKByMhIi7JGjRoBEGM2TTGFhIRYJc6mGeJ3xlQWd8bv4+NjEafp5xEVFWVRr3HjxuV+zeIolUq89NJLSE9Px9GjRyv13CVhiycBABSSArMemoUA9wDu1U5E5AA2bCj5mOKOZqM1a4BLl8TkIW9vwN295OfOmwfckXOVeN6PPy5VqMXy8vJCSEgITp8+XernlHZZPycnpzKVyyU18VaCkmIuKCgo8Tn2iLMkpmQ9LS3NJq/HFk8yC/YMZtJJROQg1OqSb0qldV2VSiSOxd2KUqlKf16VqmLX0K9fP8TFxd1zCcZ69erBaDTiwoULFuW3bt1Ceno66tWrV7FA7mA0GnHp0iWLsvPnzwMQs+ZNMSUkJFi12J47d858HChsrUxPT7eoV5EWUdPPIy4uzqL877//Lvc5S2L6OQTYaJ0tJp5kRZZlnE0+a5dvXkREVDE5OUBWlvXNHhsE/fvf/4a7uzvGjh2LW7duWR2Pi4vDkiVL0KdPHwDA4sWLLY4v+mfAaWXM5r7T0qVLzfdlWcbSpUvh4uKC7t27AwD69OmDgoICi3oA8MEHH0CSJDzyyCMARMuuv78/9u7da1Hv4wo0F5vO/eGHH1qU3/nzKYvkYtbRysjIwOLFi+Hv74/WrVuX+9xlweYtsiDLMmbumYljN49heufpaBfazt4hERFRKXh5AX5+YsmkO5a+NPPzE/VsJSoqCuvWrcPjjz+Opk2bWuxcdODAAWzYsAGjR4/GpEmTMGrUKCxfvhzp6eno0qULDh8+jC+++AKPPvqoxcSiyqBWq/HLL79g1KhRuP/++7F161Zs2bIF06ZNM7f89e/fH127dsV//vMfXLlyBffddx+2b9+OTZs24eWXX7YYfzl27FjMmzcPY8eORZs2bbB3715zC2p5xMTE4F//+hc+/vhjaLVadOjQAbt27cLFixfLfc6PPvoIP/zwA/r374/w8HAkJiZi5cqVuHbtGlavXg3lnc3dVYSJJ1mQJAn1ferjVNIpJGYk2jscIiIqpYAAYN06x9q5CAAGDBiAkydPYv78+di0aRM++eQTqFQqtGzZEgsXLsSzzz4LAFixYgUiIyOxatUqfP/99wgKCsLUqVMt1tasLE5OTvjll1/w/PPP47XXXoOnpydmzJiBt956y1xHoVDgxx9/xFtvvYVvvvkGn3/+OSIiIjB//ny88sorFud76623kJycjI0bN2L9+vV45JFHsHXrVgQGBpY7xpUrVyIgIABr167FDz/8gG7dumHLli1WE6hK68EHH8SBAwewYsUKpKamwt3dHe3atcPKlSvRrVu3csdZVpLs4P2pOp0OGo0GWq3WPEOOqlZGbgYK5AJ4q73tHQoRUY2m1+tx+fJl1K9fH2o1l7GzhdGjR2Pjxo3IzMy0dyjVyr1+V0ubr7HFk6x4qu695hoRERFRWXFyEd1VSnYKkrNsuLEvERER1VhMPKlEP1/4GWN/HIvVJ1fbOxQiIiKqAZh4Uoka+jZEgVyA2zm3YZSN9g6HiIiowlatWsXxnXbEMZ5UooZ+DfFRn48Qrgm/d2UiIiKie2CLJ90Vk04iIiKqLEw8qVTyCvIQlxZ374pEREREJWDiSfd0Q3cDY38ci+m/TofeoLd3OERERFRNMfGkewr2DIbaWQ2VswoJGQn2DoeIiIiqKU4uontSSArM6DIDdTzqwFnBXxkiIiIqH2YRVCqhXqH2DoGIiIiqOXa1U5n9nfI3ZFm2dxhERERUzTDxpFKTZRkz98zEqztexZGEI/YOh4iIqER79uyBJEnYuHGjvUOhIph4UqlJkoR6mnpwVjhzkhEREZVIkqRS3fbs2XPX+vPmzSv2/Hv27MGgQYMQFBQEpVKJwMBA9O/fH999950Nr5LKg2M8qUwGNxuMgU0GwtfV196hEBHRHV
KyU6DVa0s8rlFr4O/mX+VxrF692uLxl19+iR07dliVN23a1Hy/Z8+eGDlypMXx2NhYq3PPmDEDs2fPRsOGDfHcc8+hXr16SE1Nxc8//4zBgwdj7dq1GD58eCVeDVUmJp5UJl4qL3uHQERExcgvyMf4n8bj0u1LJdaJ9InEV4O/gouTS5XGMmLECIvHhw4dwo4dO6zKi2rUqNFdjwPAxo0bMXv2bAwZMgTr1q2Di0vhdbz22mvYtm0b8vPzKxY8VSl2tVO5pWanIiU7xd5hEBERAGeFM0I9Q6HN1cJb7W110+ZqEeoZ6tDL4uXk5ECvL3mjkunTp8PX1xcrV660SDpNHn74YfTr18+izGg04t1330XdunWhVqvRvXt3XLx40eq5v//+O3r37g2NRgM3Nzd06dIF+/fvt6gzc+ZMSJKE8+fPY8SIEdBoNAgICMD06dMhyzKuX7+OgQMHwsvLC0FBQVi4cGE5fxI1FxNPKpdfLv6CsZvHYvWfq+9dmYiIykxv0ENv0FusImIwGqA36JFfkG9VN7cgFyPvGwmNSgOD0QA3Fze4OrvC1cUVBqMBGpUGo2JGIbcgt8Tz5hXkWZw315BbtRdZxKpVq+Du7g5XV1c0a9YM69atszh+4cIFnDt3Do8++ig8PT1Lfd558+bh+++/x6uvvoqpU6fi0KFDePLJJy3q7N69G507d4ZOp8OMGTMwZ84cpKeno1u3bjh8+LDVOR9//HEYjUbMmzcP999/P9555x0sXrwYPXv2RGhoKN577z00aNAAr776Kvbu3Vu+H0gN5bhfe8ihRflEwWA0ICU7BUbZCIXE7zBERJVp6IahAIA1j62BRq0BAHx39jusPrkavSJ7YcL9E8x1R3w3ArkFuVjRfwU6hXfCtrhtyM7PxnXddfiqfSFDxsNRDyM2KBYjvh8BXa4OH/X5COGacADArku7sPSPpbg/9H682flN83lf2PICPhv4WZVfa4cOHTBs2DDUr18fCQkJ+Oijj/Dkk09Cq9Xi+eefBwCcPXsWANCiRYsynVuv1+PEiRNQKpUAAB8fH0yaNAmnT59GdHQ0ZFnG+PHj0bVrV2zduhWSJAEAnnvuOTRv3hxvvvkmtm/fbnHOdu3aYdmyZQCAcePGISIiAq+88grmzp2L119/HQDwr3/9CyEhIVi5ciU6d+5c/h9ODcPEk8qloV9D/PeR/yLCO8LeoRAR0T8kScKomFHYd20fsvOzAQD6Aj181D4YFTPKnFQ5mju7tJ9++mm0bt0a06ZNw+jRo+Hq6gqdTgcAZWrtBIAxY8aYk04A6NSpEwDg0qVLiI6OxokTJ3DhwgW8+eabSE1NtXhu9+7dsXr1ahiNRigUhQ0sY8eONd93cnJCmzZtEB8fj2eeecZc7u3tjcaNG+PSpZLH3NZGTDyp3Jh0EhFVnQ1DNwAAVE4qc9mgpoMwoPEAOElOFnXXDFpjrhvgFoBO4Z3wy8Vf0DqoNeJux6FTeCfEBokZ4p8N+MzqvN0ju6NLRBer3quP+35c+RdWCkqlEi+99BLGjx+Po0ePomPHjvDyEpNbMzIyynSu8PBwi8c+Pj4AgNu3bwMQXfgAMGrUqBLPodVqzc8r7pwajQZqtRr+/v5W5Xcms7UdE0+qsPyCfFzXXUekT6S9QyEiqjHUzmqrMmeFc7GTg+6sa2r1vJF5A+5Kd4vWzrKcV+WssiqzlbCwMABAWloaAKBJkyYAgFOnTpXpPE5OTsWWm8a4Go1GAMD8+fMRExNTbF0PD497nvNer0MCE0+qkBu6G5i2exryC/KxcuDKYt/QiIjItmKDYtEpvBO+P/c9HmvymLm1szoxdVEHBAQAEMstNW7cGJs2bcKSJUusksHyioqKAgB4eXmhR48elXJOKhlnhFCFBHkEQeWkgouTC3czIiJyEJIkYUzsGPSK6oUxsWMcdmwnACQnJ1uVZWRkYPHixfD390fr1q3N5bNmzUJqairGjh0Lg8Fg9bzt27fjp5
9+KtPrt27dGlFRUViwYAEyMzNLFR+VH1s8qUKcFE6Y0WUGAt0Dq3xBYiIiKr2YoBisGLDC3mHc00cffYQffvgB/fv3R3h4OBITE7Fy5Upcu3YNq1evtpgY9Pjjj+PUqVN49913cfz4cfzrX/8y71z0yy+/YNeuXVbLMN2LQqHAihUr8Mgjj6B58+YYM2YMQkNDcePGDfz666/w8vLC5s2bK/uyay0mnlRhoV6h9g6BiIiqqQcffBAHDhzAihUrkJqaCnd3d7Rr1w4rV65Et27drOq/88476NatGz788EN88sknSEtLg4+PDx544AFs2rQJAwYMKHMMDz30EA4ePIi3334bS5cuRWZmJoKCgnD//ffjueeeq4zLpH9IsoOPetXpdNBoNNBqteYZbeS4LqReQAPfBg7drUNE5Cj0ej0uX76M+vXrQ63mGHlyXPf6XS1tvsYxnlQpZFnG7P+bjSnbp+BIwhF7h0NEREQOiIknVQpJklDXqy6cFc6I18XbOxwiIiJyQBzjSZVmSLMhGNh4IPzc/OwdChERETmgcrV4Hjt2DAMGDICvry/c3NwQHR2NDz/80KLOgQMH0LFjR7i5uSEoKAgTJ04sdpkCqjm8VF5MOomIiKhEZW7x3L59O/r374/Y2FhMnz4dHh4eiIuLQ3x8YffqiRMn0L17dzRt2hSLFi1CfHw8FixYgAsXLmDr1q2VegHkmNJy0mCUjfB38793ZSIiIqoVypR46nQ6jBw5En379sXGjRuhUBTfYDpt2jT4+Phgz5495plNERERePbZZ7F9+3b06tWr4pGTw/rl4i9YdnQZutTrgpcfeNne4RAROTwHX2CGqNJ+R8vU1b5u3TrcunUL7777LhQKBbKyssx7nJrodDrs2LEDI0aMsJhOP3LkSHh4eGD9+vWVEjg5rkifSBiMBiRlJcEoG+/9BCKiWsrZWbT/FLcLD5Ejyc/PB1DynvSlVabEc+fOnfDy8sKNGzfQuHFjeHh4wMvLC88//zz0ej0A4NSpUzAYDGjTpo3Fc5VKJWJiYnD8+PEKBUyOr5FfI3zY+0PM6T4HCokLJxARlcTJyQlOTk7Q6XT2DoWoRLIsQ6vVQqVSwcWlYrsUlqmr/cKFCzAYDBg4cCCeeeYZzJ07F3v27MF///tfpKen46uvvkJiYiIAIDg42Or5wcHB2Ldv311fIzc3F7m5uebH/GOsnur71Ld3CEREDk+SJAQGBiIxMREqlQru7u7cgIMchizLyM/Ph1arRWZmJkJDK75TYZkSz8zMTGRnZ2P8+PHmWeyDBg1CXl4eli1bhtmzZyMnJwcAoFKprJ6vVqvNx0syd+5czJo1qyxhkQMzGA2I18UjwjvC3qEQETkkjUaDnJwcpKSkIDk52d7hEFlRqVQIDQ2tlB0ky5R4urq6AgD+9a9/WZQPHz4cy5Ytw8GDB+Hm5gYAFq2WJnq93nyOkkydOhVTpkwxP9bpdAgLCytLmOQgEjIS8J/d/0FeQR5WDlgJlbP1lxEiotpOkiQEBwcjMDDQPI6OyFE4OTlVuHu9qDIlniEhIThz5gzq1KljUR4YGAgAuH37NqKiogDA3OVeVGJiIkJCQu76GiqVqtjWUqp+6rjXgZPkBCfJCfG6eET5Rtk7JCIih2Ua70lUk5Vp5kfr1q0BADdu3LAoT0hIAAAEBAQgOjoazs7OOHLEcr/uvLw8nDhxAjExMRUIl6oTJ4UTZnSZgRUDVjDpJCIiorIlnsOGDQMAfPbZZxblK1asgLOzMx566CFoNBr06NEDa9asQUZGhrnO6tWrkZmZiaFDh1ZC2FRdhGnCoHRS2jsMIiIicgBl6mqPjY3F008/jZUrV8JgMKBLly7Ys2cPNmzYgKlTp5q70d9991106NABXbp0wbhx4xAfH4+FCxeiV69e6N27d5VcCDm+uLQ4RPpEcsYmERFRLSXJZVyKPj8/H3PmzMHnn3+OhIQE1KtXDy+++CJefvlli3q//fYbXn
/9dRw7dgyenp4YNmwY5s6dC09PzzIFqNPpoNFooNVqK2U2FdmeLMt4Z+87OJxwGDO6zECbkDb3fhIRERFVG6XN18qceNoaE8+aYeXxldh8fjOeavkUBjUdZO9wiIiIqBIx8SSHotVrkW/Mh7+bv71DISIiokpW2nytTGM8icpLo9bYOwQiIiKyM26kTTaXrk9HanaqvcMgIiIiG2PiSTa17eI2PL3paaw+udreoRAREZGNMfEkm4rwjkC+MR+3Mm/BKBvtHQ4RERHZEMd4kk019m+MJb2XoL53fa7nSUREVMsw8SSbi/SJtHcIREREZAfsaie7MRgNuJp+1d5hEBERkY0w8SS7SMhIwLjN4zBt9zTkGnLtHQ4RERHZABNPsos67nWgkBRQSApc1123dzhERERkAxzjSXbhpHDCW13eQpBHEJROSnuHQ0RERDbAxJPsJlwTbu8QiIiIyIbY1U4O4dLtS5Bl2d5hEBERURVi4kl2Jcsy3tn7Dib9MglHE4/aOxwiIiKqQkw8ya4kSUKIZwicJCcurURERFTDcYwn2d3gpoPRv1F/BLgH2DsUIiIiqkJMPMnuNGqNvUMgIiIiG2BXOzkUrV6LtJw0e4dBREREVYCJJzmMHXE78PSPT2P1n6vtHQoRERFVASae5DDCNeHIK8jDjYwbMMpGe4dDRERElYxjPMlhNPZvjEW9FqGBbwNIkmTvcIiIiKiSMfEkh9LQr6G9QyAiIqIqwq52ckgFxgJc016zdxhERERUiZh4ksNJyEjAuM3jMG3XNOQacu0dDhEREVUSJp7kcOq41zHfv667bsdIiIiIqDJxjCc5HCeFE97q8haCPYOhdFLaOxwiIiKqJEw8ySHV865n7xCIiIiokrGrnRzelfQrkGXZ3mEQERFRBTHxJIclyzLm7puLCVsn4FjiMXuHQ0RERBXExJMcliRJCHQPhEJS4HL6ZXuHQ0RERBXEMZ7k0IY0G4L+jfsj0D3Q3qEQERFRBTHxJIemUWvsHQIRERFVEna1U7Why9UhLSfN3mEQERFROTHxpGphR9wOjNk0BmtOrrF3KERERFROTDypWgjThCGvIA/XtNdglI32DoeIiIjKgWM8qVpo4t8EC3ouQCO/RpAkyd7hEBERUTkw8aRqo7F/Y3uHQERERBXArnaqdgqMBbiuvW7vMIiIiKiMmHhStZKYkYjxP43H1F1TkVeQZ+9wiIiIqAyYeFK1EugeCKNshFE24mr6VXuHQ0RERGXAMZ5UrTgpnDC9y3QEewRD5ayydzhERERUBkw8qdqJ8I6wdwhERERUDmXqat+zZw8kSSr2dujQIYu6Bw4cQMeOHeHm5oagoCBMnDgRmZmZlRo80XXtdciybO8wiIiIqBTK1eI5ceJEtG3b1qKsQYMG5vsnTpxA9+7d0bRpUyxatAjx8fFYsGABLly4gK1bt1YsYiIAsizjvf3vYf/1/Zj10Cy0Cm5l75CIiIjoHsqVeHbq1AlDhgwp8fi0adPg4+ODPXv2wMvLCwAQERGBZ599Ftu3b0evXr3KFy3RPyRJgr+bPyRIiEuLY+JJRERUDZR7VntGRgYMBoNVuU6nw44dOzBixAhz0gkAI0eOhIeHB9avX1/elySyMKTZECzvvxxDmw+1dyhERERUCuVKPMeMGQMvLy+o1Wp07doVR44cMR87deoUDAYD2rRpY/EcpVKJmJgYHD9+/K7nzs3NhU6ns7gRFcdb7Y0gjyB7h0FERESlVKbEU6lUYvDgwViyZAk2bdqEd955B6dOnUKnTp3MCWViYiIAIDg42Or5wcHBSEhIuOtrzJ07FxqNxnwLCwsrS4hUS2XkZiAtJ83eYRAREdFdlCnx7NChAzZu3Iinn34aAwYMwBtvvIFDhw5BkiRMnToVAJCTkwMAUKms11hUq9Xm4yWZOnUqtFqt+Xb9OrdGpLvbdWkXnv7xaaw5ucbeoRAREdFdVHgdzwYNGmDgwIH47rvvUFBQAFdXVw
Ciy/xOer3efLwkKpWq2KSVqCShXqHQG/S4mn4VRtkIhcQNuYiIiBxRpSwgHxYWhry8PGRlZZm72E1d7kUlJiYiJCSkMl6SyKyJfxPM7zkfjf0aQ5Ike4dDREREJaiUpqFLly5BrVbDw8MD0dHRcHZ2tphwBAB5eXk4ceIEYmJiKuMliSw08W/CpJOIiMjBlSnxTE5Otir7888/8eOPP6JXr15QKBTQaDTo0aMH1qxZg4yMDHO91atXIzMzE0OHcukbqjpG2Ygbuhv2DoOIiIiKIcll2G+wW7ducHV1RYcOHRAYGIi//voLy5cvh4uLCw4ePIimTZsCAI4dO4YOHTqgWbNmGDduHOLj47Fw4UJ07twZ27ZtK1OAOp0OGo0GWq3WYl1QojslZiRixp4ZyM7PxsqBK6F0Uto7JCIiolqhtPlamVo8H330UaSkpGDRokV44YUX8M0332DQoEE4cuSIOekEgFatWmHnzp1wdXXF5MmTsXz5cjzzzDPYuHFj+a+I6B4C3ANQYCyAUTbiavpVe4dDREREdyhTi6c9sMWTyuLy7csI9gyG2llt71CIiIhqjdLma5Uyq53IUdT3qW/vEIiIiKgEXPCQaqx4XTwcvEGfiIioVmHiSTWOLMt477f38PyW53Hi5gl7h0NERET/YOJJNY4kSfBz84MECRfSLtg7HCIiIvoHx3hSjTS46WD0bdgXwZ7B9g6FiIiI/sHEk2okH1cfe4dAREREd2BXO9V4mXmZuJ1z295hEBER1Xps8aQa7bu/vsP/jv8PbUPaYtR9o6yOa9Qa+Lv52yEyIiKi2oeJJ9VY+QX5+PTop/j9xu84mnAUm89vhgTJok6kTyS+GvwVXJxc7BQlERFR7cHEk2osZ4Uzmvo3xbmUc2jg0wB35Jy4qr2KUM9QOCv4Z0BERGQLHONJNZYkSRgVMwr+bv4wyAa4K93NN4PRAI1Kg1ExoyBJ0r1PRkRERBXGxJNqtNigWHQK74SkrCQYjUbczLgJrV6LpKwkdArvhNigWHuHSEREVGsw8aQazdTqqXRS4tjNY7imu4bEjES4ubiZWzsv3b4Eo2y0d6hEREQ1HhNPqvFig2LRNaIrVE4qBLgFwGA0mFs7EzMSMemXSRj1wygYjAZ7h0pERFSjMfGkGs/U6unr6gsnhRN8XH3MrZ0JGQlwc3GzmmS04tgKLD+6HDd0N+wYORERUc3C6bxUK5jGen5/7ns81uQx89jO1iGtsW7QOmhztea6BqMBOy7tQHZ+NrpGdDWXJ2clIyMvA/W963NCEhERUTkw8aRaQZIkjIkdg6z8LIyJHWORODopnODr6mtRf9L9k3A66TSifKPMZdvjtuPrM1+jZ2RPTLx/os1iJyIiqimYeFKtERMUgxUDVtyznrPCGR3COqBDWAeLcoPRALWzGk39m5rLMnIzMG3XNMQExWB0zGg4KZwqPW4iIqKagoknUSmNihmFJ1s+aTED/uStk7iivYICuQDPtHrGXH488Th8XH1QT1OP3fJERET/YOJJVAZ37nJ0X9B9eP3B1y2SUVmWsfTwUiRlJ2H2Q7MRGxxrLmcSSkREtRkTT6IK8FB6oGN4R4syvUGPcE04cgw5aBbQzFy+5cIWbI/bjv6N+qNnVE9bh0pERGR3TDyJKpmriytmPDQDBcYCizGfxxKP4XL6ZWTkZZjLDEYDtpzfglbBrVDXqy5bRImIqEZj4klURe6caDTp/kk4cfMEmvg3MZedTT6LFcdXwPusN7589EtzucFosOrWJyIiqu74yUZkIxq1Bl0iuliUOSmc0CqoFQLdAy1aO/+949+QZRkT7p+ASJ9IW4dKRERUJZh4EtlRs4BmmNV1lkVZZl4mLqZdhAzZYn3R00mncfn2ZbQJaYNgz2Bbh0pERFRhTDyJHIyH0gOrHl2FC6kX4K32NpfvvLQTuy7vQmpOKkbHjAYgZspn5WfBQ+lhn2CJiIjKgHu1EzkgX1df3F/3fouyJv5NcF
+d+9A6uLW57Kr2KoZ/Oxz/2fUfyLJs6zCJiIjKhC2eRNVE7wa90btBb4uyC6kXIEOGi5OLxRjRdafWwVvtjY7hHeGl8rJ1qERERMVi4klUjfWM6olWwa2QnZ9tLssryMPGvzYi35iPFoEtzImnVq+Fs8IZ7kp3e4VLRES1HLvaiao5Pzc/hGnCzI8NRgP+Ff0vdKjbAXW96prLN/61EcO/G471Z9bbI0wiIiK2eBLVNG4ubhjafKhVeWJmIoyyEcEehTPi03LSsOzIMrQKboWHGzxsyzCJiKgWYuJJVEu82flNJGUlWYz5PJ54HAfiDyA5O9ki8YxLi0OIZwhcXVztESoREdVQTDyJapFA90CLx038m+DJFk/Cz9XPXCbLMmbsmYHMvEwsengRF7AnIqJKw8STqBYL9QrFE9FPWJSl5aTBzcUNuQW5CNeEm8t/vvAzTt06hYcbPIyYoBgbR0pERDUBE08isuDn5ofl/ZebZ8Gb/HbtN5xKOoUWdVqYE0+9QY/TSacRHRgNtbPaThETEVF1wcSTiIqlUWssHo+6bxSOJh5Fm5A25rJTt05h9t7ZCPUMxaf9PrV1iEREVM0w8SSiUmns3xiN/RtblOkNegS6BaJlnZYW5a/veB3+bv4YHTMaAe4BtgyTiIgcGBNPIiq3TvU6oWN4R+Qb881lSVlJ+CvlLygkBV5o+4K5/O+Uv5GVn4XmAc2hclbZI1wiIrIzJp5EVCGSJEHppDQ/9nX1xbzu83BNe81il6Tvz32P/df348kWT5onNJn2ly+63ScREdVc3LmIiCqVs8IZzQOb45GGj1iUB7gFwN/NH7FBseay86nnMeqHUVh+dLmtwyQiIjtgiycR2cQzrZ7B07FPW5Qdv3kct/W3kZaTZlH+0/mfUNerLpoHNIeLk4stwyQioirExJOIbObOLvXBTQejWUAzi6WYsvKy8L9j/4NRNuKzAZ+ZF73PNeRC6aRktzwRUTVW4a72d999F5IkITo62urYgQMH0LFjR7i5uSEoKAgTJ05EZmZmRV+SiGoIFycXtKzTEo38GpnLcgw56BrRFdEB0RY7La04tgJjNo3Bnit77BApERFVhgq1eMbHx2POnDlwd3e3OnbixAl0794dTZs2xaJFixAfH48FCxbgwoUL2Lp1a0VelohqMH83f7z8wMtW5WeSzyA1JxXuLoXvN0lZSfjl4i9oHdwazQOb2zBKIiIqjwolnq+++ioeeOABFBQUICUlxeLYtGnT4OPjgz179sDLywsAEBERgWeffRbbt29Hr169KvLSRFTLLO69GH8l/4Um/k3MZX/c+AMb/tqAcynnMKf7HHN5Wk4afNQ+7JYnInIw5e5q37t3LzZu3IjFixdbHdPpdNixYwdGjBhhTjoBYOTIkfDw8MD69evL+7JEVEspnZSICYqxGA8apglD14iu6Bje0VxmMBrw3E/P4Zkfn0FKdkpxpyIiIjspV4tnQUEBJkyYgLFjx6JFixZWx0+dOgWDwYA2bdpYlCuVSsTExOD48eMlnjs3Nxe5ubnmxzqdrjwhElEt0LJOS6tdk+J18TAYDcgtyIWfq5+5fHvcdiRlJaFLvS4I04TZOlQiIkI5E89PP/0UV69exc6dO4s9npiYCAAIDg62OhYcHIx9+/aVeO65c+di1qxZ5QmLiAgR3hH4evDXSMhIsOhq33ZxG86nnUewR7A58dQb9LidcxtBHkHslicisoEyJ56pqal46623MH36dAQEFL8Hc05ODgBApbLeFk+tVpuPF2fq1KmYMmWK+bFOp0NYGFsniKj0VM4q1Pepb1HWt1FfBCUEITa4cAH7IwlH8N7+99AmuA1mPDTjnudNyU6BVq8t8bhGrYG/m3/5AyciquHKnHi++eab8PX1xYQJE0qs4+rqCgAWXeYmer3efLw4KpWq2ISViKgiutXvhm71u1mU3cq8BWeFs0XXuyzLmLNvDiJ9IjGg8QDztp/5BfkY/9N4XLp9qc
TXiPSJxFeDv+Ki90REJShT4nnhwgUsX74cixcvRkJCgrlcr9cjPz8fV65cgZeXl7mL3dTlXlRiYiJCQkIqGDYRUcUNbjYYfRv1Ra6h8EtyvC4eh24cwrGbxzC42WCLcm+1N7S5WtTT1LM611XtVYR6hsJZwX05iIhKUqZZ7Tdu3IDRaMTEiRNRv3598+3333/H+fPnUb9+fcyePRvR0dFwdnbGkSNHLJ6fl5eHEydOICYmpjKvgYio3NTOamjUGvNjH1cfTGg3AcOaDYPSSWku/+LPL3A+9TwkSDAYDXBXusPNxQ3uLu4wGA3QqDQYFTOKY0WJiO6iTF/No6Oj8f3331uVv/nmm8jIyMCSJUsQFRUFjUaDHj16YM2aNZg+fTo8PT0BAKtXr0ZmZiaGDh1aOdETEVUyD6UHekVZrjMsyzKMshHeam9E+UThYPxBeKm8kJaThri0OBhlI4a3GI7YIDF+9JeLvwAA2tdtb5HUEhHVdpIsy3JFT/LQQw8hJSUFp0+fNpcdO3YMHTp0QLNmzTBu3DjEx8dj4cKF6Ny5M7Zt21bqc+t0Omg0Gmi1Wos1QYmIbC0rLwvnUs7h2c3PwkPpgZz8HMTdjoOriys2/2szWgW3AgA89f1TSNenY0nvJYj0iQQA7L26F8uPLkf7uu3xYrsXzefce3UvnBXOaFmnJTyUHna5LiKiiiptvlbhvdpL0qpVK+zcuROurq6YPHkyli9fjmeeeQYbN26sqpckIqpS7kp3tApuhU7hnZCUlYRA90D4ufmhb8O+5tZOWZbRvm57tA1pa7GOaEp2CrS5WugNeotzLj+6HHN/m4ukrCRz2f5r+/HClhfw+fHPLeoeTTiK00mnrc5BRFRdVMoo+D179hRb3rFjR+zfv78yXoKIyCFIkoRRMaOw79o+xGfEw1PpifFtxpvHdkqShBfavmD1vIejHkZMUAxUToWrdsiyjBaBLZCSnWKRpN7KuoXruuto4NvA4hwfHPoA2lwtPuz9oXm5qN/jf8cP535AbHAshjUfZq57LuUc3FzcEOwRzFn2ROQwOP2SiKiMYoNi0Sm8E74/9z0ea/KYubXzbtyV7ohURlqUSZKE1zu+blX3oYiHEOkTCU+lp7lMlmWEa8KRmp0KX1dfc3m8Lh6nk08jwN1yXeW3974NXa4O/33kv4jwjgAgWky3xW1Dyzot0a9RP4tzeCg9oFFpODmKiKoUE08iojKSJAljYscgKz8LY2LHVHqy5uvqa5Fcml5zTvc5VnU7hHUwd/mbGGUjfNQ+5n9NLqdfxsH4gxb73QPA6ztft0pS/7z5J/Ze3YvowGh0rd/VXDddnw4PpQeXjSKicuE7BxFROcQExWDFgBX2DgPBnsEI9rTcnlghKbC0z1Kruq2DW8PV2dWivlE2wklyggTJItk9n3oe2y9tR4FcYJF4vrDlBWTkZeCjPh8hXBMOAPgr+S8cSTiCxn6NcX/d+811DUYDE1QissB3BCKiWqK+T32rrUQVkgJfPvYlCowFUEiF802jA6PxZIsnzS2ggEgkcwxiy2Nvtbe5/EzSGWz4awO61+9ukXiO/H4kjLIRix5ehBBPsXHIhdQLOJ10Gg18G6BFnRZVcJVE5MiYeBIREZwUThaPmwY0RdOAphZlzgpnfDfsO+hydRbjTxv6NUS/hv3QxL+JuSy/IB8ZeRkAYFH3+M3jWH1yNXrU72GReI7ZNAYuChe83fVt1PGoAwC4kn4FcWlxiPCOQJRvVOVdLBHZDRNPIiIqNUmSrBbFjwmKQUxQjEWZs8IZXw/+Grf1ty3WJw3zCkPn8M5oFtDMXJZXkIeU7BQAgJuLm7n88I3D5iR10gOTzOUTfp4ApZMSUztNhb+bPwDghu4GEjISEOoVam5dJSLHw8STiIgqnSRJcFe6w13pblHePqw92oe1tyhzUbhgeb/lSMtJs0hSA9wCEBsUa9HamVeQhyvaKwBgMUnqt2u/Yc2pNegZ2RMT759oLp
+6cyrUzmpMvH8ifFzFRKukrCTczrmNOh51LIYMEFHVY+JJRER2JUlSsZOkutbvajGxCQCcJCcs6LkAt/W34e5SmNR6qjwR5ROFul51zWV5BXk4nSx21FM6Kc3luy/vxtpTa62S1Hf2vgM3FzeMbTUWXiqx80q6Ph05+TnwdfWFyrlwDVYiKh8mnkREVG04KZzQ2L+xVXmfhn3Qp2EfizKFpMBbnd9Cuj7dogvfReGCALcABLoHmstyDbn4/cbvAIDnWj9nLv/l4i9Ye2otekX2woT7J5jLlxxaAg+lBx6PftzcSpuVlwUZMtxd3LkeKlEJmHgSEVGN5KxwRtvQtlblg5sNxuBmgy3KJEnC5AcmWyWpBcYCqJxUFktN6Q167Ly8EwDwrxb/MpdvPr8Za0+tRe+o3nix3Yvm8s+OfQZPlSf6N+oPVxdXAGLylbPCmQkq1TpMPImIqNZTOinRrX43q/InWz6J4S2GwygbLcqfiX0GWr0Wrs6u5rLMvEwAlktN6Q16/PD3DwCA/o36m8s3/rUR35z5BgMbD8SY2DHm8vVn1sNL5YWuEV3ZtU81EhNPIiKiu5AkCU5S4XJTamc1Hm3yqFW9sa3GYuR9Iy2SVKNsxNBmQ6HL1ZlbOwEgLScNBXKBxdhTvUGP1SdXAxDbppp8ffpr/HT+J/Rr1A9PRD9hLv/5ws/wVnujTUgbi/MQOTImnkRERJXkzgTQzcUNI+8baVXvuTbP4fHox+GicDGXGYwG9I7qjYy8DIsZ+ynZKdDmaiHLsrksJz8Hnxz5BACwYegGc/m3f32LXZd34eGohzGwyUAAgCzL2H99P7zV3mji34S7SZFd8bePiIjIxpwVzuY1SE08lB4WY0NNRt03Cn0a9jHPtAfEjP0OdTsgMy/TIklNyEjAdd118w5TAJBjyMF7+98DIJJUU+K5+e/N2H99P7rX746eUT0BiCT15K2T8HH1QV2vuha7WRFVBiaeREREDsxT5QlPladFmUatwdROU63qPh79ODrX64wA9wBzmd6gR3RANLLzsy2S1CvpV3Am+YzF4v/Z+dl489c3AQAbh240jzPdHrcdRxKOoFN4J3Sq1wmASFIvp1+Gj9oH3mpvTpSiUmHiSUREVEMEugdaLBMFAL6uvpjbY65V3QGNByAmKAbhmnBzWY4hB+Fe4dAb9BaTm86lnMPB+IOI9Ik0l2XnZ2PSL2JHqW+HfWseZrDnyh6cTjqN+0PvN68qIMsyUnNS4a32Zld/Lcf/fSIiolqonnc91POuZ1Hm7+aPj/p+ZFW3Z2RPRPlEoZFfI3NZZl4mvNXeKDBaTpI6eeskdlzagQC3AHPimZWfhTGbxkCChI3DNprrH7h+ABdSL6BVcCu0qNMCgEhS8435nDBVQzHxJCIiortqGtAUTQOaWpTV8aiD1Y+ttlpq6sGwBxHoHoiWdVqay7R6LZwkJ6id1RYJ5R83/sDOyzvh6uJqTjwz8zIx/LvhcHdxx5pBa8wtpEcTjuKa9hqiA6PR0K8hAJgnXLGbv/pg4klERETlducEpNYhrdE6pLVFWahXKL5//Htk5WdZ1XV1cUUT/ybmstv62wBEMlm0W/63a79h5+WdeKrlU+bEMzMvE6M3jYafqx8+6fsJnBRi2avTSadxK/MWGvk1QpgmrPIuliqMiScRERFVOUmSzNuLmnQM74iO4R0tysK8wvD14K+hy9VZlDcNaIp8Yz6ifKLMZWk5acgryENmXqY56QSAnZd2YtflXXiq5VPmxDMjNwMTf5kIP1c/vN/zfXPCfCH1AtL16YjwjrCYlEVVg4knEREROQxJkuCudIe70t2ivFdUL/SK6mVRFuoViuX9llu1pNbT1ENMnRjU0xSOYU3NSUVKdgpyDbkWrbRbLmzBrsu7MLLlSAxtPhSASFLf3P0m/Nz8ML3zdHNX/jXtNeTk5yDEM8RqpQEqHSaeREREVC05K5wR7BlsVf5Y08fwWNPHLMpCPEOwsNdC5O
TnWJQHuAUgyifK4jypOam4lH4JKTkpFuNHv/3rW+y+shuj7huFIc2GABBJ6vv734efmx8m3T/JXD8pKwkFxgL4ufnZbKJUSnYKtHpticc1ao3V+rG2xsSTiIiIajylk9JiVr7Jky2fxJMtn7QoC3QPxFud30K+Md+i3F3pjgC3AAS4FXbJp+ak4sStE/BSeVkkqWtOrsGvV37F6PtGY3CzwQBEkvrpkU/h5+aHMTFjzPW1ei2cFc5wc3Er90Sp/IJ8jP9pPC7dvlRinUifSHw1+Cu4OLmUWKeqMfEkIiIiKsLNxc28FFRR41qPw7jW4yzK/Fz9MPmBySgwFljVVzmp4Ovqa36ckp2Cvdf2QqPS4OnYp83lnx3/DL9e+RVPxzxtbqnNzMvEmpNr4O/mb25dBYBcQy6UTkqrBNVZ4YxQz1Acv3ncYoiByVXtVYR6htp9HVUmnkRERETl5KnyRLf63azKp7SfgskPTLYo06g1eCb2GfMyUCbZ+dkAAG+1t7ksKSsJWy5sgUalsUg8lx5ein3X9mFc63Ho07APACArLws//v0jGvs1xr5r+2AwGqBRa8zP0eq10Kg0GBUzyu5LTzHxJCIiIqoCdyZ5vq6+eLTJo1b13uz8JvIK8izKPJWeGNZsmNVyVbf1t1EgF8DV2dVcdivrFtadXgeNSoNO4Z2wLW6buetflmUkZSXh4aiHERsUW3kXV05MPAEkJwM6XcnHvbyAAK6wQERERFXkzglIAe4BeOq+p6zqzXxoJtL16XBzcTOXqZ3V6B3VG0onJVqHtMa+a/ugy9VBo9ZAl6uDm4ubQ7R2Akw8kZwMDB8OpKaWXMfPD1i3jsknERER2ZezwtlqZnqIZwhebPciALGbU9FWT0dq7QQAxb2r1Gw6nUg6VSrA29v6plKJ43drESUiIiJyBJIkYVTMKLi5uOG67rpDtXYCTDzNXF0Bd3frm6vrvZ9LRERE5Chig2LRKbwTbufcRqfwTg7T2gmwq71YRiOgYEpORERE1ZAkSRgTOwZZ+VkYEzvGYVo7ASaeVrKygIsXgeBgIDDQ3tEQERERlV1MUAxWDFhh7zCssF3vDjodkJsLXLsmklAiIiIiqhxMPO8QHCwmFRmNouWzwHojAiIiIiIqByae/8jJES2cWVlAUBDg5CTuX75s78iIiIiIaoZan3h6eYl1OnNzgfR0ccvIEK2eBQWAVitaP7287BwoERERUTVX6ycXBQSIxeGLW6dz927gq6/EWp5paVxAnoiIiKgian3iCYiEsrikMjJSJJy5uUBIiO3jIiIiIqpJmHjehSQBkycDLi7iPhERERGVX60f43kvSmVh0inLwKVL9o2HiIiIqLpi4llKBQXABx8AL78MnDxp72iIiIiIqp8yJZ5nzpzB0KFDERkZCTc3N/j7+6Nz587YvHmzVd2zZ8+id+/e8PDwgK+vL5566ikkJydXWuC25uQEODuLVs/588XYTyIiIiIqvTIlnlevXkVGRgZGjRqFJUuWYPr06QCAAQMGYPny5eZ68fHx6Ny5My5evIg5c+bg1VdfxZYtW9CzZ0/k5eVV7hXY0HPPARERYsml+fO5uDwRERFRWUiyLMsVOUFBQQFat24NvV6Pc+fOAQBeeOEFrFq1CufOnUN4eDgAYOfOnejZsyeWLVuGcePGlfr8Op0OGo0GWq0WXg6wmOaNG2LCUU4OMGQIMGqUvSMiIiIisq/S5msVHuPp5OSEsLAwpKenm8u+/fZb9OvXz5x0AkCPHj3QqFEjrF+/vqIvaVehocDEieL+xo3AH3/YNx4iIiKi6qJciWdWVhZSUlIQFxeHDz74AFu3bkX37t0BADdu3EBSUhLatGlj9bx27drh+PHjFYvYAXTsCPTvL+4vXgzo9XYNh4iIiKhaKNc6nq+88gqWLVsGAFAoFBg0aBCWLl0KAEhMTAQABAcHWz0vODgYaWlpyM3NhUqlKvbcubm5yM3NNT/WFbelkAMYMwa4eR
MYMABQq+0dDREREZHjK1fi+fLLL2PIkCFISEjA+vXrUVBQYJ40lJOTAwDFJpbqfzK0nJycEhPPuXPnYtasWeUJy6ZcXIC33rJ3FERERETVR7m62ps0aYIePXpg5MiR+Omnn5CZmYn+/ftDlmW4uroCgEWrpYn+nz5pU53iTJ06FVqt1ny7fv16eUK0ucRE4OhRe0dBRERE5LgqZQH5IUOG4I8//sD58+fNXeymLveiEhMT4evrW2JrJyBaSr28vCxuju7KFbGw/HvviVnvRERERGStUhJPU/e6VqtFaGgoAgICcOTIEat6hw8fRkxMTGW8pEMJCwMiI8USS/PmAcU09hIRERHVemVKPJOSkqzK8vPz8eWXX8LV1RXNmjUDAAwePBg//fSTRTf5rl27cP78eQwdOrSCITseJyfgtdcAjUa0fv4z74qIiIiIiijTAvKPPfYYdDodOnfujNDQUNy8eRNr167FuXPnsHDhQkyZMgUAcP36dcTGxsLb2xuTJk1CZmYm5s+fj7p16+KPP/64a1f7nRxtAfm7OXkSePNNsa3myy8D/6wwRURERFSjlTZfK1Pi+fXXX+Ozzz7DqVOnkJqaCk9PT7Ru3RoTJkzAgAEDLOqeOXMGU6ZMwW+//QalUom+ffti4cKFqFOnTpVciKP4+mtg7VpAqQQWLhRbbBIRERHVZFWSeNpDdUs8ZRmYORM4dgx46CHglVfsHRERERFR1SptvlaudTypZJIkks3Nm4HHH7d3NERERESOg4lnFfDyAp580t5REBERETmWSllOiUpWUAB88QVw4YK9IyEiIiKyLyaeVeybb4CNG8X6npmZ9o6GiIiIyH6YeFaxgQOBoCAgKQn44AMx+YiIiIioNmLiWcXc3YGpUwEXF+DwYeC77+wdEREREZF9MPG0gchIYNw4cf/LL4EzZ+wbDxEREZE9MPG0kYcfBrp2BYxG4P33Aa3W3hERERER2RYTTxuRJOCFF4CwMCA7G7h82d4REREREdkW1/G0IbUamDZNTDAKC7N3NERERES2xcTTxurWtXwsy6I1lIiIiKimY1e7HZ0+DUycCKSk2DsSIiIioqrHxNNOZBlYuRK4ckVMNjIY7B0RERERUdVi4mknkgS89ppY5/PsWbHMEhEREVFNxsTTjoKDgUmTxP3vvwcOHbJvPERERERViYmnnbVvL7bVBIDFi4GbN+0aDhEREVGVYeLpAEaPBpo0AbKygLlzgbw8e0dEREREVPmYeDoAZ2fg9dcBT08gJETsbkRERERU03AdTwfh7w8sWSL+5bqeREREVBOxxdOBBAQUJp2yDGRk2DceIiIiosrExNMBZWUB8+aJ7ne93t7REBEREVUOJp4OKD8fOHcOuH4d+Phj0fpJREREVN0x8XRA3t5icXmFAvj1V2D7dntHRERERFRxTDwdVHQ08NRT4v6yZcClS/aNh4iIiKiimHg6sMGDgbZtRdf7vHli7CcRERFRdcXE04FJEjB5MhAYCCQmAh99ZO+IiIiIiMqPiaeD8/QUs9uDgoD+/e0dDREREVH5cQH5aqBRI+CTT8QOR0RERETVFVs8q4miSee1a4BOZ79YiIiIiMqDiWc1c/CgGPe5cCHX9yQiIqLqhYlnNRMcLP49dgzYsMG+sRARERGVBRPPaiYiAnj+eXF/zRrg5Em7hkNERERUakw8q6EePYDu3UVX+/z5QFqavSMiIiIiujcmntXU88+L1s/0dJF8FhTYOyIiIiKiu2PiWU2pVMAbbwBqNXD6NLBjh70jIiIiIro7rgxZjYWGApMmAZcvAz172jsaIiIiortj4lnNdewobkRERESOjolnDZKfD2zeDAwYwF2OiKpCcvLdN2/w8gICAmwXDxFRdcP0pAZ55x2xvmdKCjBunL2jIapZkpOB4cOB1NSS6/j5AevWMfkkIioJJxfVIP36iX83bwZ++82+sRDVNDqdSDpVKsDb2/qmUonj3M6WiKhkTDxrkLZtgS
FDxP0PPwRu3LBvPEQ1kasr4O5ufXN1tXdkRESOj4lnDTNiBBAdDeTkAPPmAXl59o6IqGbJyQFycwsf5+eLlSVSUwG9nn9zRER3U6bE848//sBLL72E5s2bw93dHeHh4Rg2bBjOnz9vVffs2bPo3bs3PDw84Ovri6eeegrJycmVFjgVz8kJeO01QKMBrlwBPv3U3hERVX9pacC2bcD168D580BSUuGx7Gwx/jM+XvQyTJgAvPQS8MEHwI8/AgkJ9oubiMjRlGly0XvvvYf9+/dj6NChaNmyJW7evImlS5eiVatWOHToEKKjowEA8fHx6Ny5MzQaDebMmYPMzEwsWLAAp06dwuHDh6FUKqvkYkjw9RXJ5/TpwIEDYkKEv7+9oyKqXvLygN9/B3btEpP2MjNFmVIptqs1USqBkBCxi5hWCxiNwNWr4rZ7t+iGDwkRda9fBw4fBqKixM3T0y6XRkRkN2VKPKdMmYJ169ZZJI6PP/44WrRogXnz5mHNmjUAgDlz5iArKwtHjx5FeHg4AKBdu3bo2bMnVq1ahXGccl3l7rsPmDgRaNGCSSdRWcmyaLVMTCwsi4oCrl0TGzd4eRWWu7oCdesCPj6FW9gajUBcHHDxItCoUWHdEyeAVasKHwcEFCahUVFAs2YiUSUiqqkkWS763b18WrduDQA4evQoAKBOnTro0qUL1q9fb1GvcePGCAsLw86dO0t9bp1OB41GA61WC6+i7/ZERJUkJUX0DvTvD0iSKFu+HDh0COjWTdxycoChQ8Xs9eImEpnGfm7YIJLI4hw5IlpQ4+Isk1qT998HmjYV9y9dAm7dEucKCCiMi4jIEZU2X6vwOp6yLOPWrVto3rw5AODGjRtISkpCmzZtrOq2a9cOP//8813Pl5ubi9wiI/d1XJukUhw9Krr4xo/nBxgRIJLEgwdFIvjnn6KVs3590UsAACNHAs8+W/j3kpws1ulMTbWcXFSUn59la+id2rQRNwDIyhKTkuLiCm/16xfW3blTLI0GAB4eli2jUVGi+55/y0RU3VQ48Vy7di1u3LiB2bNnAwAS//kaHxwcbFU3ODgYaWlpyM3NhUqlKvZ8c+fOxaxZsyoaFhWRmioWlzcYgHr1gD597B0RkX3IMvDXXyLZ/O030UppEh0NKIpMt1SrLZ8bECAWh6+snYvc3cVr/jM03oqfHxAZKbr3MzNFcvznn4XHv/xSdO8DonVUoQDCwsQEQyIiR1WhxPPcuXN48cUX0b59e4waNQoAkPPPO3lxiaX6n3fynJycEhPPqVOnYsqUKebHOp0OYWFhFQmz1vPzA0aNAj77DPjf/4CGDcWNqLb5+2/gjTcKH9epA3TvLrrS69S59/MDAmy3K9HgweKWny+Sz0uXxJjRuDiR/JqSTgBYvVp04yuVQESEZctovXqAi4ttYiYiupdyJ543b95E3759odFosHHjRjj98zXb9Z/BT7nF9EXp9XqLOsVRqVQlJqVUfgMHAmfOAP/3f8C0aWLGe3GTGLjXNNUUej2wf79Y7qh/f1HWuLFoRYyMFAln8+aO313t4lKYRPbsKcruHJlvGneakyOWeyq6wp2nJ7B2beF1JiSIlS/ubNElIrKFciWeWq0WjzzyCNLT07Fv3z6EmNYKQWEXe2IxI+cTExPh6+vLxNIOJAl48kng449Ft93+/UBQkHU97jVN1ZksA6dOia70AwdE8unhAfTuLRI4SQIWL3b8ZPNe7oz/jTfEtScmWo4ZjYsDwsMt68+eLZLPunULE9rISPEvZ9QTUVUrc+Kp1+vRv39/nD9/Hjt37kSzZs0sjoeGhiIgIABHjhyxeu7hw4cRExNT7mCpYgoKxJ7Sptm3+fmWCWZOTuFe00w8qTpJTBTJ5u7dYhKQSUiIaNk0GAq7m6t70lkSSRLXGxICdOokymTZchyrwSD+9mVZrCl6/TqwZ0/h8bZtgbfeKnyclcVklIgqV5kSz4KCAjz++OM4ePAgNm3ahPbt2x
dbb/Dgwfjiiy9w/fp18/jMXbt24fz585g8eXLFo6ZyU6nEBISbN8XjOz9USpqtS+TIduwQyxgB4ne6UyeRcDZuXHMTzdKQJMDNrfCxszPw+efA7dtizKipVfTiRbEbk0ZTWNdgEFvwentbtopGRYmekdr8cyWi8itT4vnKK6/gxx9/RP/+/ZGWlmZeMN5kxIgRAIBp06Zhw4YN6Nq1KyZNmoTMzEzMnz8fLVq0wJgxYyoveioXPz/xYcJlUam6MRrFzO6dO4EePYDYWFHevbtIpLp3B+6/X0yyoZL5+ACtW4ubSUaG5RfPxETRS5KSIm6//154TKMR42Yff1w8No05ZTJKRPdSpsTzxIkTAIDNmzdjs2mBuSJMiWdYWBj+7//+D1OmTMEbb7wBpVKJvn37YuHChRzf6SCKJp35+WLcJz+syVHFx4uu9F9/FcNBAJGEmhLP0FBg5ky7hVcjeHpabuEZFgZ88431WqPXromtQYsmmcnJYqe0oq2iUVHi/6XoElVERGVKPPcUHQx0D82bN8e2bdvKGg/ZwaVL4oNEo7HsliOyJ6MR+OUXMW7z778Lyz08gC5dROsmVS1XV7GNZ9Gh/Hl5Yh96b+/CsosXxXjQU6fEzUSlEoviDxkiWqKJiCq8gDxVb7IsxsTpdKIlKS1NfKiUtOUfUVWS5cKWNEkCtm4FrlwRrWZt2ohks21brktpT0ql9TrA7doBH35o2TJ66ZLouj93TvSqmJw8KfarL9o6GhHBHhei2oKJZy1UdJYrIMZ7ubiILrW8PGDePJGIDhzIMVtkG1eviq70Q4dEAqNWi9+9oUPFRJguXSxb2MixODuLls369cXYW0C0WCckiCT0nx2VAYg1Ri9cEDcThUIs+xQZCQwaJBa9J6KaiYlnLeLldfe9pkNCRHcZIHY5On4ceOUVTkKiqqHTiQ0Ndu0SyYnJgQNiJyEA6NzZPrFRxSkUYq3QunUty7t1A4KDLVtHtVrRsn3lCjBgQGHd334DDh60XG+06DhUIqp+mHjWIqXZa9rTEzh6FFixQsxqZZcmVbb4eLHP+B9/iCV7ANFi1rat6EovOtOaah5fX+DBB8UNEMMr0tIKl3UKDy+se+IEsHevuJkEBhYmon36MBElqm4kWb5z8zXHotPpoNFooNVq4cWmN5u5fl2My4qMFI9l2XIRbqLSkmXRwm7aojE5GXjmGVHeoIFoAevShS3rZO3vv8VkJVPL6J0b4n39deFaxDt2iLVITS2jAQEcKkRkS6XN19jiScX6Z91/s82bRZfoa69Zd50RFSc9XeyKs2sX4O8PzJghygMCgPHjxbg/juWju2ncWNxMsrIKF75PTrbcAOPXXy1n1Ht6Wi7t9OCDXNqJyBGwxZPuKS8PGDdOjA1VqYDnnhMTCNiaQHfKzwcOHxbJ5tGjYoIJIH5vvvySy3VR1dm5E/jrL5GUXr0qFr830WiA1asL37N27BDDO0xrjTo52SdmopqktPkaE08qlbQ0YNEisWsMAHTsCLz4olhTkQgANm0SXZ+ZmYVljRuLcZudOvF3hWwnP18kn6YuehcX4NlnC4+PGSN2YwLEMk716xe2jDZsKB4TUdmwq50qla8v8PbbwHffiZaD334T469efdVycWmqPVJTxQLjplZMpVIknX5+Ytxmt24clkH24eIixg83aGB9rKAA6NChMCnV68V7mWmTgsaNgQULCuv/+quYhR8RUThOmYjKjy2eVGYXLgDz54uB/kqlmAHv42PvqMgW8vLEWpu7donltsaPFzOLATH+7sIFoGVLjqWj6kGWxftY0aWdGjYERo4Ux3NzxVqypo0N6tYVyaypdbR+fctxpkS1GVs8qco0bAgsWQJ8+qmYhMSks2aTZbH7zK5dwL59QHZ24bErVwrvu7sDMTG2jo6o/CRJrF8cEiKGg9wpM1Ms73Xxopgsd/26uP36qzjetSswZYq4bzSKoUiRkWJMKREVj4knlYurKzB5skhKTK
5cEWs0duxot7CokhkMwMSJ4sPWJCBAjNs0LQROVFP5+RWuxmBaa9Q0qz4uznJr4Rs3gLfeEvf9/S1n1EdFieFKnJBJxMSTKsj0RpqbC7z/vkhQjh0Ts+A5Hqr60euBM2cKF3F3dhbJZVKSWI6me3egRQt+gFLt4+srbm3bFpYV/eKt04kZ8jduiIlLKSnA778XHh81ChgyRNzX60ULap06/Fui2oeJJ1UKJyegfXvR4rljh1jW5N//LlyAnhyXLItkc9cuMWlMrxfjduvUEcefe06siejqat84iRxN0aSxeXMx/Cg7G7h82XLc6LVrljsy/fkn8M47YnjKnS2jISEcI001GxNPqhTOzsBTTwH33QcsXCi+9b/yCjB6tNh7md/qHc+tW8Du3SLhvHWrsDwoSCzObUo8AwPtEx9RdeTmJpLQ5s0Ly3JzLZPJ1FTxnpmVBZw8KW4majUwdSrQqlXhc52cRH2imoCz2qnSZWSIyUembqbWrUXrJxcPdxxHjwIzZxY+dnUVY3O7dxfLY/GLAlHVMhhES2jRltHLl0Wi+fHHhbvH/fCD2HwhIqKwVTQyUjxWKu14AUR34Kx2shtPT+A//wG2bgU++0ws5szxnvYjy6JFJT8faNNGlDVvLrr5GjYUyWb79mJ3ISKyDWdnkUBGRgI9e4oyo1H0FoWGFta7dk387V64IG4mCoXovv/Pf0QvBVC47BORI2OLJ1Wpa9dEguPnJx7n5Yk3RhcX+8ZVGyQkiG703bvFRIfwcGDp0sIPpuxstkITOTpZBm7etGwZjYsTk5kUCmDDhsKWz08/BU6cEK2ipvVGIyO5axjZBls8ySEUHVAPiBbQc+dE13vRb/VUObKyxAShXbuAs2cLy93dRRd6fn7hhxSTTiLHJ0liZYng4MKl6mRZjBO9ccOyu/3iRVF24wawd29heWCgSERfe41jRcn++CtINqPViqRIpwMmTRKzpXv0YNdQZfrkE+D//k/clyQxQaFHD6BdO44HI6opJEmsFervb1k+fbp1y+jNm2I5NFm2TDoXLRIL5BedUe/vz/djqnrsaiebSk0Vb3imWZwdOwIvvcRt58rj+nXRjd69e+Ge6EePAitXimSzSxex7iAR1V6ZmWLR++xs4IEHRJksAyNGiEaAojw9RQLaogUwbJjtY6XqrbT5GhNPsjmjEfjuO2DNGqCgQOyE89prQNOm9o7M8WVkiG0rd+0Czp8XZYMHi2WrgMIFrdlqQUQlMW2De+daowUF4njLlsC77xbWf/998SXW1DIaGiqWeCIqimM8yWEpFGIHj5YtgfnzRVfQu++KRcs5+92a0ShaMnftEktUGQyi3MlJzFJv2bKwLhNOIroXSRJf9It+2c/LA65eFa2jnp6F5ZmZ4stuUUolUL++SEJbtQLuv982cVPNwBZPsqvsbDET84EHgA4d7B2NYzIYgJEjRWsnIN7wu3cHHnoI0GjsGhoR1XB6PbB/f2HL6KVLosykd2/gxRfF/fx88X5uahmNiOAybbUJWzypWnBzA6ZMsSw7ckQsovzgg/aJyZ60WjE56ORJsT6fJIkJAf36iSS9WzduQ0pEtqNWiy+63buLx7IslmozJaLR0YV1r14Ftm8vfCxJYiF8UyLaqlXhwvhUe7HFkxzK7dvi23NGBvDww8DYsTW/+91gAP74Q3SlHzlSOM5q7lzLN3UiIkd265ZIPC9dEks7padbHh8zBhg0SNxPSQH27BHLPEVGAvx4r/7Y4knVkqen6LrZuBHYtg04c0ZMPKqJrXyJicDmzeLN19SNDojdhLp1A+rVs1toRERlVqcO8NRThY/T0iwnMBUdU/rXX8AXXxQ+DgiwXNqpaVMufF8hRqP4AL19G/DxEdvVKRT2jgoAWzzJQZ08CSxcKN64nJ3FrO0BA6r/5JmiW9qdPg1MnSru+/qKMZvdu1svuk9EVNOcPCm2VY6LE1/C7zRtmtjKFxAL4l+9KlpHAwKq/+dAlTtwQG
xTd/asGLemUolM/qWXqnQyBZdTKg8H/oZQG+l0wIcfipncgJjB/Z//lHPnDTv+3+blAYcPi670kBDg2WdFuSwDy5YBbdsCMTFcnqRa4XsFUaXJygIuX7ZsHZ0xQ+y4BADr1wOrV4v7Hh6iB6zotqAhIUxGzQ4cAF59VbTaBAcDrq5ATo5YPsbHB1iwoMqSTyaeZWWnbwh2U00+OGVZfCtesUJ0P7/0UjlOYof/W1kW62zu3i0mC2VliXJPT+DLL7ltXbVW294riOzs55/F2NGrVwuXkyvqv/8VM+gBsbFGQYHYVKPWvc8ajWJngGPHRFZeNBuXZZHRt2olsvgq+Lxn4lkWdvyGYBfV8IPz2jXx7dc00SgjQ9x3cbnHE+3wf/vLL8CmTUB8fGGZv79InLt14x711Vpte6+gQqaPSlm++81oLLme6Vhxde48VrROcceKPvdux+6sc6/4q/pWgZ+PoUDCtRQ3xCV74eItD8Qle+HGbXesHrUTzk6izuJfW2LXubpwcSpAhI8OUf7piPLTIsr3Nur56KBUGGrszwdarehec3Ep7ELz9hYTBwCxKGt6umj9aNGiLL/9pcLJRaVlNIokLC3N8huCh4dow4+LAz76SCw06YAtgmVW0gfn8eOi3EE/OIuOe5RlsZNGRoaYeFRiImej/9u8PPHN2nSKW7dE0qlUiiWhunUTi7w75K9PcR+mRR+XdL+09Yp7jYqesyKvUZHXKygAZs0Sa8nUrSuaXkx7Dvr6ioFoM2cCb74pftfK+yFT1gTE9G/RD8/ijt35AVvcMUe7OdLPh+zOGUDkP7eeAKAC5DqAtK2wjuKyC9zSgOwCFS7cBC7AA4AHgFA4SwX4pvX7IvkEkJSrgZdzNtRO+ba+lKqRmSnep5RK6/dGQHze37olejrtiC2ep06J1bm9vUVCcvZsYYIiSWJFXL1e7EsYFCS+RUiSyCJM/97tflXVk6SyxwIAr78urrFevcLzmN5Yr10TLZ9z54pyo7HwZnpTv9v9om/SBQXW98t6nhLuJ9x2xas/PIgMvQvUzgY8d/+f6B55GRLueE5iotibs2jTaNFf97w80eLbp49okix67B6JimyUce52Hey83hi/JUTi37Hb0TrgOiDLSMz0xOnboXiwzgW4OeWVeI6yvF6lJlB3/hyodHQ64MQJ8S2juD48g0HcYmK4NgwVz/Sea3o/Nj0uejMdK65OWY4VrVPcsaLPvduxO+uU9ZgdbjIk3NS6Ii7JA3G3PBF3ywMXb3lA45aPT8afNNd7/YtmOBvvgVD/XEQFZSMqJAdRIdmIDNbDw10u/udpus7yHqusn3lxt3PngIkTxfuPaUkAhaLw/Yotng7i9m2RfLi6ig/jouvaAKIsJ0eMmfDzs0+MlaXoB+dff1kfNxjE3mgvveTQH5whAP7rvwuLLj2KkykRWLIpAsd8s/BixBa4O+cWVkxNFYMrjcbiR56b/m///lssKlcKSbka7E5pid2pLZGo9/2ntACHz2nQWn8AABAMIFg6CyRV6DKrv6I/8zvfTO91vyL1quKckiRaNJ2dxUDdO2eCmb6o6XSiNdQ04KwiicG9jpX0IVcNEoNib6VJxu52rLTJWGUkBmX9mZviIpuQ8M/7MICO/5TJ8j9j7T0izY+1PwNyHhAPID4J+L8kACdE/SZNxJbOJjk5Ik1waHXqiITy+HEx9Kfo75wsiyFBrVqJOR12xMTTx0eMcczJEd8QGjSwbCnKyRHfEkaNEq2EFW0BLK5+0dbB8rYylqae6Y3YzU3cL9rqJUmiVTArS+zDGBpa+S2uldji6ydJeFtywreHgDV7w7BPDsffBT3w6qOX0bRetojtyhXgvffE/6ubW+F1mpj+b194wXog9h1JR3auAu9+FoyTF91Er40noFbKeDAmC90fyEJ0g46AolP5kp17Ha+MpKm4a6vK166JTp0Sq2KbekfuZGpNeOONKmlNIKLykyTLP1tJEtt73r4tFrwvOq
P+1i3L/eoB4LnnxMdP0bVGo6JEe5TDvOUpFKLh6NVXxYUEBVmPQ3/xRbuP+2JXu2kW2PHj4rfozm8IVTwLzKbuHFZwpypuhq8qf/8thqbevCm2Y1u69J//qgr838qyGMpnGj8qy+Lv+do1MV6ze3cxFLam76pERdSm9wqiWiwjQ2xRXKeOeKzVioXxi8uWvLzE58HTTxeWybKdk9HiJhA3ayaSTgdYx5MtntXkG0KlaN5cjOEs6YPTQZrhy6pxY2DJEmD5cqB//yL/VQoFkp98Gbpz84GzWrHysEol/hCTkwFNNLyGT0JAkf/bxESxBNLu3aLXdPVqkVxKEjBhgphDYlpbjmqZ2vReQVSLeXpatnhqNGIt0cuXRaeHqWX02jXxOWHa5hgQCevTTwP16xe2ijZoUNiJaBMdOohJsw66ZCJbPE3s9A3B5kyz2m/fLv6D00FntZfH118DixYBBm2mSDT1esAoAwpJZJMBAfAL98CKFWLNzV27xN+piZubmMTcpIn9roEcUG15ryCiu8rLE2uLurkV9o6dOSNG29xJpRLJaJ8+QNeuto3TVriOZ3lUk0XVK6wWfHBeuQKMGyd2PfL1BeqFy1DkZIoJVM7OgLsHcvQS0tLEPBClUjxPksSk5O7dxRdGlcqOF0GOq7a8VxBRmRgMYjk9U6voxYuipVSvF8efew7o10/cv3IFWLzYcsxo/fqFn0fVDbvay0OhqFZjG8vNwZvhK0PdukDPniLx1OmAK1clREV5QqEQOYObGwBJ/IHn54st2Lp3F99Eq/viBWQDteW9gojKxNlZNGZERIjPFEB85iQmikS0UaPCukW77U0UCjFXITJSJKhF65dGcnLh8sLF8fISo87siS2eVGPFxYk/3PT0wkn9sizmVjVqJCbwp6eL8aEdOzrQzEQiIqrx0tPFyoZFW0e12sLjM2cCrVuL+ydOiG1Di7aO3jnzPjkZGD5crCRYEj8/YN26qkk+2eJJBDGENTBQDGFNT7deWQgAQkKYdBIRkW15e4sOSNMIN1kWnZCmRNS00yUgFqXZt0/cTAIDRQIaGQn06iVaOlNTxRCx4tYczckRx3U6+7Z6lrlvNTMzEzNmzEDv3r3h6+sLSZKwatWqYuuePXsWvXv3hoeHB3x9ffHUU08hOTm5ojETlYmTk2jhbNZMjN8s+sdMRETkCCRJzElo2xZ44gnLfVzatxfLiXfsKHa7BoCkJODgQWDtWjFdwyQvTySwBgPg7l54c5QF8Mvc4pmSkoLZs2cjPDwc9913H/bs2VNsvfj4eHTu3BkajQZz5sxBZmYmFixYgFOnTuHw4cNQVtfRs1RtFbd0KRERkaNr0EDcTLKyxKSluDgxsz4oSCyED4jevcxM0aKq0dgj2rsrc+IZHByMxMREBAUF4ciRI2jbtm2x9ebMmYOsrCwcPXoU4eHhAIB27dqhZ8+eWLVqFcaNG1exyIlKKSenbOVERESOzN0diI4WtztpNKJ1093d9nGVRpm72lUqFYKCgu5Z79tvv0W/fv3MSScA9OjRA40aNcL69evL+rJEZeblJQZS5+aKb4B33nJzxXHOWSMioprC21ssy+Som51UyeSiGzduICkpCW3atLE61q5dO/z8888lPjc3Nxe5RQYr6O62LgDRXQQEiNl7jr60BBERUW1RJYlnYmIiANEtf6fg4GCkpaUhNzcXqmJW5547dy5mzZpVFWFRLRQQwMSSiIhqD0cfXlYlK4bn/HN1xSWWarXaos6dpk6dCq1Wa75dv369KkIkIiIiqjGqy/CyKmnxdP1nzn7RLnMT/T/7RrmWMK9fpVIVm7ASERERUfGqy/CyKkk8TV3spi73ohITE+Hr68vkkoiIiKgSVYfhZVXS1R4aGoqAgAAcOXLE6tjhw4cRExNTFS9LRERERA6sShJPABg8eDB++uknizGau3btwvnz5zF06NCqelkiIiIiclDl6mpfunQp0tPTkZCQAADYvHkz4uPjAQ
ATJkyARqPBtGnTsGHDBnTt2hWTJk1CZmYm5s+fjxYtWmDMmDGVdwVEREREVC1IsizLZX1SREQErl69Wuyxy5cvIyIiAgBw5swZTJkyBb/99huUSiX69u2LhQsXok6dOqV+LZ1OB41GA61WCy97T8UiIiIiIiulzdfKlXjaEhNPIiIiIsdW2nytysZ4EhEREREVxcSTiIiIiGyCiScRERER2QQTTyIiIiKyiSrZuagymeY+6e62BxQRERER2Y0pT7vXnHWHTzwzMjIAAGFhYXaOhIiIiIjuJiMjAxqNpsTjDr+cktFoREJCAjw9PSFJUpW/nk6nQ1hYGK5fv17jl2+qTdcK1L7rparF3ycicmS2fo+SZRkZGRkICQmBQlHySE6Hb/FUKBSoW7euzV/Xy8ur1nyY1KZrBWrf9VLV4u8TETkyW75H3a2l04STi4iIiIjIJph4EhEREZFNMPG8g0qlwowZM6BSqewdSpWrTdcK1L7rparF3ycicmSO+h7l8JOLiIiIiKhmYIsnEREREdkEE08iIiIisgkmnkRERERkE0w8iYiIiMgmmHgSERERkU3UusQzMzMTM2bMQO/eveHr6wtJkrBq1api6549exa9e/eGh4cHfH198dRTTyE5Odm2AVfAmTNnMHToUERGRsLNzQ3+/v7o3LkzNm/ebFW3ul/rnj17IElSsbdDhw5Z1D1w4AA6duwINzc3BAUFYeLEicjMzLRT5OQIquJ9wWg04v3330f9+vWhVqvRsmVLfPXVV1V8JURU0/zxxx946aWX0Lx5c7i7uyM8PBzDhg3D+fPnrepWh/cnh98ys7KlpKRg9uzZCA8Px3333Yc9e/YUWy8+Ph6dO3eGRqPBnDlzkJmZiQULFuDUqVM4fPgwlEqlbQMvh6tXryIjIwOjRo1CSEgIsrOz8e2332LAgAFYtmwZxo0bB6BmXKvJxIkT0bZtW4uyBg0amO+fOHEC3bt3R9OmTbFo0SLEx8djwYIFuHDhArZu3WrrcMlBVMX7wn/+8x/MmzcPzz77LNq2bYtNmzZh+PDhkCQJTzzxhI2ujIiqu/feew/79+/H0KFD0bJlS9y8eRNLly5Fq1atcOjQIURHRwOoRu9Pci2j1+vlxMREWZZl+Y8//pAByJ9//rlVveeff152dXWVr169ai7bsWOHDEBetmyZrcKtdAaDQb7vvvvkxo0bm8tqwrX++uuvMgB5w4YNd633yCOPyMHBwbJWqzWX/e9//5MByNu2bavqMMlBVfb7Qnx8vOzi4iK/+OKL5jKj0Sh36tRJrlu3rmwwGKruYoioRtm/f7+cm5trUXb+/HlZpVLJTz75pLmsurw/1bqudpVKhaCgoHvW+/bbb9GvXz+Eh4eby3r06IFGjRph/fr1VRlilXJyckJYWBjS09PNZTXtWjMyMmAwGKzKdTodduzYgREjRsDLy8tcPnLkSHh4eFTLa6XKUdnvC5s2bUJ+fj5eeOEFc5kkSXj++ecRHx+PgwcPVu4FEFGN1aFDB6uex4YNG6J58+Y4e/asuay6vD/VusSzNG7cuIGkpCS0adPG6li7du1w/PhxO0RVfllZWUhJSUFcXBw++OADbN26Fd27dwdQ8651zJgx8PLyglqtRteuXXHkyBHzsVOnTsFgMFhdq1KpRExMTLW7VrKtsvytHD9+HO7u7mjatKlVPdNxIqLykmUZt27dgr+/P4Dq9f5U68Z4lkZiYiIAIDg42OpYcHAw0tLSkJub63D7n5bklVdewbJlywAACoUCgwYNwtKlSwHUnGtVKpUYPHgw+vTpA39/f/z1119YsGABOnXqhAMHDiA2Nvae17pv3z5bh03VSFn+VhITE1GnTh1IkmRVDwASEhKqPmAiqrHWrl2LGzduYPbs2QCq1/sTE89i5OTkAECxyZZarTbXcfRkzOTll1/GkCFDkJCQgPXr16OgoAB5eXkAas61dujQAR06dDA/HjBgAIYMGYKWLVti6tSp+O
WXX+55rabjRMUpy99KSX8zResREZXHuXPn8OKLL6J9+/YYNWoUgOr1/sSu9mK4uroCAHJzc62O6fV6izrVQZMmTdCjRw+MHDkSP/30EzIzM9G/f3/IslzjrrWoBg0aYODAgfj1119RUFBwz2utrtdJtlGWvxVXV9ca+TdFRPZ18+ZN9O3bFxqNBhs3boSTkxOA6vX+xMSzGKbmZlPTdVGJiYnw9fV1+BbAuxkyZAj++OMPnD9/vsZfa1hYGPLy8pCVlXXPaw0JCbF1eFSNlOVvJTg4GDdv3oQsy1b1APB3jYjKTKvV4pFHHkF6ejp++eUXi/eR6vT+xMSzGKGhoQgICLCYmGJy+PBhxMTE2D6oSmRqRtdqtTX+Wi9dugS1Wg0PDw9ER0fD2dnZ6lrz8vJw4sSJan+tVLXK8rcSExOD7OxsixmnAPD777+bjxMRlZZer0f//v1x/vx5/PTTT2jWrJnF8er0/sTEswSDBw/GTz/9hOvXr5vLdu3ahfPnz2Po0KF2jKz0kpKSrMry8/Px5ZdfwtXV1fyLWxOutbidGf7880/8+OOP6NWrFxQKBTQaDXr06IE1a9YgIyPDXG/16tXIzMysNtdK9lPav5WBAwfCxcUFH3/8sblMlmV8+umnCA0NtRiPTER0NwUFBXj88cdx8OBBbNiwAe3bty+2XnV5f5LkO9taa4GlS5ciPT0dCQkJ+OSTTzBo0CDExsYCACZMmACNRoPr168jNjYW3t7emDRpEjIzMzF//nzUrVsXf/zxR7Xofn7ssceg0+nQuXNnhIaG4ubNm1i7di3OnTuHhQsXYsqUKQBQI661W7ducHV1RYcOHRAYGIi//voLy5cvh4uLCw4ePGheNuLYsWPo0KEDmjVrhnHjxiE+Ph4LFy5E586dsW3bNjtfBdlTZb8v/Pvf/8b8+fMxbtw4tG3bFj/88AO2bNmCtWvXYvjw4fa6TCKqZl5++WUsWbIE/fv3x7Bhw6yOjxgxAkDZPsvt+v5UpcvTO6h69erJAIq9Xb582Vzv9OnTcq9evWQ3NzfZ29tbfvLJJ+WbN2/aL/Ay+uqrr+QePXrIderUkZ2dnWUfHx+5R48e8qZNm6zqVvdrXbJkidyuXTvZ19dXdnZ2loODg+URI0bIFy5csKq7b98+uUOHDrJarZYDAgLkF198UdbpdHaImhxJZb8vFBQUyHPmzJHr1asnK5VKuXnz5vKaNWtseEVEVBN06dKlxPemO9O46vD+VCtbPImIiIjI9jjGk4iIiIhsgoknEREREdkEE08iIiIisgkmnkRERERkE0w8iYiIiMgmmHgSERERkU0w8SQiIiIim2DiSUREREQ2wcSTiIiIiGyCiScRERER2QQTTyIiIiKyCSaeRERERGQT/w/k6bnsFkKm2gAAAABJRU5ErkJggg==",
|
132 |
+
"text/plain": [
|
133 |
+
"<Figure size 800x500 with 1 Axes>"
|
134 |
+
]
|
135 |
+
},
|
136 |
+
"metadata": {},
|
137 |
+
"output_type": "display_data"
|
138 |
+
}
|
139 |
+
],
|
140 |
+
"source": [
|
141 |
+
"# Top5 invalidity\n",
|
142 |
+
"CompoundT5 = [32.75, 18.76, 11.07, 20.99, 10.62]\n",
|
143 |
+
"ReactionT5 = [12.5, 12.4, 12.5, 12.6, 12.9]\n",
|
144 |
+
"T5Chem = [33.73, 38.94, 46.23, 69.05, 73.41][::-1]\n",
|
145 |
+
"\n",
|
146 |
+
"\n",
|
147 |
+
"# plot\n",
|
148 |
+
"import matplotlib.pyplot as plt\n",
|
149 |
+
"fig, ax = plt.subplots(1, figsize=(8, 5))\n",
|
150 |
+
"\n",
|
151 |
+
"\n",
|
152 |
+
"ax.plot([10,30,50,100,200], ReactionT5, \"o-\", label='ReactionT5', color='red', alpha=0.7)\n",
|
153 |
+
"ax.plot([10,30,50,100,200], CompoundT5, \"s--\", label='CompoundT5', color='blue', alpha=0.7)\n",
|
154 |
+
"ax.plot([10,30,50,100,200], T5Chem, \"v:\", label='T5Chem', color='green', alpha=0.7)\n",
|
155 |
+
"\n",
|
156 |
+
"\n",
|
157 |
+
"# plt.ylim(0, 35)\n",
|
158 |
+
"ax.set_xticks([10,30,50,100,200])\n",
|
159 |
+
"ax.set_xticklabels([10,30,50,100,200], fontsize=12)\n",
|
160 |
+
"# ax.set_yticks([10,20,30,40,50,60])\n",
|
161 |
+
"ax.set_yticklabels([int(i) for i in ax.get_yticks()], fontsize=12)\n",
|
162 |
+
"# plt.tight_layout()\n",
|
163 |
+
"ax.legend(loc=\"best\", fontsize=12)\n"
|
164 |
+
]
|
165 |
+
}
|
166 |
+
],
|
167 |
+
"metadata": {
|
168 |
+
"kernelspec": {
|
169 |
+
"display_name": "reactiont5",
|
170 |
+
"language": "python",
|
171 |
+
"name": "python3"
|
172 |
+
},
|
173 |
+
"language_info": {
|
174 |
+
"codemirror_mode": {
|
175 |
+
"name": "ipython",
|
176 |
+
"version": 3
|
177 |
+
},
|
178 |
+
"file_extension": ".py",
|
179 |
+
"mimetype": "text/x-python",
|
180 |
+
"name": "python",
|
181 |
+
"nbconvert_exporter": "python",
|
182 |
+
"pygments_lexer": "ipython3",
|
183 |
+
"version": "3.8.18"
|
184 |
+
},
|
185 |
+
"varInspector": {
|
186 |
+
"cols": {
|
187 |
+
"lenName": 16,
|
188 |
+
"lenType": 16,
|
189 |
+
"lenVar": 40
|
190 |
+
},
|
191 |
+
"kernels_config": {
|
192 |
+
"python": {
|
193 |
+
"delete_cmd_postfix": "",
|
194 |
+
"delete_cmd_prefix": "del ",
|
195 |
+
"library": "var_list.py",
|
196 |
+
"varRefreshCmd": "print(var_dic_list())"
|
197 |
+
},
|
198 |
+
"r": {
|
199 |
+
"delete_cmd_postfix": ") ",
|
200 |
+
"delete_cmd_prefix": "rm(",
|
201 |
+
"library": "var_list.r",
|
202 |
+
"varRefreshCmd": "cat(var_dic_list()) "
|
203 |
+
}
|
204 |
+
},
|
205 |
+
"types_to_exclude": [
|
206 |
+
"module",
|
207 |
+
"function",
|
208 |
+
"builtin_function_or_method",
|
209 |
+
"instance",
|
210 |
+
"_Feature"
|
211 |
+
],
|
212 |
+
"window_display": false
|
213 |
+
}
|
214 |
+
},
|
215 |
+
"nbformat": 4,
|
216 |
+
"nbformat_minor": 5
|
217 |
+
}
|
task_forward/calculate_accuracy.py
ADDED
@@ -0,0 +1,135 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import argparse
|
2 |
+
import os
|
3 |
+
import sys
|
4 |
+
import warnings
|
5 |
+
|
6 |
+
import pandas as pd
|
7 |
+
import rdkit
|
8 |
+
from rdkit import Chem
|
9 |
+
from transformers import AutoTokenizer
|
10 |
+
|
11 |
+
rdkit.RDLogger.DisableLog("rdApp.*")
|
12 |
+
|
13 |
+
|
14 |
+
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
|
15 |
+
from utils import canonicalize, seed_everything
|
16 |
+
|
17 |
+
warnings.filterwarnings("ignore")
|
18 |
+
|
19 |
+
|
20 |
+
def parse_args():
    """Parse command line arguments for forward-prediction accuracy evaluation.

    Returns:
        argparse.Namespace with the prediction/target CSV paths, the target
        column name, the model name or path, the beam count, and the seed.
    """
    parser = argparse.ArgumentParser(
        description="Script for evaluating forward reaction prediction accuracy."
    )
    parser.add_argument(
        "--input_data",
        type=str,
        required=True,
        help="Path to the input data.",
    )
    parser.add_argument(
        "--target_data",
        type=str,
        required=True,
        help="Path to the target data.",
    )
    parser.add_argument(
        "--target_col",
        type=str,
        required=True,
        help="Name of target column.",
    )
    parser.add_argument(
        "--model_name_or_path",
        type=str,
        # Fixed: this script belongs to the forward-prediction task, but the
        # default previously pointed at the retrosynthesis checkpoint
        # (copy-paste from task_retrosynthesis). Now consistent with the
        # sibling scripts in task_forward.
        default="sagawa/ReactionT5v2-forward",
        help="Name or path of the finetuned model for prediction. Can be a local model or one from Hugging Face.",
    )
    parser.add_argument(
        "--num_beams", type=int, default=5, help="Number of beams used for beam search."
    )
    parser.add_argument(
        "--seed", type=int, default=42, help="Seed for reproducibility."
    )
    return parser.parse_args()
|
55 |
+
|
56 |
+
|
57 |
+
def remove_space(row):
    """Strip all spaces from every beam-prediction column of a DataFrame row.

    Beam columns are named "0th", "1th", "2th", ... by the prediction script.
    The previous version hard-coded ``range(5)``, which raised a KeyError
    whenever ``--num_beams`` was not exactly 5; this version handles any
    number of beam columns and leaves every other column untouched.

    Args:
        row: pandas Series (one DataFrame row) containing beam columns.

    Returns:
        The same Series with spaces removed from the beam columns.
    """
    for col in row.index:
        # Only touch columns of the form "<digits>th" (e.g. "0th", "12th").
        if col.endswith("th") and col[:-2].isdigit():
            row[col] = row[col].replace(" ", "")
    return row
|
61 |
+
|
62 |
+
|
63 |
+
if __name__ == "__main__":
    CFG = parse_args()

    seed_everything(seed=CFG.seed)

    # NOTE(review): the previous version loaded a tokenizer from
    # CFG.model_name_or_path here but never used it (only triggering a model
    # download as a side effect); the dead load has been removed.

    # Load beam predictions and attach the ground truth from the target file.
    # Empty predictions become " " so string operations below never see NaN.
    df = pd.read_csv(CFG.input_data)
    beam_cols = [f"{i}th" for i in range(CFG.num_beams)]
    df[beam_cols] = df[beam_cols].fillna(" ")
    df["target"] = pd.read_csv(CFG.target_data)[CFG.target_col].values
    df = df.apply(remove_space, axis=1)

    top_k_invalidity = CFG.num_beams

    top1, top2, top3, top5 = [], [], [], []
    invalidity = []

    for _, row in df.iterrows():
        target = canonicalize(row["target"])

        # 0-based rank of the first beam whose canonical SMILES equals the
        # target, or None if no beam (among the first five checked) matches.
        # Equivalent to the previous hard-coded elif cascade for num_beams=5,
        # but does not KeyError when fewer beams are available.
        match_rank = None
        for i in range(min(CFG.num_beams, 5)):
            if canonicalize(row[f"{i}th"]) == target:
                match_rank = i
                break

        matched = match_rank is not None
        top1.append(int(matched and match_rank < 1))
        top2.append(int(matched and match_rank < 2))
        top3.append(int(matched and match_rank < 3))
        top5.append(int(matched and match_rank < 5))

        # Invalidity: count beam outputs RDKit cannot parse as a molecule.
        # A trailing "." is stripped first (generation artifact of the
        # multi-component separator).
        inval_score = 0
        for i in range(top_k_invalidity):
            mol = Chem.MolFromSmiles(row[f"{i}th"].rstrip("."))
            if not isinstance(mol, Chem.rdchem.Mol):
                inval_score += 1
        invalidity.append(inval_score)

    print(CFG.input_data)
    print(f"Top 1 accuracy: {sum(top1) / len(top1)}")
    print(f"Top 2 accuracy: {sum(top2) / len(top2)}")
    print(f"Top 3 accuracy: {sum(top3) / len(top3)}")
    print(f"Top 5 accuracy: {sum(top5) / len(top5)}")
    print(
        f"Top {top_k_invalidity} Invalidity: {sum(invalidity) / (len(invalidity) * top_k_invalidity) * 100}"
    )
|
task_forward/finetune.py
ADDED
@@ -0,0 +1,251 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import argparse
|
2 |
+
import os
|
3 |
+
import sys
|
4 |
+
import warnings
|
5 |
+
|
6 |
+
import datasets
|
7 |
+
import pandas as pd
|
8 |
+
import torch
|
9 |
+
from datasets import Dataset, DatasetDict
|
10 |
+
from transformers import (
|
11 |
+
AutoModelForSeq2SeqLM,
|
12 |
+
AutoTokenizer,
|
13 |
+
DataCollatorForSeq2Seq,
|
14 |
+
EarlyStoppingCallback,
|
15 |
+
Seq2SeqTrainer,
|
16 |
+
Seq2SeqTrainingArguments,
|
17 |
+
)
|
18 |
+
|
19 |
+
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
|
20 |
+
from train import preprocess_df
|
21 |
+
from utils import filter_out, get_accuracy_score, preprocess_dataset, seed_everything
|
22 |
+
|
23 |
+
# Suppress warnings and disable progress bars
|
24 |
+
warnings.filterwarnings("ignore")
|
25 |
+
datasets.utils.logging.disable_progress_bar()
|
26 |
+
|
27 |
+
|
28 |
+
def parse_args():
    """Collect all command line options for fine-tuning the forward model."""
    parser = argparse.ArgumentParser(
        description="Training script for reaction prediction model."
    )
    add = parser.add_argument

    # Data locations and output directory.
    add("--train_data_path", type=str, required=True, help="Path to training data CSV.")
    add(
        "--valid_data_path",
        type=str,
        required=True,
        help="Path to validation data CSV.",
    )
    add(
        "--similar_reaction_data_path",
        type=str,
        required=False,
        help="Path to similar data CSV.",
    )
    add("--output_dir", type=str, default="t5", help="Path of the output directory.")

    # Model selection (local path or Hugging Face hub name).
    add(
        "--model_name_or_path",
        type=str,
        default="sagawa/ReactionT5v2-forward",
        help="The name of a pretrained model or path to a model which you want to finetune on your dataset. You can use your local models or models uploaded to hugging face.",
    )

    # Optimization and tokenization hyperparameters.
    add("--debug", action="store_true", default=False, help="Enable debug mode.")
    add("--epochs", type=int, default=3, help="Number of epochs for training.")
    add("--lr", type=float, default=2e-5, help="Learning rate.")
    add("--batch_size", type=int, default=32, help="Batch size.")
    add("--input_max_length", type=int, default=200, help="Max input token length.")
    add("--target_max_length", type=int, default=150, help="Max target token length.")
    add(
        "--eval_beams",
        type=int,
        default=5,
        help="Number of beams used for beam search during evaluation.",
    )
    add("--target_column", type=str, default="PRODUCT", help="Target column name.")
    add("--weight_decay", type=float, default=0.01, help="Weight decay.")

    # Evaluation / checkpointing / logging cadence.
    add(
        "--evaluation_strategy",
        type=str,
        default="epoch",
        help="Evaluation strategy used during training. Select from 'no', 'steps', or 'epoch'. If you select 'steps', also give --eval_steps.",
    )
    add("--eval_steps", type=int, help="Evaluation steps.")
    add(
        "--save_strategy",
        type=str,
        default="epoch",
        help="Save strategy used during training. Select from 'no', 'steps', or 'epoch'. If you select 'steps', also give --save_steps.",
    )
    add("--save_steps", type=int, default=500, help="Save steps.")
    add(
        "--logging_strategy",
        type=str,
        default="epoch",
        help="Logging strategy used during training. Select from 'no', 'steps', or 'epoch'. If you select 'steps', also give --logging_steps.",
    )
    add("--logging_steps", type=int, default=500, help="Logging steps.")
    add("--save_total_limit", type=int, default=2, help="Limit of saved checkpoints.")

    # Miscellaneous runtime options.
    add("--fp16", action="store_true", default=False, help="Enable fp16 training.")
    add("--disable_tqdm", action="store_true", default=False, help="Disable tqdm.")
    add("--seed", type=int, default=42, help="Set seed for reproducibility.")
    add(
        "--sampling_num",
        type=int,
        default=-1,
        help="Number of samples used for training. If you want to use all samples, set -1.",
    )

    return parser.parse_args()
|
153 |
+
|
154 |
+
|
155 |
+
if __name__ == "__main__":
    CFG = parse_args()
    # NOTE(review): the previous version unconditionally set
    # CFG.disable_tqdm = True here, which made the --disable_tqdm CLI flag
    # dead. The parsed flag is now respected as-is.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    seed_everything(seed=CFG.seed)

    # Load data, dropping rows that are missing either REACTANT or PRODUCT.
    train = preprocess_df(
        filter_out(pd.read_csv(CFG.train_data_path), ["REACTANT", "PRODUCT"])
    )
    valid = preprocess_df(
        filter_out(pd.read_csv(CFG.valid_data_path), ["REACTANT", "PRODUCT"])
    )
    if CFG.sampling_num > 0:
        train = train.sample(n=CFG.sampling_num, random_state=CFG.seed).reset_index(
            drop=True
        )

    # Optionally augment the training set with retrieved similar reactions.
    if CFG.similar_reaction_data_path:
        similar = preprocess_df(
            filter_out(
                pd.read_csv(CFG.similar_reaction_data_path), ["REACTANT", "PRODUCT"]
            )
        )
        print(len(train))
        train = pd.concat([train, similar], ignore_index=True)
        print(len(train))

    # REAGENT may be missing; a single space keeps the prompt format intact.
    for col in ["REAGENT"]:
        train[col] = train[col].fillna(" ")
        valid[col] = valid[col].fillna(" ")
    train["input"] = "REACTANT:" + train["REACTANT"] + "REAGENT:" + train["REAGENT"]
    valid["input"] = "REACTANT:" + valid["REACTANT"] + "REAGENT:" + valid["REAGENT"]

    if CFG.debug:
        # Keep only a small slice of the data so a full run finishes quickly.
        train = train[: int(len(train) / 40)].reset_index(drop=True)
        valid = valid[: int(len(valid) / 40)].reset_index(drop=True)

    dataset = DatasetDict(
        {
            "train": Dataset.from_pandas(train[["input", "PRODUCT"]]),
            "validation": Dataset.from_pandas(valid[["input", "PRODUCT"]]),
        }
    )

    # Resolve the model location once: a local directory (absolute path) or a
    # Hugging Face hub name. Previously this expression was duplicated for the
    # tokenizer and the model.
    model_location = (
        os.path.abspath(CFG.model_name_or_path)
        if os.path.exists(CFG.model_name_or_path)
        else CFG.model_name_or_path
    )
    tokenizer = AutoTokenizer.from_pretrained(model_location, return_tensors="pt")
    CFG.tokenizer = tokenizer

    model = AutoModelForSeq2SeqLM.from_pretrained(model_location).to(device)
    tokenized_datasets = dataset.map(
        lambda examples: preprocess_dataset(examples, CFG),
        batched=True,
        remove_columns=dataset["train"].column_names,
    )

    data_collator = DataCollatorForSeq2Seq(tokenizer, model=model)

    args = Seq2SeqTrainingArguments(
        CFG.output_dir,
        evaluation_strategy=CFG.evaluation_strategy,
        # Fixed: --eval_steps/--save_steps/--logging_steps were parsed but
        # never forwarded, so "steps" strategies silently fell back to the
        # transformers defaults.
        eval_steps=CFG.eval_steps,
        save_strategy=CFG.save_strategy,
        save_steps=CFG.save_steps,
        logging_strategy=CFG.logging_strategy,
        logging_steps=CFG.logging_steps,
        learning_rate=CFG.lr,
        per_device_train_batch_size=CFG.batch_size,
        per_device_eval_batch_size=CFG.batch_size * 4,
        weight_decay=CFG.weight_decay,
        save_total_limit=CFG.save_total_limit,
        num_train_epochs=CFG.epochs,
        predict_with_generate=True,
        fp16=CFG.fp16,
        disable_tqdm=CFG.disable_tqdm,
        push_to_hub=False,
        load_best_model_at_end=True,
    )

    model.config.eval_beams = CFG.eval_beams
    model.config.max_length = CFG.target_max_length
    trainer = Seq2SeqTrainer(
        model,
        args,
        train_dataset=tokenized_datasets["train"],
        eval_dataset=tokenized_datasets["validation"],
        data_collator=data_collator,
        tokenizer=tokenizer,
        compute_metrics=lambda eval_preds: get_accuracy_score(eval_preds, CFG),
        callbacks=[EarlyStoppingCallback(early_stopping_patience=10)],
    )

    trainer.train()
    trainer.save_model("./best_model")
|
task_forward/generate_embedding.py
ADDED
@@ -0,0 +1,129 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import argparse
|
2 |
+
import os
|
3 |
+
import sys
|
4 |
+
import warnings
|
5 |
+
|
6 |
+
import numpy as np
|
7 |
+
import pandas as pd
|
8 |
+
import torch
|
9 |
+
from torch.utils.data import DataLoader
|
10 |
+
from transformers import AutoTokenizer, T5EncoderModel
|
11 |
+
|
12 |
+
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
|
13 |
+
from generation_utils import ReactionT5Dataset
|
14 |
+
from train import preprocess_df, preprocess_USPTO
|
15 |
+
from utils import filter_out, seed_everything
|
16 |
+
|
17 |
+
warnings.filterwarnings("ignore")
|
18 |
+
|
19 |
+
|
20 |
+
def parse_args():
    """Build and parse the command-line options for embedding generation."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--input_data", type=str, required=True, help="Path to the input data.")
    parser.add_argument(
        "--test_data",
        type=str,
        required=False,
        help="Path to the test data. If provided, the duplicates will be removed from the input data.",
    )
    parser.add_argument("--input_max_length", type=int, default=400, help="Maximum token length of input.")
    parser.add_argument(
        "--model_name_or_path",
        type=str,
        default="sagawa/ReactionT5v2-forward",
        help="Name or path of the finetuned model for prediction. Can be a local model or one from Hugging Face.",
    )
    parser.add_argument("--batch_size", type=int, default=5, help="Batch size for prediction.")
    parser.add_argument("--output_dir", type=str, default="./", help="Directory where predictions are saved.")
    parser.add_argument("--debug", action="store_true", default=False, help="Use debug mode.")
    parser.add_argument("--seed", type=int, default=42, help="Seed for reproducibility.")
    return parser.parse_args()
|
62 |
+
|
63 |
+
|
64 |
+
def create_embedding(dataloader, model, device):
    """Compute attention-mask-weighted mean-pooled embeddings for each batch.

    Args:
        dataloader: Iterable yielding dicts of tensors; each dict must contain
            an "attention_mask" key plus whatever the model's forward expects.
        model: Encoder model whose forward returns the last hidden states as
            element 0 of its output.
        device: torch.device to run inference on.

    Returns:
        List of numpy arrays, one per batch, each of shape
        (batch_size, hidden_dim) containing the mean-pooled embeddings.
    """
    outputs_mean = []
    model.eval()
    model.to(device)
    for inputs in dataloader:
        # Bug fix: the original referenced the global `CFG.device` here, which
        # made the function depend on module state instead of its own argument.
        inputs = {k: v.to(device) for k, v in inputs.items()}
        with torch.no_grad():
            output = model(**inputs)
        last_hidden_states = output[0]
        # Broadcast the attention mask over the hidden dimension so padding
        # positions contribute nothing to the pooled representation.
        input_mask_expanded = (
            inputs["attention_mask"]
            .unsqueeze(-1)
            .expand(last_hidden_states.size())
            .float()
        )
        sum_embeddings = torch.sum(last_hidden_states * input_mask_expanded, 1)
        # Clamp avoids division by zero for fully-masked (empty) sequences.
        sum_mask = torch.clamp(input_mask_expanded.sum(1), min=1e-6)
        mean_embeddings = sum_embeddings / sum_mask
        outputs_mean.append(mean_embeddings.detach().cpu().numpy())

    return outputs_mean
|
86 |
+
|
87 |
+
|
88 |
+
if __name__ == "__main__":
    # Script entry point: embed every reaction in --input_data with the T5
    # encoder and save the mean-pooled vectors as a single .npy file.
    CFG = parse_args()
    CFG.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    if not os.path.exists(CFG.output_dir):
        os.makedirs(CFG.output_dir)

    seed_everything(seed=CFG.seed)

    # Local checkpoint paths are made absolute; otherwise the string is
    # treated as a Hugging Face hub model id.
    CFG.tokenizer = AutoTokenizer.from_pretrained(
        os.path.abspath(CFG.model_name_or_path)
        if os.path.exists(CFG.model_name_or_path)
        else CFG.model_name_or_path,
        return_tensors="pt",
    )
    # NOTE(review): unlike the tokenizer, the model path is NOT routed through
    # os.path.abspath — confirm local relative paths still resolve correctly.
    model = T5EncoderModel.from_pretrained(CFG.model_name_or_path).to(CFG.device)
    model.eval()

    # Drop rows missing REACTANT/PRODUCT, then build the "input" column.
    input_data = filter_out(pd.read_csv(CFG.input_data), ["REACTANT", "PRODUCT"])
    input_data = preprocess_df(input_data, drop_duplicates=False)
    if CFG.test_data:
        # Remove reactions that also appear in the test set, comparing on the
        # order-insensitive "pair" key produced by preprocess_USPTO.
        input_data_copy = preprocess_USPTO(input_data.copy())
        test_data = filter_out(pd.read_csv(CFG.test_data), ["REACTANT", "PRODUCT"])
        USPTO_test = preprocess_USPTO(test_data)
        input_data = input_data[
            ~input_data_copy["pair"].isin(USPTO_test["pair"])
        ].reset_index(drop=True)
    # Persist the (possibly filtered) inputs so embedding rows can be mapped
    # back to reactions later.
    input_data.to_csv(os.path.join(CFG.output_dir, "input_data.csv"), index=False)
    dataset = ReactionT5Dataset(CFG, input_data)
    dataloader = DataLoader(
        dataset,
        batch_size=CFG.batch_size,
        shuffle=False,  # keep row order aligned with input_data.csv
        num_workers=4,
        pin_memory=True,
        drop_last=False,
    )

    # create_embedding returns one array per batch; concatenate into (N, dim).
    outputs = create_embedding(dataloader, model, CFG.device)
    outputs = np.concatenate(outputs, axis=0)

    np.save(os.path.join(CFG.output_dir, "embedding_mean.npy"), outputs)
|
task_forward/get_distance.py
ADDED
@@ -0,0 +1,74 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import argparse
|
2 |
+
import os
|
3 |
+
import sys
|
4 |
+
import warnings
|
5 |
+
|
6 |
+
import numpy as np
|
7 |
+
import pandas as pd
|
8 |
+
import torch
|
9 |
+
|
10 |
+
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
|
11 |
+
from utils import seed_everything
|
12 |
+
|
13 |
+
warnings.filterwarnings("ignore")
|
14 |
+
|
15 |
+
|
16 |
+
def parse_args():
    """Parse command-line arguments for the similarity-search script."""
    parser = argparse.ArgumentParser(description="Search for similar reactions.")
    parser.add_argument(
        "--input_data",
        type=str,
        required=True,
        help="Path to the input data.",
    )
    parser.add_argument(
        "--target_embedding",
        type=str,
        required=True,
        help="Path to the target embedding.",
    )
    # Bug fix: this option's help text previously read "Path to the target
    # embedding." — a copy-paste of the option above.
    parser.add_argument(
        "--query_embedding",
        type=str,
        required=True,
        help="Path to the query embedding.",
    )
    parser.add_argument("--batch_size", type=int, default=64, help="Batch size.")
    parser.add_argument(
        "--output_dir",
        type=str,
        default="./",
        help="Directory where results are saved.",
    )

    return parser.parse_args()
|
45 |
+
|
46 |
+
|
47 |
+
if __name__ == "__main__":
    # Script entry point: for every query embedding, find its maximum cosine
    # similarity to any target embedding and append it to the input CSV.
    config = parse_args()
    seed_everything(42)  # fixed seed; no --seed option for this script

    target_embedding = np.load(config.target_embedding)
    query_embedding = np.load(config.query_embedding)

    # NOTE(review): .cuda() is hard-coded — this script fails without a GPU.
    target_embedding = torch.tensor(target_embedding, dtype=torch.float32).cuda()
    query_embedding = torch.tensor(query_embedding, dtype=torch.float32).cuda()

    # L2-normalize rows so the dot product below equals cosine similarity.
    target_embedding = torch.nn.functional.normalize(target_embedding, p=2, dim=1)
    query_embedding = torch.nn.functional.normalize(query_embedding, p=2, dim=1)

    batch_size = config.batch_size
    distances = []

    # Batch the queries to bound the (batch, num_targets) similarity matrix.
    for i in range(0, query_embedding.shape[0], batch_size):
        print(f"Processing batch {i // batch_size}...")
        batch = query_embedding[i : i + batch_size]
        similarity = torch.matmul(batch, target_embedding.T)
        # Despite the name, "distance" here is the MAX cosine similarity to
        # the nearest target (higher = more similar).
        distance, _ = torch.max(similarity, dim=1)
        distances.append(distance.cpu().tolist())

    distances = np.concatenate(distances)

    # Row order of distances matches the row order of the input CSV.
    df = pd.read_csv(config.input_data)
    df["distance"] = distances
    df.to_csv(os.path.join(config.output_dir, "distance.csv"), index=False)
|
task_forward/prediction.py
ADDED
@@ -0,0 +1,143 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import argparse
|
2 |
+
import gc
|
3 |
+
import os
|
4 |
+
import sys
|
5 |
+
import warnings
|
6 |
+
|
7 |
+
import pandas as pd
|
8 |
+
import torch
|
9 |
+
from torch.utils.data import DataLoader
|
10 |
+
from tqdm import tqdm
|
11 |
+
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
|
12 |
+
|
13 |
+
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
|
14 |
+
from generation_utils import (
|
15 |
+
ReactionT5Dataset,
|
16 |
+
decode_output,
|
17 |
+
save_multiple_predictions,
|
18 |
+
)
|
19 |
+
from train import preprocess_df
|
20 |
+
from utils import seed_everything
|
21 |
+
|
22 |
+
warnings.filterwarnings("ignore")
|
23 |
+
|
24 |
+
|
25 |
+
def parse_args():
    """Build and parse command-line options for product prediction."""
    parser = argparse.ArgumentParser(description="Script for reaction product prediction.")
    parser.add_argument("--input_data", type=str, required=True, help="Path to the input data.")
    parser.add_argument("--input_max_length", type=int, default=400, help="Maximum token length of input.")
    parser.add_argument("--output_min_length", type=int, default=1, help="Minimum token length of output.")
    parser.add_argument("--output_max_length", type=int, default=300, help="Maximum token length of output.")
    parser.add_argument(
        "--model_name_or_path",
        type=str,
        default="sagawa/ReactionT5v2-forward",
        help="Name or path of the finetuned model for prediction. Can be a local model or one from Hugging Face.",
    )
    parser.add_argument("--num_beams", type=int, default=5, help="Number of beams used for beam search.")
    parser.add_argument(
        "--num_return_sequences",
        type=int,
        default=5,
        help="Number of predictions returned. Must be less than or equal to num_beams.",
    )
    parser.add_argument("--batch_size", type=int, default=5, help="Batch size for prediction.")
    parser.add_argument("--output_dir", type=str, default="./", help="Directory where predictions are saved.")
    parser.add_argument("--debug", action="store_true", default=False, help="Use debug mode.")
    parser.add_argument("--seed", type=int, default=42, help="Seed for reproducibility.")
    return parser.parse_args()
|
84 |
+
|
85 |
+
|
86 |
+
if __name__ == "__main__":
    # Script entry point: beam-search product predictions for every reaction
    # in --input_data and write them (with scores) to output.csv.
    CFG = parse_args()
    CFG.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    if not os.path.exists(CFG.output_dir):
        os.makedirs(CFG.output_dir)

    seed_everything(seed=CFG.seed)

    # Local checkpoint paths are made absolute; otherwise the string is
    # treated as a Hugging Face hub model id.
    CFG.tokenizer = AutoTokenizer.from_pretrained(
        os.path.abspath(CFG.model_name_or_path)
        if os.path.exists(CFG.model_name_or_path)
        else CFG.model_name_or_path,
        return_tensors="pt",
    )
    model = AutoModelForSeq2SeqLM.from_pretrained(
        os.path.abspath(CFG.model_name_or_path)
        if os.path.exists(CFG.model_name_or_path)
        else CFG.model_name_or_path
    ).to(CFG.device)
    model.eval()

    input_data = pd.read_csv(CFG.input_data)
    # drop_duplicates=False: predictions must stay 1:1 with input rows.
    input_data = preprocess_df(input_data, drop_duplicates=False)
    dataset = ReactionT5Dataset(CFG, input_data)
    dataloader = DataLoader(
        dataset,
        batch_size=CFG.batch_size,
        shuffle=False,  # keep output rows aligned with input rows
        num_workers=4,
        pin_memory=True,
        drop_last=False,
    )

    all_sequences, all_scores = [], []
    for inputs in tqdm(dataloader, total=len(dataloader)):
        inputs = {k: v.to(CFG.device) for k, v in inputs.items()}
        with torch.no_grad():
            output = model.generate(
                **inputs,
                min_length=CFG.output_min_length,
                max_length=CFG.output_max_length,
                num_beams=CFG.num_beams,
                num_return_sequences=CFG.num_return_sequences,
                return_dict_in_generate=True,
                output_scores=True,  # sequence scores decoded below
            )
        sequences, scores = decode_output(output, CFG)
        all_sequences.extend(sequences)
        if scores:
            all_scores.extend(scores)
        # Free the generation output eagerly to keep GPU memory bounded
        # across batches (beam search outputs can be large).
        del output
        torch.cuda.empty_cache()
        gc.collect()

    output_df = save_multiple_predictions(input_data, all_sequences, all_scores, CFG)

    output_df.to_csv(os.path.join(CFG.output_dir, "output.csv"), index=False)
|
task_forward/train.py
ADDED
@@ -0,0 +1,312 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import argparse
|
2 |
+
import os
|
3 |
+
import sys
|
4 |
+
import warnings
|
5 |
+
from pathlib import Path
|
6 |
+
|
7 |
+
import datasets
|
8 |
+
import pandas as pd
|
9 |
+
import torch
|
10 |
+
from datasets import Dataset, DatasetDict
|
11 |
+
from transformers import (
|
12 |
+
AutoModelForSeq2SeqLM,
|
13 |
+
AutoTokenizer,
|
14 |
+
DataCollatorForSeq2Seq,
|
15 |
+
EarlyStoppingCallback,
|
16 |
+
Seq2SeqTrainer,
|
17 |
+
Seq2SeqTrainingArguments,
|
18 |
+
)
|
19 |
+
|
20 |
+
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
|
21 |
+
from utils import (
|
22 |
+
add_new_tokens,
|
23 |
+
canonicalize,
|
24 |
+
filter_out,
|
25 |
+
get_accuracy_score,
|
26 |
+
preprocess_dataset,
|
27 |
+
seed_everything,
|
28 |
+
space_clean,
|
29 |
+
)
|
30 |
+
|
31 |
+
# Suppress warnings and disable progress bars
|
32 |
+
warnings.filterwarnings("ignore")
|
33 |
+
datasets.utils.logging.disable_progress_bar()
|
34 |
+
|
35 |
+
|
36 |
+
def parse_args():
    """Parse command line arguments."""
    p = argparse.ArgumentParser(description="Training script for reaction prediction model.")
    p.add_argument("--train_data_path", type=str, required=True, help="Path to training data CSV.")
    p.add_argument("--valid_data_path", type=str, required=True, help="Path to validation data CSV.")
    p.add_argument("--test_data_path", type=str, help="Path to test data CSV.")
    p.add_argument(
        "--USPTO_test_data_path",
        type=str,
        help="The path to data used for USPTO testing. CSV file that contains ['REACTANT', 'REAGENT', 'PRODUCT'] columns is expected.",
    )
    p.add_argument("--output_dir", type=str, default="t5", help="Path of the output directory.")
    p.add_argument("--pretrained_model_name_or_path", type=str, required=True, help="Pretrained model path or name.")
    p.add_argument("--debug", action="store_true", default=False, help="Enable debug mode.")
    p.add_argument("--epochs", type=int, default=5, help="Number of epochs.")
    p.add_argument("--lr", type=float, default=1e-3, help="Learning rate.")
    p.add_argument("--batch_size", type=int, default=16, help="Batch size.")
    p.add_argument("--input_max_length", type=int, default=400, help="Max input token length.")
    p.add_argument("--target_max_length", type=int, default=150, help="Max target token length.")
    p.add_argument("--eval_beams", type=int, default=5, help="Number of beams used for beam search during evaluation.")
    p.add_argument("--target_column", type=str, default="PRODUCT", help="Target column name.")
    p.add_argument("--weight_decay", type=float, default=0.01, help="Weight decay.")
    p.add_argument(
        "--evaluation_strategy",
        type=str,
        default="epoch",
        help="Evaluation strategy used during training. Select from 'no', 'steps', or 'epoch'. If you select 'steps', also give --eval_steps.",
    )
    p.add_argument("--eval_steps", type=int, help="Evaluation steps.")
    p.add_argument(
        "--save_strategy",
        type=str,
        default="epoch",
        help="Save strategy used during training. Select from 'no', 'steps', or 'epoch'. If you select 'steps', also give --save_steps.",
    )
    p.add_argument("--save_steps", type=int, default=500, help="Save steps.")
    p.add_argument(
        "--logging_strategy",
        type=str,
        default="epoch",
        help="Logging strategy used during training. Select from 'no', 'steps', or 'epoch'. If you select 'steps', also give --logging_steps.",
    )
    p.add_argument("--logging_steps", type=int, default=500, help="Logging steps.")
    p.add_argument("--save_total_limit", type=int, default=2, help="Limit of saved checkpoints.")
    p.add_argument("--fp16", action="store_true", default=False, help="Enable fp16 training.")
    p.add_argument("--disable_tqdm", action="store_true", default=False, help="Disable tqdm.")
    p.add_argument("--seed", type=int, default=42, help="Random seed.")

    return p.parse_args()
|
167 |
+
|
168 |
+
|
169 |
+
def preprocess_df(df, drop_duplicates=True):
    """Preprocess the dataframe by filling NaNs, dropping duplicates, and formatting the input."""
    required = ["REACTANT", "PRODUCT", "CATALYST", "REAGENT", "SOLVENT"]
    # Guarantee every expected column exists, with blanks instead of NaN.
    for col in required:
        if col not in df.columns:
            df[col] = None
        df[col] = df[col].fillna(" ")
    if drop_duplicates:
        df = df[required].drop_duplicates().reset_index(drop=True)
    # Collapse catalyst/reagent/solvent into one dot-separated REAGENT string,
    # then clean and canonicalize it.
    df["REAGENT"] = df["CATALYST"] + "." + df["REAGENT"] + "." + df["SOLVENT"]
    df["REAGENT"] = df["REAGENT"].apply(space_clean)
    df["REAGENT"] = df["REAGENT"].apply(lambda x: canonicalize(x) if x != " " else " ")
    # Model input format: "REACTANT:<smiles>REAGENT:<smiles>".
    df["input"] = "REACTANT:" + df["REACTANT"] + "REAGENT:" + df["REAGENT"]
    return df
|
186 |
+
|
187 |
+
|
188 |
+
def preprocess_USPTO(df):
    """Normalize USPTO rows into order-insensitive "pair" keys.

    Each of REACTANT/REAGENT/PRODUCT is replaced by the string form of its
    dot-split, sorted fragment list so that reactions with the same molecules
    in a different order compare equal.
    """
    for col in ("REACTANT", "REAGENT", "PRODUCT"):
        df[col] = df[col].apply(lambda smiles: str(sorted(smiles.split("."))))

    # "pair" uniquely identifies a reaction: normalized input plus product.
    df["input"] = "REACTANT:" + df["REACTANT"] + "REAGENT:" + df["REAGENT"]
    df["pair"] = df["input"] + " - " + df["PRODUCT"].astype(str)

    return df
|
197 |
+
|
198 |
+
|
199 |
+
if __name__ == "__main__":
    # Script entry point: fine-tune a seq2seq model for forward reaction
    # prediction (REACTANT+REAGENT -> PRODUCT).
    CFG = parse_args()
    # tqdm is always disabled in this script, overriding --disable_tqdm.
    CFG.disable_tqdm = True
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    seed_everything(seed=CFG.seed)

    # Load and preprocess data: drop rows missing REACTANT/PRODUCT, then
    # build the "input" column.
    train = preprocess_df(
        filter_out(pd.read_csv(CFG.train_data_path), ["REACTANT", "PRODUCT"])
    )
    valid = preprocess_df(
        filter_out(pd.read_csv(CFG.valid_data_path), ["REACTANT", "PRODUCT"])
    )
    if CFG.USPTO_test_data_path:
        # Remove training reactions that also appear in the USPTO test set,
        # comparing on order-insensitive "pair" keys.
        train_copy = preprocess_USPTO(train.copy())
        USPTO_test = preprocess_USPTO(pd.read_csv(CFG.USPTO_test_data_path))
        train = train[~train_copy["pair"].isin(USPTO_test["pair"])].reset_index(
            drop=True
        )
    train["pair"] = train["input"] + " - " + train["PRODUCT"]
    valid["pair"] = valid["input"] + " - " + valid["PRODUCT"]
    # Keep validation disjoint from training.
    valid = valid[~valid["pair"].isin(train["pair"])].reset_index(drop=True)
    train.to_csv("train.csv", index=False)
    valid.to_csv("valid.csv", index=False)

    if CFG.test_data_path:
        test = preprocess_df(
            filter_out(pd.read_csv(CFG.test_data_path), ["REACTANT", "PRODUCT"])
        )
        test["pair"] = test["input"] + " - " + test["PRODUCT"]
        test = test[~test["pair"].isin(train["pair"])].reset_index(drop=True)
        test = test.drop_duplicates(subset=["pair"]).reset_index(drop=True)
        test.to_csv("test.csv", index=False)

    dataset = DatasetDict(
        {
            "train": Dataset.from_pandas(train[["input", "PRODUCT"]]),
            "validation": Dataset.from_pandas(valid[["input", "PRODUCT"]]),
        }
    )

    # Load tokenizer and extend its vocabulary with reaction-specific tokens.
    tokenizer = AutoTokenizer.from_pretrained(
        os.path.abspath(CFG.pretrained_model_name_or_path)
        if os.path.exists(CFG.pretrained_model_name_or_path)
        else CFG.pretrained_model_name_or_path,
        return_tensors="pt",
    )
    tokenizer = add_new_tokens(
        tokenizer,
        Path(__file__).resolve().parent.parent / "data" / "additional_tokens.txt",
    )
    tokenizer.add_special_tokens(
        {
            "additional_special_tokens": tokenizer.additional_special_tokens
            + ["REACTANT:", "REAGENT:"]
        }
    )
    CFG.tokenizer = tokenizer

    # Load model and resize embeddings to match the extended vocabulary.
    model = AutoModelForSeq2SeqLM.from_pretrained(
        os.path.abspath(CFG.pretrained_model_name_or_path)
        if os.path.exists(CFG.pretrained_model_name_or_path)
        else CFG.pretrained_model_name_or_path
    )
    model.resize_token_embeddings(len(tokenizer))

    tokenized_datasets = dataset.map(
        lambda examples: preprocess_dataset(examples, CFG),
        batched=True,
        remove_columns=dataset["train"].column_names,
        load_from_cache_file=False,
    )

    data_collator = DataCollatorForSeq2Seq(tokenizer, model=model)

    args = Seq2SeqTrainingArguments(
        CFG.output_dir,
        evaluation_strategy=CFG.evaluation_strategy,
        eval_steps=CFG.eval_steps,
        save_strategy=CFG.save_strategy,
        save_steps=CFG.save_steps,
        logging_strategy=CFG.logging_strategy,
        logging_steps=CFG.logging_steps,
        learning_rate=CFG.lr,
        per_device_train_batch_size=CFG.batch_size,
        per_device_eval_batch_size=CFG.batch_size,
        weight_decay=CFG.weight_decay,
        save_total_limit=CFG.save_total_limit,
        num_train_epochs=CFG.epochs,
        predict_with_generate=True,
        fp16=CFG.fp16,
        disable_tqdm=CFG.disable_tqdm,
        push_to_hub=False,
        load_best_model_at_end=True,
    )

    model.config.eval_beams = CFG.eval_beams
    model.config.max_length = CFG.target_max_length
    trainer = Seq2SeqTrainer(
        model,
        args,
        train_dataset=tokenized_datasets["train"],
        eval_dataset=tokenized_datasets["validation"],
        data_collator=data_collator,
        tokenizer=tokenizer,
        compute_metrics=lambda eval_preds: get_accuracy_score(eval_preds, CFG),
        callbacks=[EarlyStoppingCallback(early_stopping_patience=10)],
    )

    try:
        # Resume from the most recent checkpoint in the output directory.
        trainer.train(resume_from_checkpoint=True)
    except ValueError:
        # Bug fix: the original bare `except:` swallowed every exception
        # (including KeyboardInterrupt and real training failures) and then
        # silently restarted training from scratch. Trainer raises ValueError
        # when no valid checkpoint exists, so only that case falls back here.
        trainer.train(resume_from_checkpoint=None)
    trainer.save_model("./best_model")
|
task_forward/visualize_embedding.ipynb
ADDED
The diff for this file is too large to render.
See raw diff
|
|
task_retrosynthesis/accuracy-and-invalidity-check.ipynb
ADDED
@@ -0,0 +1,207 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cells": [
|
3 |
+
{
|
4 |
+
"cell_type": "code",
|
5 |
+
"execution_count": null,
|
6 |
+
"id": "43813b12",
|
7 |
+
"metadata": {},
|
8 |
+
"outputs": [],
|
9 |
+
"source": [
|
10 |
+
"prediction: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 139/139 [35:54<00:00, 15.50s/it]\n",
|
11 |
+
"Top-1: 0.3% || Invalid 15.75%\n",
|
12 |
+
"Top-2: 0.5% || Invalid 22.04%\n",
|
13 |
+
"Top-3: 0.7% || Invalid 25.83%\n",
|
14 |
+
"Top-4: 0.9% || Invalid 28.69%\n",
|
15 |
+
"Top-5: 1.1% || Invalid 30.74%\n",
|
16 |
+
"prediction: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 139/139 [36:00<00:00, 15.55s/it]\n",
|
17 |
+
"Top-1: 0.3% || Invalid 23.68%\n",
|
18 |
+
"Top-2: 0.5% || Invalid 28.60%\n",
|
19 |
+
"Top-3: 0.7% || Invalid 32.01%\n",
|
20 |
+
"Top-4: 0.9% || Invalid 34.58%\n",
|
21 |
+
"Top-5: 1.0% || Invalid 36.95%\n",
|
22 |
+
"prediction: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 139/139 [35:03<00:00, 15.13s/it]\n",
|
23 |
+
"Top-1: 0.1% || Invalid 29.90%\n",
|
24 |
+
"Top-2: 0.1% || Invalid 34.33%\n",
|
25 |
+
"Top-3: 0.2% || Invalid 37.83%\n",
|
26 |
+
"Top-4: 0.3% || Invalid 40.49%\n",
|
27 |
+
"Top-5: 0.4% || Invalid 43.11%\n",
|
28 |
+
"prediction: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 139/139 [35:27<00:00, 15.31s/it]\n",
|
29 |
+
"Top-1: 0.0% || Invalid 55.78%\n",
|
30 |
+
"Top-2: 0.1% || Invalid 58.94%\n",
|
31 |
+
"Top-3: 0.1% || Invalid 61.21%\n",
|
32 |
+
"Top-4: 0.1% || Invalid 63.35%\n",
|
33 |
+
"Top-5: 0.1% || Invalid 65.17%\n",
|
34 |
+
"prediction: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 139/139 [35:27<00:00, 15.30s/it]\n",
|
35 |
+
"Top-1: 0.1% || Invalid 44.12%\n",
|
36 |
+
"Top-2: 0.1% || Invalid 48.06%\n",
|
37 |
+
"Top-3: 0.1% || Invalid 51.93%\n",
|
38 |
+
"Top-4: 0.1% || Invalid 54.31%\n",
|
39 |
+
"Top-5: 0.2% || Invalid 56.56%\n"
|
40 |
+
]
|
41 |
+
},
|
42 |
+
{
|
43 |
+
"cell_type": "code",
|
44 |
+
"execution_count": 5,
|
45 |
+
"id": "cf10c9e8",
|
46 |
+
"metadata": {},
|
47 |
+
"outputs": [
|
48 |
+
{
|
49 |
+
"name": "stderr",
|
50 |
+
"output_type": "stream",
|
51 |
+
"text": [
|
52 |
+
"/tmp/ipykernel_2055775/4280584905.py:21: UserWarning: set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator.\n",
|
53 |
+
" ax.set_yticklabels([int(i) for i in ax.get_yticks()], fontsize=12)\n"
|
54 |
+
]
|
55 |
+
},
|
56 |
+
{
|
57 |
+
"data": {
|
58 |
+
"text/plain": [
|
59 |
+
"<matplotlib.legend.Legend at 0x7f7834dea750>"
|
60 |
+
]
|
61 |
+
},
|
62 |
+
"execution_count": 5,
|
63 |
+
"metadata": {},
|
64 |
+
"output_type": "execute_result"
|
65 |
+
},
|
66 |
+
{
|
67 |
+
"data": {
|
68 |
+
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAp4AAAGzCAYAAACGrBfmAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/TGe4hAAAACXBIWXMAAA9hAAAPYQGoP6dpAABluklEQVR4nO3dd3hUVeLG8e9MJpkkhCSEUBIgIEV6pIgCKwpYEAUVFHtZXd3dALqiKEWlqijiz7r2thZcUcFCU9TggoANUapIDR1SSJ+0ub8/DplkSIIEkpkkvJ/nmQfm3DMzZ5jh5s25p9gsy7IQEREREalmdn83QERERERODQqeIiIiIuITCp4iIiIi4hMKniIiIiLiEwqeIiIiIuITCp4iIiIi4hMKniIiIiLiEwqeIiIiIuITCp4iIiIi4hMnFDyXL1/OJZdcQoMGDQgJCaFdu3ZMnz7dq87q1au54IILCAsLIzIykuHDh7Nt27YqabSIiIiI1D6VDp6zZ8/mvPPOIyIigrfffpuFCxcybtw4Su+8uWnTJvr3709+fj5z5szhjTfeYPPmzfTr149Dhw5V6RsQERERkdrBVpm92vfs2UP79u25+eabeeGFFyqsd/XVV5OYmMjWrVsJDw8HYOfOnbRr144xY8bw+OOPn3zLRURERKRWqVSP52uvvUZ2djbjxo2rsE5hYSHz58/nyiuv9IROgJYtWzJgwADmzZt34q0VERERkVrLUZnK//vf/4iKimLTpk1cfvnlrFu3jqioKIYPH87MmTMJDw9n69at5ObmEh8fX+bx8fHxLFmyBJfLRXBwcLmvkZeXR15enue+2+0mNTWVhg0bYrPZKvn2RERERKS6WZZFZmYmsbGx2O0V92tWKnju2bOHnJwcRowYwYQJE3j66af58ccfmTx5MuvWrWPZsmWkpKQAEBUVVebxUVFRWJZFWloaMTEx5b7GjBkzmDp1amWaJSIiIiI1wK5du2jevHmFxysVPN1uNy6Xi8mTJzN+/HgA+vfvT1BQEHfffTdff/01oaGhAMfsnTzWsQkTJnDPPfd47qenpxMXF8euXbu8Lt2LiIiISM2QkZFBixYtqF+//jHrVSp4NmzYkD/++INBgwZ5lQ8ePJi7776b1atXc/nllwN4ej5LS01NxWazERkZWeFrOJ1OnE5nmfLw8HAFTxEREZEa7M+GRVZqclF54zYBz1JKdrudNm3aEBISwtq1a8vUW7t2LW3btq1wfKeIiIiI1F2VCp5XXnklAIsWLfIqX7hwIQC9e/fG4XAwdOhQ5s6dS2ZmpqdOUlISiYmJDB8+/GTbLCIiIiK1UKXW8QS47LLL+PLLL3nwwQfp3bs3P/30E1OnTuWCCy7g888/B8wC8r169aJHjx6MHz8el8vFpEmTSE1NZc2aNTRq1Oi4Xy8jI4OIiAjS09N1qV1ERESkBjrevFbp4Jmbm8vUqVOZPXs2+/btIzY2lhtuuIHJkyd7jc38+eefGTduHCtXrsThcDBw4EBmzZpFmzZtquWNiIiIiIh/VFvw9DUFTxEREZGa7XjzWqVmtdd0lmVRVFREYWGhv5si4iUwMJCAgAB/N0NERMSv6kTwtCyLw4cPc+jQIYqKivzdHJFyRUZG0rRpU+3AJSIip6w6ETz379/P4cOHPWt9OhwO/XCXGsOyLHJycjh48CBAhbt2iYiI1HW1PngWFRWRnp5Oo0aNiI6O9ndzRMoVEhICwMGDB2ncuLEuu4uIyCmpUut41kQFBQVYlkW9evX83RSRYyreTragoMDPLREREfGPWh88i+nSutR0+o6KiMiprs4ETxERERGp2RQ8RURERMQnFDyPxe2GtWvhf/8zf7rdPm/CW2+9hc1m89wcDgcxMTFce+21/PHHHz5vz9EeffRRPvnkkzLlS5cuxWazsXTp0mp53SlTpnj9u1R069+//zHrBwcHV0v7REREpKxaP6u92qxYAc8/Dxs3Ql4eOJ
3QsSOMHg19+/q8OW+++SYdOnTA5XLx3Xff8cgjj5CYmMimTZto0KCBz9tT7NFHH+Wqq67iiiuu8Crv0aMHK1eupFOnTtXyurfffjsXX3yx5/6+ffsYPnw4d955J9dff72n/OjdExYvXkxERITnvt2u371ERER8RcGzPCtWwNixkJoKMTEQEgK5ufDLL6Z81iyfh88uXbpw5plnAtC/f3+KioqYPHkyn3zyCbfeeqtP23I8wsPD6d27d7U9f/PmzWnevLnn/o4dOwCIi4s75uv27NlTy26JiIj4Sd3t7rEscLkqf8vJgWeegZQUaN0aQkPBZjN/nnaaKX/2WVOvMs9rWVX69opD6IEDBzxlP/30E5dddhlRUVEEBwfTvXt35syZ4/W4Q4cOMXLkSDp16kRYWBiNGzdm4MCBLFu2rMxr5OXlMW3aNDp27EhwcDANGzZkwIABrFixAjCztLOzs/nPf/5T5tJ2RZfaP/vsM/r06UNoaCj169fnwgsvZOXKlV51ii+Lr1+/nuuuu46IiAiaNGnCbbfdRnp6+sn+04mIiIif1N0ez7w8GDGi8o/LyIA1a8DhgMOHyx4vLIRFi2DwYDjqMu4xffghVOF4wu3btwNw+umnA5CYmMjFF1/M2WefzUsvvURERAT//e9/ueaaa8jJyeGvf/0rAKmpqQBMnjyZpk2bkpWVxbx58+jfvz9ff/21JzgWFhYyePBgli1bxt13383AgQMpLCxk1apVJCUl0bdvX1auXMnAgQMZMGAADz30EFD20nZps2fP5oYbbuCiiy7i/fffJy8vj5kzZ3pe+5xzzvGqf+WVV3LNNdfwt7/9jbVr1zJhwgQA3njjjRP+d+vatSsHDx4kOjqaQYMG8fDDDxMXF3fCzyciIiLHr+4GzxNVUGAmEVW0s0xAAOTnm3o+VFRURGFhoWeM58MPP8y5557LZZddBsDIkSPp3Lkz33zzDQ6H+VgHDRpEcnIyEydO5Oabb8Zut9O+fXteeOEFr+cdNGgQO3bs4Nlnn/UEz/fff5/ExEReffVVbr/9dk/9oUOHev7eu3dv7HY7jRo1+tPL6m63m/vuu4+uXbuyaNEiz9jKSy65hDZt2jBu3Di+++47r8f87W9/47777gPgggsuYMuWLbzxxhu8/vrrlV4Ts02bNjzyyCN0796d4OBgfvjhB2bOnMmXX37Jzz//TLNmzSr1fCIiIlJ5dTd4Op2ml7Gy1q2D22+HiAgICyt7PCsL0tPN5fYuXSrXnpNwdLDr2LEjn376KQ6Hgy1btrBp0yZmzZoFmN7KYpdccgnz58/n999/p2PHjgC89NJLvPLKK2zYsIG8vDxP3Q4dOnj+vmjRIoKDg7nttttOqt3Ffv/9d/bu3cvdd9/tNaEnLCyMK6+8kpdffpmcnBzP7j6AJ1QXi4+Px+VycfDgQZo0aVKp17/pppu87g8YMIABAwbQp08fZs6cyTPPPHMC70pEREQqo+6O8bTZzKXtyt569IBOneDgQfMcdnvJzWYz5Z07m3qVed6T3LXm7bff5scff+Sbb77hH//4Bxs3buS6664DSsZ5jh07lsDAQK/byJEjAUhOTgbg//7v/0hISODss8/m448/ZtWqVfz4449cfPHF5Obmel7v0KFDxMbGVtms75SUFABiYmLKHIuNjcXtdpOWluZV3rBhQ6/7ziPhvXQ7T8ZZZ53F6aefzqpVq6rk+UREROTY6m6P54my282SSWPHwtat0LRpyaz2/fuhQQMYNcrU86GOHTt6JhQNGDCAoqIiXnvtNT766CO6du0KwIQJExg+fHi5j2/fvj0A7777Lv379+fFF1/0Op6Zmel1v1GjRixfvhy3210l4bM4RO7bt6/Msb1792K32/2yLJRlWVpSSURExEf0E7c8ffuaJZO6dzcTjHbsMH/26OGXpZTKM3PmTBo0aMCkSZNo164d7dq149dff+XMM88s91a/fn3AzER3HnXZ/7fffiszs3zw4M
G4XC7eeuutY7bD6XQeVw9k+/btadasGbNnz8YqNcM/Ozubjz/+2DPT3ZdWrVrFH3/8Ua3LPomIiEgJ9XhWpG9f6N0b1q+HtDTT09m5s897OivSoEEDJkyYwP3338/s2bN5+eWXGTx4MIMGDeKvf/0rzZo1IzU1lY0bN7J69Wo+PDLedciQIUyfPp3Jkydz3nnn8fvvvzNt2jROO+00r7Gh1113HW+++Sb//Oc/+f333xkwYABut5vvv/+ejh07cu211wJmlvjSpUv5/PPPiYmJoX79+p7e1dLsdjszZ87khhtuYMiQIfzjH/8gLy+PJ554gsOHD/PYY49V67/XGWecwY033uhZGuqHH37giSeeoGnTptx///3V+toiIiJiKHgei90ORy5j10R33nknzz//PNOmTWPjxo388MMPPPLII9x9992kpaXRsGFDOnXqxNVXX+15zAMPPEBOTg6vv/46M2fOpFOnTrz00kvMmzfPa81Nh8PBwoULmTFjBu+//z5PP/009evX54wzzvDaMeiZZ55h1KhRXHvtteTk5HDeeedVuE3m9ddfT7169ZgxYwbXXHMNAQEB9O7dm8TERPpWcy9yp06deOWVV9i3bx/5+fnExsZy7bXXMmnSpHLHnYqIiNRabneN7TizWVYVr2xexTIyMoiIiCA9Pb3cNSJdLhfbt2/ntNNO077bUqPpuyoiItXOT1t+/1leK1Yz4q+IiIiInJziLb9Xr4bISGjVyvxZvOX3kZ0H/UnBU0RERKS2c7tNT2dqKjRrZja7SU83a5K3aWMuu//736aeH2mMp4iIiEhtY1lmmcekJNi5E1atgiVLTHnxutihoRAVZdYSb9oUNmwwYz/9OH9FwVNERESkprIs04u5c2fJLSnJ3ErtPkhKCrhcZu3xgADzZ716JcdDQuDAgZJQ6icKniIiIiI1QWamd8AsvmVnl18/MBBatICWLc39lBRo1MjMZD9abq6ZaOSHzVpKU/AUERER8aXc3JJey9IBs6LeSLvdjNts2dLc4uLMnzExJcskud3w009mIlFkpPdW3cWX5Xv0MEsr+ZGCp4iIiEh1yM+H3btLLo8XB8yDByt+TNOmJQGzOGQ2a2Z6N4+lhm75fTQFTxEREZGTUVQE+/aVvUS+d6/pbSxPVJR3wGzZ0lw2P5l1nou3/C5ex/PAAXN5vUcPEzprwJbfCp4iIiIix8Oy4NChsgFz1y4ote20l7Aws55m8eXx4l7M+vWrp401fMtvBU8RERGR0oqXJCp9ebz47y5X+Y8JDvYOl8UBs0ED7/GWvlCDt/xW8KxFfvvtN5566imWLl3Kvn37cDgcnH766Vx77bXcfvvtREVF+buJtUb//v0BWLp0Kf379+fbb7/908dMnjyZKVOmVFh/0KBBLF68uKqbKiIi1Skrq+xSRTt3mhnm5XE4SmaSl741auT7gFkLKXiW49AhyMio+Hh4uPl++dKrr77KyJEjad++Pffddx+dOnWioKCAn376iZdeeomVK1cyb9483zaqjnjhhRfIKPWBL1iwgIcffpg333yTDh06eMqbN2/u+Xvr1q157733vJ4nMjKy2tsqIiInyOUyl8SPvkyemlp+fZsNYmPLBsyYGLNOppwQBc+jHDoE119vlsKqSMOGMHu278LnypUrSUhI4MILL+STTz7B6XR6jl144YXce++96mk7CZ06dfK6v2nTJgC6dOnCmWeeWe5jQkJC6N27d7W3TUREKqmw0MwkP3qpov37K35M48Zllypq3hyCgnzX7lNEzRhpWoNkZJjQ6XSaZbCOvjmd5vixekSr2qOPPorNZuOVV17xCp3FgoKCuOyyywBwu93MnDmTDh064HQ6ady4MTfffDO7d+/2ekz//v3p0qULK1eupG/fvoSEhNCqVSvefPNNwPT69ejRg9DQULp27Vom2E6ZMgWbzcYvv/zC8OHDCQ8PJyIightvvJFDhw551T3eNrVq1Yq//vWvZd5f//
79PZfGwVwet9lsvP/++zzwwAPExsYSHh7OBRdcwO+//+71WMuymDlzJi1btiQ4OJgePXqwaNGiY/+Di4hIzed2m1njK1fCf/8Ljz8OI0fCVVfBnXfCE0/AnDnw/fcloTMyEs44Ay67zNSZNQs++ABefx0mTYJbboEBA6B1a4XOalLnezwrGgMMZuxt6e+Vy2V2n3K7TcAMCSlbH0ydvLyKn/vo5z0ZRUVFfPPNN/Ts2ZMWLVr8af2EhAReeeUVRo8ezZAhQ9ixYwcPPfQQS5cuZfXq1URHR3vq7t+/n1tvvZX777+f5s2b89xzz3Hbbbexa9cuPvroIyZOnEhERATTpk3jiiuuYNu2bcTGxnq93rBhw7j66qv55z//yfr163nooYfYsGED33//PYFH1hyrTJsqY+LEifzlL3/htddeIyMjg3HjxjF06FA2btxIwJHLIFOnTmXq1Kn87W9/46qrrmLXrl3ccccdFBUV0b59+xN6XYCtW7cSFRVFRkYGLVu25Nprr+XBBx8k5OgvjYiInBzLguTkspfId+8262SWp169sj2YcXEQEeHbtksZdT54jhhR8bEzz4TJk0vu33ijGeqxfbsZO+wo9a9Tvz507Fhyf/z4ipfmatcO/u//Tq7dxZKTk8nJyeG0007707qbNm3ilVdeYeTIkTz33HOe8u7du3P22Wfz1FNP8cgjj3jKU1JS+OKLL+jZsycAZ555Jo0bN+axxx5jy5YtnpAZGxtLt27d+Pjjj7nzzju9XnP48OHMnDkTgIsuuogmTZpwww03MGfOHG644YZKt6kyOnXqxLvvvuu5HxAQwNVXX82PP/5I7969OXz4MI8//jjDhg3jtdde89Tr3Lkzf/nLX044eJ5zzjlcc801dOjQgdzcXBYtWsTMmTNZvnw5iYmJ2GvIkhUiIrVOenrZgJmUBDk55dcPCvIOl8W3qChN9Kmh6nzwPJUkJiYClLlcfdZZZ9GxY0e+/vprr5AXExPjCZ0AUVFRNG7cmFatWnn1bHY8krh37txZ5jVvuOEGr/tXX301t9xyC4mJidxwww2VblNlFA8vKBYfH+9pZ+/evVm5ciUul6tMG/v27UvL4n1tT8DDDz/sdf+SSy6hVatWjB07lk8//ZRhw4ad8HOLiJwSsrNLxmCWHouZnl5+/YAAM+by6KWKmjSpMetTyvGp88Hzww8rPnb0d/Xdd2HbNjO5KDLS9NRX5LHHzBCQ43nekxEdHU1oaCjbt2//07opR2ZExcTElDkWGxtbJjiWt/xSUFBQmfKgI+MGXOWMLWjatKnXfYfDQcOGDT1tqWybKqNhw4Ze94vHv+bm5nq99tFtrKjsZNx4442MHTuWVatWKXiKiBTLzy9/Jnlycvn1bbayW0a2bGlmlzvqfGQ5JdT5T7EyO08FB5uxnXZ7ya0iTufJ7Wp1vAICAjj//PNZtGgRu3fv9lrS52jFQWzfvn1l6u3du/eEx1Iey/79+2nWrJnnfmFhISkpKZ62VKZNwcHB5OXllXmN5OTkE2p78WvvL2cm4/79+2nVqlWln/PP6DK7iJySCgvNRJ+jA+b+/RWPS4uOLnuZvEUL8wNW6qw6HzxP1JFOs+Mur04TJkxg4cKF3HHHHXz66aeeHshiBQUFLF68mIEDBwLw7rvv0qtXL8/xH3/8kY0bN/LAAw9Uedvee+89r8v1c+bMobCw0DMLvTJtatWqFb/99pvX82/evJnff//9hIJn7969CQ4O5r333uPKK6/0lK9YsYKdO3dWafD8z3/+43lNEZE6y7JMmDx6qaI9eyreMjI83GwZefREn2NdVpQ6S8HzKOHhZp3OlBQzc708DRuaer7Sp08fXnzxRUaOHEnPnj1JSEigc+fOFBQU8Msvv/DKK6/QpUsX5s2bx9///neee+457HY7gwcP9swgb9GiBWPGjKnyts2dOxeHw8GFF17omdV+xhlncPXVVwPQvn37427TTTfdxI033sjIkSO58s
or2blzJzNnzqTRCS6Y2qBBA8aOHcvDDz/M7bffzogRI9i1axdTpkw54Uvty5Yt45FHHmHYsGG0bt0al8vFokWLeOWVVxg4cCBDhw49oecVEalRLMvMtj16kk9SUsU/HENCyl4ib9lSM8nFi4LnURo1MovD17Sdi+644w7OOussnnrqKR5//HH2799PYGAgp59+Otdffz2jR48G4MUXX6RNmza8/vrr/Pvf/yYiIoKLL76YGTNmlBkTWRXmzp3LlClTePHFF7HZbAwdOpSnn37aq1f2eNt0/fXXs3fvXl566SXefPNNunTpwosvvsjUqVNPuH3Tpk2jXr16vPDCC7zzzjt06NCBl156iVmzZp3Q88XExBAQEMD06dNJTk7GZrPRrl07pk2bxr333qtL7SJS+2RklJ3ks3OnmQBUnsDA8reMjI7WTHL5UzbLqmjwRc2QkZFBREQE6enphJfTzehyudi+fTunnXYawb4YdCmAWUB+6tSpHDp0qFrGjtZF+q6KiF/l5pYNlzt3wuHD5de326FZs7LrYcbEaCa5lPFnea2YejxFRETqkvx8s7j60b2YBw9W/JijZ5LHxZnQeWQjEJGqouApIiJSGxUVwb59ZXsw9+6teCZ5VFTZS+QtWvhmmRYRKhk8ly5dyoABA8o9tnLlSq8ZvatXr+b+++9n1apVOBwOBg4cyKxZs2hd0eKXUqtMmTKFKVOm+LsZIiJ1n2XBoUNlA+auXRXPJA8LMzPJSy9VFBdntuET8aMT6vF89NFHywTQLl26eP6+adMm+vfvT7du3ZgzZw4ul4tJkybRr18/1qxZc8KzlEVEROosy4K0tLLjMJOSoJwNPADTU3n0dpFxcdCggSb6SI10QsGzXbt2x1yvcNKkSTidTubPn+8ZYNqzZ0/atWvHrFmzePzxx0+stSIiInVBZqZ3wCz+e2Zm+fUdjvJnkjdqpIAptUqVj/EsLCxk/vz53HzzzV6zmlq2bMmAAQOYN2+egqeIiJwaXK7yt4xMTS2/vs1mtoc8OmDGxJj9ykVquRMKnqNGjeLaa68lNDSUPn368NBDD3HOOecAsHXrVnJzc4mPjy/zuPj4eJYsWYLL5dJyMiIiUncUFJjde46+TF7Olr0ejRuXXaqoeXM4anc6kbqkUsEzIiKCf/3rX/Tv35+GDRuyZcsWnnjiCfr378+CBQsYNGgQKSkpAERFRZV5fFRUFJZlkZaWRkxMTLmvkZeX57Vfd8axVnIXERHxJbe7ZCZ56ZC5Z485Vp7IyPJnkoeG+rTpIjVBpYJn9+7d6d69u+d+v379GDZsGF27duX+++9n0KBBnmO2Y4w5OdaxGTNmnNRONSIiIifNsiA5uewl8t27zTqZ5alXr2wPZlyctowUKeWkx3hGRkYyZMgQXnrpJXJzcz1bIBb3fJaWmpqKzWYjMjKywuebMGEC99xzj+d+RkYGLVq0ONlmioiIlC89vWzA3LnT7PRTnqAg73BZfIuK0kQfkT9RJZOLinfdtNlstGnThpCQENauXVum3tq1a2nbtu0xx3c6nU6cTmdVNEtERKREdnbJ5fHSl8nT08uvHxBgxlwevVRRkybaMlLkBJ108ExLS2P+/Pl069bNEyiHDh3K3LlzmTlzJvWPLFablJREYmIiY8aMOdmXrHbJOcmkuyo4EQERwRFEh1b//uTHGpJQWmJiIv3796+w/owZMxg/fnyZ8mXLlvHcc8/x3XffcejQIUJCQujcuTM33XQTN998M/Xq1fO0Y9SoUTz//PMn/mZERHwlL897JnlxyExOLr++zVZ2y8iWLc3scoc2+BOpSpX6H3X99dcTFxfHmWeeSXR0NH/88QdPPvkkBw4c4K233vLUmzp1Kr169WLIkCGMHz/es4B8dHQ09957b1W/hypVUFTAP+f/k21p2yqs07pBa96/8n0CA6p3D9uVK1d63Z8+fTqJiYl88803XuWdOnXy/P2qq64q828cFxdX5rknT57MtG
nT6Nu3L9OnT6dNmzbk5OSwYsUKpkyZwubNm3nqqaeq8N2IiFSxwkKzPeTRl8j37694y8jo6LKXyVu0AF1pE/GJSgXP+Ph4PvjgA1566SWysrKIiorinHPO4Z133qFXr16eeh06dGDp0qWMGzeOq666ymvLzJq+a5HD7qBZ/Wb8sv8XWka0LHN8Z/pOmtVvhsNe/b8FH71If6NGjbDb7cdcvL9JkybHPA7w4YcfMm3aNP72t7/x6quvevWUDh48mPvvv79M6BUR8RvLMmHy6KWK9uypeMvI8HCzZeTRE32OXMkREf+oVHoaP358uZdsy9OzZ0+++uqrE2pUVXIVmm3GnAFOT8AqdBdS6C4kwBbg1WtZXPfmM25mWdIyCt2FhDvDzRhWG2TmZRLhjOCWbreQV5RX4fPabXaCAmruOmzTpk2jQYMGPPvss+Venq9fvz4XXXRRmfJ33nmHRx99lJ07d9KuXTseeeQRhgwZ4lXnjz/+YPLkyXz11Vekp6fTunVrRo8ezahRozx1li5dyoABA3jvvff49ddfefvtt8nMzGTAgAG89tprhIaGcu+99zJ37lzADN147rnnCAsLq+J/CRGpUSzLLKx+9HaRSUnm8nl5QkLKXiJv2VIzyUVqqDo/eGXEhyMAeHfYu0QEmxPR3I1zeee3d7io9UXcefadnro3zr2RvKI8Xhv6Gv3i+vHF1i/IKchhV8YuooKjsLAY1GYQ3Zt258Z5N5KRl8G/L/k3cRHmUvbX277m+R+f5+xmZ/PguQ/6/s0Cs2fP5vXXX8ftdtOlSxdGjx7Nrbfe6jm+b98+1q1bxzXXXENoJdaQW7BgAT/++CPTpk0jLCyMmTNnMmzYMH7//Xdat24NwIYNG+jbty9xcXE8+eSTNG3alC+++IK77rqL5ORkJk+e7PWcEydOZMCAAbz11lvs2LGDsWPHct111+FwODjjjDN4//33+eWXX5g4cSL169fn2WefrZp/JBHxv4yMspfIk5LMBKDyBAaWv2VkdLRmkovUInU+eJ4Im83GLd1uYVnSMnIKcgBwFbloENyAW7rdctyTfnzt+uuv59JLL6VFixYcPHiQ119/ndtuu41t27Yxffp0wEzyAjjttNMq9dy5ubl89dVXnsliPXr0IDY2ljlz5nh6we+55x7q16/P8uXLPdulXnjhheTl5fHYY49x11130aBBA89zxsfH8+abb3rub9q0iaeffpq77rqLJ554wvP4lStX8t577yl4itRGubllL5Hv3AmHD5df326HZs3KrocZE6OZ5CJ1QJ0Pnh+O+BAwl8SLDe84nMvaX0aAzXvf23eHv+up2yi0Ef3i+rF4y2J6Nu3J1rSt9IvrR/emZgH91y97vczznt/6fM5rdR52m39Oju+9957X/SuvvJKhQ4d6Qt/JjK8dMGCAJ3SCGUvauHFjdu7cCYDL5eLrr78mISGB0NBQCkuNu7rkkkt4/vnnWbVqFYMHD/aUH32ZvmPHjgBceumlZco/+eQTsrKydLldpKbKzzeLqx+9VNHBgxU/5uiZ5HFxJnQGVu/ETRHxnzofPIMdZdcMddgd5U4OOrpuca/nnqw91Auq59XbWZnn9acbb7yR+fPn89NPPzF48GDPDPft27dX6nmKNwYozel0kntkgeWUlBQKCwt57rnneO6558p9juSjljI5elvVoCP7E1dU7nK5FDxF/K2oyMwkP7oXc+/eimeSR0WVv2XkMdZ0FpG6qWalpBqme9Pu9Ivrx7xN8xjWYZint7M2KV7c337kElVMTAxdu3blyy+/JCcnp1LjPI+lQYMGBAQEcNNNN3lNJCqtspf3RcSPLMv0Vpa3ZWRFM8nDwsxM8tJLFcXFQamrJSJyalPwPAabzcat3W8luyCbW7vfWmPHdh7LO++8Q2BgID179vSUPfTQQ1x99dXcddddZZZTAsjKymLFihXlzmyvSGhoKAMGDOCXX34hPj7e00spIjWcZUFaWtkezKQkcL
nKf0xwcNntIuPioEEDTfQRkWNS8PwT3Zp247XLXvN3M/7UE088wYYNGzj//PNp3ry5Z3LRl19+yZQpU4iOLtlpacSIETz00ENMnz6dTZs28be//c2zgPz333/Pyy+/zDXXXFOp4AnwzDPPcM4559CvXz8SEhJo1aoVmZmZbNmyhc8//7zMwvci4mOZmd4Bs/jvmZnl13c4yp9J3qiRAqaInBAFzzqiQ4cOfPbZZyxYsIC0tDRCQkLo1q0b77//Ptdee22Z+tOmTeOCCy7gueee44EHHiA5OdmzZeY999zDP/7xj0q3oVOnTqxevZrp06fz4IMPcvDgQSIjI2nXrh2XXHJJVbxNETkeLpf3lpHFt9TU8uvbbGZ7yKMDZkyM2a9cRKSK2CyrotHgNUNGRgYRERGkp6d7lugpzeVysX37dk477TTPXvEiNZG+q1LlCgrM7j1HXyLfv7/ixzRuXHapoubNQcNjROQk/FleK6YeTxGRms7thn37yi5VtGePOVaeyMjyZ5JX0YRCEZEToeApIlJTWBYkJ5c/kzw/v/zH1KtXtgczLk5bRopIjaTgKSJSWW43rF9vZoM3aACdO1d+V5309LIBc+dOs9NPeYKCvMNl8S0qShN9RKTWUPAUEamMFSvg+edh40bIywOnEzp2hNGjoW/fsvWzs8vfMjIjo/znDwgwYy6PXqqoSRNtGSkitV6dCZ41fI6UiL6jdcGKFTB2rJkdHhMDISGmh/KXX+Cee8ytYUPvsZhH7djlYbOV3TKyZUszu9xRZ07NIiJeav3ZLeDIUh8FBQWEhIT4uTUiFSvev96hUFE7ud2mpzM1Fdq2hawsM+EnNxdycszyRWPGQPfuZS99R0eXvUzeooXpLRUROYXU+p+AgYGBOJ1O0tPTqV+/fq3cXUhODRkZGQQEBHh+WZJaZv162LDB9HJu2GAuoZfmdJoQGhMDvXp5T/SpV88/bRYRqWFqffAEiI6OZs+ePezevZuIiAgCAwMVQKXGsCyL7OxsMjIyiImJ0XezNsrJgc8+gx07zCQfm82Mt4yKMqEyJMSU79kDN90E557r7xaLiNRIdSJ4Fi9UmpyczJ49e/zcGpGybDYbkZGRRGiJm9olORk+/xwWLzaLsluWCZ3Nm5uF2EsPm8jKMr2eDRr4r70iIjVcnQieYMJneHg4BQUFFBUV+bs5Il4CAwN1ib022bYN5s2DZcug+HzSsaPp5dy3z1xOL91zbVkmmPboYZZWEhGRctWZ4FksMDCQwMBAfzdDRGoby4LVq2HuXPjtt5Lyrl1h2DA480xYudLMat+61cxIL57Vvn+/6ekcNUpLHomIHEOdC54iIpVSUABLl8Inn5glkMCEx3POMYGzbduSun37wqxZJet4HjhgLq/36GFCZ3nreIqIiIeCp4icmjIzYdEimD/f7EAEpgdz0CC47DJo1Kj8x/XtC717n/zORSIipyAFTxE5tezbB59+Cl99ZXYeArPO5mWXwUUXHd/SR3a7uQQvIiKVouApIqeGTZvMhKGVK814ToDWrc3l9HPO0W5BIiI+oDOtiNRdbjesWmUC56ZNJeU9e8Lw4abXUuuqioj4jIKniNQ9Lhd8/bW5pL5vnylzOGDAALjiCrObkIiI+JyCp4jUHWlpZrLQokVm8hBA/foweDAMGaLF3UVE/EzBU0Rqv6QksxxSYiIUFpqymBi4/HI4/3wIDvZr80RExFDwFJHaybLMQu/z5sHPP5eUd+xoLqf37q0ljkREahgFTxGpXQoLYflyEzi3bTNlNhv06WNmqHfo4N/2iYhIhRQ8RaR2yM6GL76Azz+H5GRT5nTChReaNThjYvzbPhER+VMKniJSsx08CJ99Bl9+afZFB4iMhKFDzaSh+vX92jwRETl+Cp4iUjP98YeZMLR8uVmPE6BFC7P+5nnnQWCgX5snIiKVp+ApIjWHZcFPP8HcubBuXUn5GWeY8Zs9emjBdxGRWkzBU0T8Lz/fLIX0yS
ewe7cpCwiAc881M9Rbt/Zn60REpIooeIqI/2RkwMKFZtH39HRTFhoKF19sxnBGR/u3fSIiUqUUPEXE9/bsMdtZfv216e0EaNTILPh+4YUmfIqISJ2j4CkivmFZsHGjGb/5ww/mPkDbtmb85l/+Yi6vi4hInaXgKSLVq6gIVq40C75v3lxS3quXmaHeubMmDImInCIUPEWkerhcsGSJmTB08KApCwyEgQPNhKHmzf3ZOhER8QMFTxGpWqmpZnehRYvMbkNgFnm/9FIYMgQiIvzbPhER8RsFTxGpGjt2mN7Nb781+6kDxMaa3s2BA832liIickpT8BSRE2dZsGaNGb/5yy8l5Z07mwlDZ52l8ZsiIuKh4CkilVdYaHo2P/nE9HSCCZh/+YsJnKef7s/WiYhIDaXgKSLHLysLFi82YzhTU01ZcDBcdBFcdhk0aeLf9omISI2m4Ckif+7AAbPg+5IlZrY6QFSU2V3o4oshLMy/7RMRkVrBfrJP8Nprr2Gz2Qgr5wfP6tWrueCCCwgLCyMyMpLhw4ezbdu2k31JEfGVzZvh8cfhjjtML6fLBa1awd13w+uvw1VXKXSKiMhxO6kezz179jB27FhiY2NJL95n+YhNmzbRv39/unXrxpw5c3C5XEyaNIl+/fqxZs0aGjVqdFINF5FqYllmZ6G5c2HDhpLy7t3N+M1u3TRhSERETojNsor3rau8oUOHYrPZiIqK4qOPPiIrK8tz7OqrryYxMZGtW7cSHh4OwM6dO2nXrh1jxozh8ccfP67XyMjIICIigvT0dM/ziEg1yMuDb74xE4b27jVlDgecd55ZEqlVKz82TkREarLjzWsn3OP57rvv8u2337JhwwYefPBBr2OFhYXMnz+fm2++2evFW7ZsyYABA5g3b95xB08RqWbp6TB/PixYAJmZpqxePRg82IzhjIryb/tERKTOOKHgefDgQe6++24ee+wxmpez7d3WrVvJzc0lPj6+zLH4+HiWLFmCy+UiODj4RF5eRKrC7t2md/Obb6CgwJQ1bmx6Ny+80MxWFxERqUInFDxHjhxJ+/btSUhIKPd4SkoKAFHl9JRERUVhWRZpaWnExMSUOZ6Xl0deXp7nfkZGxok0UUTKY1mwfr0Zv/njjyXlp59uxm/26QMBAf5rn4iI1GmVDp4ff/wxn3/+Ob/88gu2P5lgcKzjFR2bMWMGU6dOrWyzRORYiorgu+/MDkNbtpgym83sLDR8OHTsqAlDIiJS7SoVPLOyshg1ahR33nknsbGxHD58GID8/HwADh8+TGBgIA0bNgRKej5LS01NxWazERkZWe5rTJgwgXvuucdzPyMjgxYtWlSmmSJSLCfHrL356adw6JApCwqC88+Hyy+HZs382z4RETmlVCp4Jicnc+DAAZ588kmefPLJMscbNGjA5ZdfzkcffURISAhr164tU2ft2rW0bdu2wvGdTqcTp9NZmWaJyNGSk826m4sXm/AJEBEBQ4aYSUMREf5tn4iInJIqFTybNm1KYmJimfLHHnuMb7/9lkWLFhEdHY3D4WDo0KHMnTuXmTNnUr9+fQCSkpJITExkzJgxVdN6EfG2bZuZMPS//5nL6wDNm5sJQwMGmN5OERERPzmpdTyL/fWvfy2zjuemTZvo1asXPXr0YPz48Z4F5FNTUyu1gLzW8RT5E5YFq1eb8Zu//lpS3qWLGb955pkavykiItWq2tfx/DMdOnRg6dKljBs3jquuugqHw8HAgQOZNWuWdi0SqQoFBbB0qenhTEoyZXY7nHOO6eFs186PjRMRESmrSno8q5N6PEWOkpkJixaZRd/T0kxZcDBcfLFZ8L1xY/+2T0RETjl+7/EUkSq2b5+Znf7VV2Z7S4CGDeGyy2DQILPbkIiISA2m4ClS023aZMZvrlxpxnMCtG5tLqf362f2UxcREakF9BNLpCZyu2HVKhM4N20qKe/Z0+wwFB+vCUMiIlLrKHiK1CQuF3z9tbmkvm+fKXM4zF
JIV1wBcXF+bZ6IiMjJUPAUqQnS0sxkoUWLzOQhgLAwuOQSs+h7gwb+bZ+IiEgVUPAU8aekJLMcUmIiFBaasqZNzXaWF1xgZquLiIjUEQqeIr5mWfDbb2b85s8/l5R36GDGb/bubdbjFBERqWMUPEV8pbAQli83gXPbNlNms0GfPmb8ZseOfm2eiIhIdVPwFKlu2dnwxRfw+eeQnGzKnE5zKf3yyyEmxr/tExER8REFT5HqcvAgfPYZfPkl5OaasshIs7vQ4MFQv75fmyciIuJrCp4iVW3LFnM5fflysx4nQIsWZvzmeedBUJB/2yciIuInCp4iVcGy4KefTOBcu7akPD4ehg+HHj204LuIiJzyFDxFTkZ+vlkK6ZNPYPduUxYQYLayHDbMbG0pIiIigIKnyInJyICFC82i7+nppiw0FAYNgssug+ho/7ZPRESkBlLwFKmMPXvMdpZff216O8GEzMsvh4suMuFTREREyqXgKfJnLAs2boS5c+GHH8x9gDZtzPjNvn3NfuoiIiJyTPppKVKRoiJYudJMGNq8uaS8Vy8zfrNLF00YEhERqQQFT5GjuVywZImZMHTwoCkLDIQBA8wOQy1a+LN1IiIitZaCp0ix1FSzu9CiRWa3ITCLvF96qblFRvq1eSIiIrWdgqecGtxuWL8e0tKgQQPo3BnsdnNsxw7Tu/ntt2Y/dYDYWNO7OXCg2d5SRERETpqCp9R9K1bA88+bCUJ5eSZIduxoZqFv2wa//FJSt1MnM37zrLNKgqmIiIhUCQVPqdtWrICxY81l9JgYCA6GffvM5fTFi00AbdgQ/vIX08PZvr2/WywiIlJnKXhK3eV2m57O1FRo1QqSk80+6vn5ZrKQy2Uurb/8sgmlIiIiUq0UPKXuWr8efvvNBNBffzV/AgQFQZMmEBICWVkmkCp4ioiIVDsFT6l7LMsEzWeegZ07TcC02cyuQk2bQlSUGb9ZVAQpKWbCkYiIiFQ7BU+pO/LzYelSs6VlUpLZT91uh3r1IC4OwsO96+fmmolGDRr4pbkiIiKnGgVPqf1SU2HhQjNhKCPDlAUHm7U3o6Jg0yazHmdplgX790OPHmZpJREREal2Cp5Se23dano3ly0rWX+zUSMYOtQslVSvHsTHm1ntW7eay+whIaanc/9+09M5apSWTRIREfERBU+pXdxu+P57+OwzWLeupLxjR7j8cujdGwICSsr79oVZs0rW8TxwwFxe79HDhM6+fX3/HkRERE5RCp5SO+TkmP3TP//chEcwAfOcc+Cyy+D00yt+bN++JpBWtHORiIiI+ISCp9RsBw6YsPnll+YSOZjxmhdfDJdcAtHRx/c8djt07Vp97RQREZE/peApNY9lwYYNZvzmqlXmPkDz5uZy+oAB2j9dRESkFlLwlJqjsNBMFPr0UzMZqFj37iZw9uhh1uMUERGRWknBU/wvI8MshbRgQcli7kFBMHCgmaEeF+ff9omIiEiVUPAU/0lKMr2bS5eaxd/BrLt56aVmDOfRC76LiIhIrabgKb5lWbB6NXzyCaxZU1Letq25nH7OOeDQ11JERKQu0k948Q2XCxITzfqbu3ebMpsN+vQxgbNjR43fFBERqeMUPKV6JSebsZuLF0NWlikLDTU7Cw0ZAk2a+Ld9IiIi4jMKnlI9Nm824ze/+w6KikxZ06ZmsffzzzfhU0RERE4pCp5SdYqKYOVKczl948aS8i5d4IoroFcv7RYkIiJyClPwlJOXnW12Fvr8czh0yJQ5HHDeeaaHs3Vr/7ZPREREagQFTzlxe/easPnVV2byEEBEBAwebLazbNDAv+0TERGRGkXBUyrHsmDtWjN+88cfS7azbNXK9G6ed55Z/F1ERETkKAqecnzy8+F//zOBc8eOkvJevcxySPHxWg5JREREjknBU47t8GFYuNDc0tNNmdNpZqZfdhk0a+bX5omIiEjtoeB5qnK7Yf16szd6gwbQubP3jPPt203v5rffQmGhKYuONn
unX3QRhIX5p90iIiJSayl4nopWrIDnnzdLHuXlmR7Mjh1h1CgzG/2zz+C330rqt29vlkPq3VvbWYqIiMgJq9SiimvWrOHSSy8lLi6OkJAQoqKi6NOnD++++26ZuqtXr+aCCy4gLCyMyMhIhg8fzrZt26qs4XKCVqyAsWPNfumRkWZSUHg4LFsGw4fDPfeY0Gm3w7nnwqxZ5qY91EVEROQkVSpJHD58mBYtWnDdddfRrFkzsrOzee+997jpppvYsWMHDz74IACbNm2if//+dOvWjTlz5uByuZg0aRL9+vVjzZo1NGrUqFrejPwJt9v0dKamQtu2ZsH3vXvN2psFBZCTA3v2wO23m0vq0dH+brGIiIjUITbLKl4P58T17t2bvXv3kpSUBMDVV19NYmIiW7duJTw8HICdO3fSrl07xowZw+OPP37cz52RkUFERATp6eme55ITtHYt3Hyz6el0OmHTJsjNNceCg80anHY7vPMOdO3q16aKiIhI7XG8ea1K9i+Mjo7GceQybGFhIfPnz+fKK6/0euGWLVsyYMAA5s2bVxUvKSciLc2M6QwIMOM7c3PNmpunn26WQ2re3CyblJbm75aKiIhIHXRCg/bcbjdut5u0tDQ+/PBDvvjiC55//nkAtm7dSm5uLvHx8WUeFx8fz5IlS3C5XAQHB5f73Hl5eeTl5XnuZ2RknEgTpTwNGpgezfXrzWV3p9NMHCr+LHJzTZl2HBIREZFqcEI9niNHjiQwMJDGjRszZswYnn32Wf7xj38AkJKSAkBUVFSZx0VFRWFZFmnH6FGbMWMGERERnluLFi1OpIlSnvBws696ZmbJTPbi0GlZsH8/dOpkllYSERERqWInFDwnTpzIjz/+yIIFC7jtttsYPXo0s2bN8qpjO8YuNsc6NmHCBNLT0z23Xbt2nUgT5Wjbt8MDD0BsrFmDMzjYXFYvKoKsLNi61fR0jhrlvZ6niIiISBU5oUvtcXFxxMXFAXDJJZcAJjDecsstNGzYECjp+SwtNTUVm81GZGRkhc/tdDpxOp0n0iypyObNMGmS6e3s1QumToU33jDjPA8cML2fPXqY0Nm3r79bKyIiInVUlSzMeNZZZ/HSSy+xbds2evbsSUhICGvXri1Tb+3atbRt27bC8Z1SDdavN0EzNxc6dIApU6BePRg48Ng7F4mIiIhUsSpJGomJidjtdlq3bo3D4WDo0KHMnTuXzMxMT52kpCQSExMZPnx4VbykHI81a0xPZ26umbU+fboJnWBCZteuZpH4rl0VOkVERKTaVarH8+9//zvh4eGcddZZNGnShOTkZD788EM++OAD7rvvPs/C8FOnTqVXr14MGTKE8ePHexaQj46O5t57762WNyJH+eEHmDHD7LPesydMnGiWThIRERHxk0oFzz59+vDmm2/yn//8h8OHDxMWFsYZZ5zBO++8w4033uip16FDB5YuXcq4ceO46qqrcDgcDBw4kFmzZmnXIl9Yvtxsc1lUBH36wH33QWCgv1slIiIip7gq2bmoOmnnokr6+mt45hmzPNJ558GYMWbBeBEREZFqcrx5rUomF0kNsXAhvPii+ftFF2lpJBEREalRFDzrinnzzBJJAEOHwh13wDHWSxURERHxNQXP2s6y4IMP4L33zP0RI+CmmxQ6RUREpMZR8KzNLAvefhs++sjcv+kmuPpq/7ZJREREpAIKnrWVZcGrr8Lnn5v7t98Ol1/u3zaJiIiIHIOCZ23kdsPzz8OSJeb+yJEweLB/2yQiIiLyJxQ8a5vCQnj6afj2WzOO8+67zfaXIiIiIjWcgmdtUlAATzwBK1eatTnHjoVzzvF3q0RERESOi4JnbZGfD48+Cj//DA4HTJgAZ53l71aJiIiIHDcFz9ogNxemT4e1a8HphAcfhG7d/N0qERERkUpR8KzpsrNh8mT4/XcICYEpU6BTJ3+3SkRERKTSFDxrsowMeOgh2LYNwsJg2jRo187frRIRERE5IQqeNVVqqg
mdSUkQEQEPPwytWvm7VSIiIiInTMGzJkpOhgcegL17ISoKHnkEmjf3d6tERERETord3w2Qo+zbB+PGmdDZuDE89phCp4iIiNQJ6vGsSXbvNj2dqakQG2t6OqOj/d0qERERkSqhHs+aYvt2GD/ehM64ONPTqdApIiIidYiCZ02weTNMnAjp6dCmDcyYAQ0a+LtVIiIiIlVKwdPf1q83C8JnZUGHDubyeni4v1slIiIiUuU0xtOf1qwxOxLl50N8vFk+KTjY360SERERqRYKnv7yww/mknphIfTsaS61BwX5u1UiIiIi1UbB0x+WL4dZs6CoCPr0gfvug8BAf7dKREREpFopePra11/DM8+AZcF558GYMRAQ4O9WiYiIiFQ7BU9fWrQIXnjB/P2ii2DUKLBrfpeIiIicGhQ8feWTT+D1183fhw6FO+4Am82vTRIRERHxJQXP6mZZMGcOvPuuuX/VVXDzzQqdIiIicspR8KxOlgVvvw0ffWTu33QTXH21f9skIiIi4icKntXFsuDVV+Hzz83922+Hyy/3b5tERERE/EjBszq43fD887Bkibk/ciQMHuzfNomIiIj4mYJnVSsshKefhm+/NeM4774bBg70d6tERERE/E7BsyoVFMATT8DKlWZtzrFj4Zxz/N0qERERkRpBwbOq5OfDo4/Czz+DwwETJsBZZ/m7VSIiIiI1hoJnVXC5YPp0+O03s9/6Qw9Bt27+bpWIiIhIjaLgebKys2HKFNi0CUJCYPJk6NzZ360SERERqXEUPE9GRgZMmgRbt0JYGEybBu3a+btVIiIiIjWSgueJSkuDBx+EpCSIiICHH4ZWrfzdKhEREZEaS8HzRCQnwwMPwN69EBUFjzwCzZv7u1UiIiIiNZrd3w2odfbtg3HjTOhs3Bgee0yhU0REROQ4qMezMnbvNj2dqakQG2t6OqOj/d0qERERkVpBPZ7Ha/t2GD/ehM64ONPTqdApIiIictwUPI/H5s0wcSKkp0ObNjBjBjRo4O9WiYiIiNQqutRemtsN69ebGesNGpj1ODduhKlTITcXOnQwa3bWq+fvloqIiIjUOgqexVasgOefN0EzLw+cTmja1ATO+vUhPt7sSBQc7O+WioiIiNRKCp5gQufYsWb8ZkyM2YHowAFYvhwCA2HYMLMjUVCQv1sqIiIiUmspeLrdpqczNRXatgWbzfx9zx4TQC0LcnLAoX8qERERkZOhyUXr15vL6zExJnQmJ5stMC3LzFrv3Nnsw75+vb9bKiIiIlKrVSp4fvPNN9x222106NCBevXq0axZMy6//HJ+/vnnMnVXr17NBRdcQFhYGJGRkQwfPpxt27ZVWcOrTFqaGdMZEgL5+bBjhwmdjRpB69YQGmqOp6X5u6UiIiIitVqlgueLL77Ijh07+Ne//sXChQt55plnOHjwIL179+abb77x1Nu0aRP9+/cnPz+fOXPm8MYbb7B582b69evHoUOHqvxNnJQGDcxEotxcM4azTRszqei000wPaG6uOa7lk0REREROis2yLOt4Kx88eJDGjRt7lWVlZdG2bVu6dOnCV199BcDVV19NYmIiW7duJTw8HICdO3fSrl07xowZw+OPP37cDczIyCAiIoL09HTPc1UptxtuvBF++cWETput5JhlmcvuPXrAO++AXSMTRERERI52vHmtUknq6NAJEBYWRqdOndi1axcAhYWFzJ8/nyuvvNLrhVu2bMmAAQOYN29eZV6y+tntMHq06dHcuhWysqCoyPy5daspHzVKoVNERETkJJ10mkpPT2f16tV07twZgK1bt5Kbm0t8fHyZuvHx8WzZsgWXy1Xh8+Xl5ZGRkeF1q3Z9+8KsWdC9Oxw+bMZ5Hj5sejpnzTLHRUREROSknPQaQaNGjSI7O5sHHngAgJSUFACioqLK1I2KisKyLNLS0oiJiSn3+WbMmMHUqVNPtlmV17cv9O5dduci9XSKiIiIVImTSlUPPfQQ7733Hk899RQ9e/
b0OmYrPVbyKMc6NmHCBNLT0z234kv4PmG3Q9eucO655k+FThEREZEqc8I9nlOnTuXhhx/mkUceYfTo0Z7yhg0bAiU9n6WlpqZis9mIjIys8HmdTidOp/NEmyUiIiIiNdQJdelNnTqVKVOmMGXKFCZOnOh1rE2bNoSEhLB27doyj1u7di1t27YlWPudi4iIiJxyKh08p0+fzpQpU3jwwQeZPHlymeMOh4OhQ4cyd+5cMjMzPeVJSUkkJiYyfPjwk2uxiIiIiNRKlVrH88knn2Ts2LFcfPHF5YbO3r17A2YB+V69etGjRw/Gjx+Py+Vi0qRJpKamsmbNGho1anTcDaz2dTxFRERE5KQcb16rVPDs378/3377bYXHSz/Vzz//zLhx41i5ciUOh4OBAwcya9Ys2rRpc7wvByh4ioiIiNR01RI8/UHBU0RERKRmq5adi0RERERETpSCp4iIiIj4hIKniIiIiPiEgqeIiIiI+ISCp4iIiIj4hIKniIiIiPiEgqeIiIiI+ISCp4iIiIj4hIKniIiIiPiEgqeIiIiI+ISCp4iIiIj4hIKniIiIiPiEgqeIiIiI+ISCp4iIiIj4hIKniIiIiPiEgqeIiIiI+ISCp4iIiIj4hIKniIiIiPiEgqeIiIiI+ISCp4iIiIj4hIKniIiIiPiEgqeIiIiI+ISCp4iIiIj4hIKniIiIiPiEgqeIiIiI+ISCp4iIiIj4hIKniIiIiPiEgqeIiIiI+ISCp4iIiIj4hIKniIiIiPiEgqeIiIiI+ISCp4iIiIj4hIKniIiIiPiEgqeIiIiI+ISCp4iIiIj4hIKniIiIiPiEgqeIiIiI+ISCp4iIiIj4hIKniIiIiPiEgqeIiIiI+ISCp4iIiIj4hIKniIiIiPiEgqeIiIiI+ISCp4iIiIj4hIKniIiIiPiEgqeIiIiI+ESlg2dmZib3338/F110EY0aNcJmszFlypRy665evZoLLriAsLAwIiMjGT58ONu2bTvZNouIiIhILVTp4JmSksIrr7xCXl4eV1xxRYX1Nm3aRP/+/cnPz2fOnDm88cYbbN68mX79+nHo0KGTabOIiIiI1EKOyj6gZcuWpKWlYbPZSE5O5rXXXiu33qRJk3A6ncyfP5/w8HAAevbsSbt27Zg1axaPP/74ybVcRERERGqVSvd42mw2bDbbMesUFhYyf/58rrzySk/oBBNaBwwYwLx58yrfUhERERGp1aplctHWrVvJzc0lPj6+zLH4+Hi2bNmCy+Uq97F5eXlkZGR43URERESk9quW4JmSkgJAVFRUmWNRUVFYlkVaWlq5j50xYwYRERGeW4sWLaqjiSIiIiLiY9W6nNKxLslXdGzChAmkp6d7brt27aqu5omIiIiID1V6ctHxaNiwIVDS81laamoqNpuNyMjIch/rdDpxOp3V0SwRERER8aNq6fFs06YNISEhrF27tsyxtWvX0rZtW4KDg6vjpUVERESkhqqW4OlwOBg6dChz584lMzPTU56UlERiYiLDhw+vjpcVERERkRrshC61L1q0iOzsbE+o3LBhAx999BEAl1xyCaGhoUydOpVevXoxZMgQxo8fj8vlYtKkSURHR3PvvfdW3TsQERERkVrBZlmWVdkHtWrVip07d5Z7bPv27bRq1QqAn3/+mXHjxrFy5UocDgcDBw5k1qxZtGnT5rhfKyMjg4iICNLT073WBBURERGRmuF489oJBU9fUvAUERERqdmON69V63JKIiIiIiLFFDxFRERExCcUPEVERETEJxQ8RURERMQnFDxFRERExCcUPEVERETEJxQ8RURERMQnFDxFRERExCcUPEVERETEJxQ8RURERMQnFDxFRERExCcUPEVERETEJxQ8RURERMQnFDxFRERExCcUPEVERETEJxQ8RURERMQnFDxFRERExCcUPEVERETEJxQ8RURERMQnFDxFRERExCcUPEVERETEJxQ8RURERMQnFDxFRERExCcUPEVERETEJx
Q8RURERMQnFDxFRERExCcUPEVERETEJxQ8RURERMQnFDxFRERExCcUPEVERETEJxQ8RURERMQnFDxFRERExCcUPEVERETEJxQ8RURERMQnFDxFRERExCcUPEVERETEJxQ8RURERMQnFDxFRERExCcUPEVERETEJxQ8RURERMQnFDxFRERExCcUPEVERETEJxQ8RURERMQnFDxFRERExCcUPEVERETEJxz+boCISG2RnJNMuiu9wuMRwRFEh0b7sEUiIiVqwzmqWoNnVlYWDz74IHPmzCE1NZUOHTowfvx4rr322up82UqrDR+UnBh9tlJVCooK+Of8f7ItbVuFdVo3aM37V75PYECgD1smIlJ7zlHVGjyHDx/Ojz/+yGOPPcbpp5/O7Nmzue6663C73Vx//fXV+dLHraCogFs//vMPau71deOHyaakZPamVhzEYqMi6BBXN4LYqfbZSvVy2B00cDQjOesXYuu1xFbqmAXszd5Jz0bNcNh1IUlEfK+2nKOq7dUXLlzIkiVLPGETYMCAAezcuZP77ruPa665hoCAgOp6+eOWluLg12XNOBDxC0G5Lcsczw/ZSea6ZqQNctC4sR8aWIX27i/g3Jn/JCuw4iAWVtCaNQ++T2zT2h/ETqXPVo6fZVm4LTcAAfaSc1BOQQ5uy02II8RTnleYR1Z+FoEBgeRlhPPb27eQfNoyUtw52AvrY3cHAXaKHOkUBdbnp1UXs+nMZDrGNfI874GsAxS4C4gOjSbYEQyAq9BFck4yQQFBNK5X8uU7mH2Q/KJ8GoY0JCQwxKuuw+6gaVhTT91D2YfIK8qjQXAD6gXVAyC/KJ+D2QcJsAUQUz/GUzc5JxlXocurbkFRAQeyD2C32YmtH+upm5qbSk5BDhHOCOo76wNQ6C5kf9Z+bNhoFt6sTN1wZzjhznAAitxF7MvaB0Dz8Oaeuoddh8nKz6J+UH0igiM8n8WezD0ANKvfDJvN/KhMd6WTmZ9JWFAYkcGRZerGhMV4PqOMvAwy8jIIDQwlKiTK83p7MvZgYdE0rKnnB21mXibpeell6u7N3IvbctOkXhPPL6HZ+dmkudIIdgR7XRXZl7mPIquIxvUaExQQ5FXXGeCkUb2Sz35/1n4K3YU0Cm2E0+EEILcgl5TclAo/+/K+J4H2QJqENSlTNyokitDAUMB8Vw/lHDquz76i70lKTgq5hblEBkcSFhR2XJ99ed+Toz/7tNw0sguyvb4nbsvN3sy9FX72FX1PYuvHYrfZverWC6xHg5AGntfbnbEb4Lg++/K+J1n5WRx2HSbEEULD0IbH9dlX9D0p/dnnFOSQmpta5ntSVeeIw2kOzzkqtbAQe1EoNst8n4sc6bgdEfy27BaS+9loVPLyPldtk4vmzZtHWFgYI0aM8Cq/9dZb2bt3L99//311vXSlZGbaCP3jFhxFEQQEFuK01/PcAgILcRRFEPrHLWRm2v78yWq4nCwHVkYz3EHpOK3IMjd3UDpWRjNysupGj83xfLYhf9zs9dnmFeaRW5DrCSZgTtKHXYfJzMv0ev7knGT2Ze4jvyjfU5adn82Owzs8J9RiOw7vYOOhjWTlZ5W0Ly+TNfvX8Hvy7151NxzawMpdK0nJSfGUpbvSWbpjKd/v9v5/8/Pen1m8ZTF7MvZ41f1006cs3rLYq+53Sd/x33X/5Y+UP7zq/mfNf3jvt/e86iZuT+Sln15izf41Xu19ZtUzPPv9s151v9jyBY8tf4zlScs9ZbkFuUxdOpUpS6d4/Vsu/GMhE76awKI/FnnKCt2FjFk8hrsX342r0OUp//z3z0mYn8B/1/3X6/Vu/+x2bv30Vq8hFJ/9/hnXfXwdL/30klfdG+bewLAPhrEvc5+nbP7m+VzxwRXMWjHLq+4/5/+T6z6+jl0Zuzxl/9v5P/766V95etXTZGRAflJ3wg/3I6/+JlyRv2IPzsERaOEOPUhoTju2Rv+bZ394yut5H/7fwyQsSGBT8iZP2doDa0
lYkMBjyx/zqvvEd0+QsCCB3w785inbnLKZhAUJTPt2mlfdZ79/loQFCfy09ydP2Y7DO0hYkMCD3zzoVfeln14iYUGC12e0L2sfCQsSuH/J/V51X1/9OgkLEkjckegpS8lJIWFBAv9a/C+vuu/99h4JCxK8vmtZ+VkkLEggYUGCV9056+eQsCCBz37/zFOWV5TnqVv6/9Gnv39KwoIEPtrwkddzFNct/f9o8ZbFJCxIYPba2V5171p8FwkLEkjNTfWUfbP9GxIWJPDGL2941b1vyX0kLEjwhCaAZUnLSFiQwMs/vexV94FvHiBhQQI7D+/0lP2490cSFiSU+b8x7dtpJCxI4I/Ukv9za/avIWFBQpnv32PLHyNhQQLrDq7zlG08tJGEBQk8uuxRr7pPrXyKhAUJrN632lO2LW0bCQsSmLJ0ilfd5394noQFCazavcpTtjtjNwkLEhj/9Xivuq+ufpWEBQn8b+f/PGUHsg6QsCCBe7+816vuf9b8h4QFCSzZtsRTlu5KJ2FBAncuutOr7n/X/ZeEBQks2LzAU5ZTkOP5PIusIk/5xxs/JmFBAnM3zvWUFVlFnrq5Bbme8vmb55OwoOw5YvTC0SQsSCAjL8NT9uXWL0lYkMDbv77tVfeeL+8hYUECB7MPesq+3fEtCQsSeG31a151x389noQFCV7n25W7V5KwIIF///Bvr7qTl04mYUGC1xW31ftWk7AggadWVc85YsaKaZ5zlDv0IO7Q/QQG4jlHhR/uR35SdzIy8KtqSxjr1q2jY8eOOBzeLxEfH+853rdv3+p6+UoJSe9OVGY/0hp+QVBeOFkh6ym05eJwhxJ1aCjOtO78tO9HHlz9DB2iOzB5QMlJfco3D7M7YxcJPe+kc6MuAGxK3shra14mLjKO+8+5x1P336teISljJ9d0vJ5OjToDkJSRxOx1bxMdGs3o3v/01P3gt7nsOLyDQa0H0yG6IwCHcg4x7/ePCHeG89ceN3jqfrE5kd0Zuzgrtjftok4HTC/Aku1fEOIIZniXoQDYbDZCU3uTFbMAy5GFs9D8Bm3ZisgNSsKGjYa7bvH85vl90mq2pW2nU3Rn2jfsAJjfrOb9/hGWZXHbmTd52vDtthWsO7iO+Mbd6BV7FmAC28urX8DCYkzf0Z6ehCV/LGXFru84M+Yszj/tQtMGy2Laskm4cfPQeRMJc5rfzBdv/oqFfyzkrNjeXNXxas/r3fPVXeQX5vHYhY8SXc/8Rjp/02LeXzebs5v14e/dE8jLA2dadyyHiyznH0Rmn0WAFYqFRV7IdqwiB+mNFpGX1xPXkbzzj4UJJOcc4qmL/4/To9sBsGzHCv5v5ZPEN+7G5HOne9rwwFeT2J2xixkXzCC+qfnsV+/9jceWP0r7hh15dMBMT93/++5Ztqb9weT+kzmr+Znme3JoC1OWTqJVZGuevOAZT903V7/DhkPruP8v4zjvtHMA2Jm2hyeWP0lMWCxnRJ/tqTtvw+f8sv9n/tX7bk9vxMGsFF756TUahETRv/nFnrrfbFvGqt3fUc8RTruG5r2l52YxZ91HhAbW48rTS75TP+3+laU7v6ZhcGO6Ne1mPvuCfL7c8hUBdgd/P+MuT93fD21j2Y7vaBYWxzlxpqywyM0Pu00gysl147Cb33F3pe3nt/3raB3ZzvN4y4LNyVsAyM4pAtORQEp2JkmHd5OcneZV90BWMm53Edm5RTiPlGe7CsjIzSI7z+VVN7+giPyCQnJdFq4jnfiFhXbcRVBUkoexLHC7bbiLINfl9nwfCgvsYAVguU37bdhokXILqc3eB5uF3W7DsmcQaIXSKG0IB+p/QZAtlNKc9nqEBtSnsCDA87xFhQ5CA+rjtHvXDbKFlqlbmB9AaEB9ggPqedUNtIUQGlCfokJHmbohAWHedTF1raJAT938PHv5dY88L0VBnrp5eTbT3gCnV12HLZjQgPrY3E5PXVce5vFHCbCcptxd8ryuwvLr2q0gQgPqYy
/1vJZVuq6tTN0AK9hTF0xdB/nYSvWv2KzAI+UhXnVD7GEUBlhede0V1DWfp8urLpb5PINsoV51iz9PmxVw3HXtpX4sW27zeQbZvesWf0Z2Sq5Mud32I9+pel51HZStax2pG3xU3QDLfJ52q2zdkADvNgRQTl3LfE8C7AFede1HPvsAW1Cp57V5Pk+XCxxH/jltblPXYXOWet6Szz4vz0aA27tuoD3Yq25IQBhuy/w/dhV/VdzmexJoO6quPQwCAsjLs3nOEVZRYLl1g+31CA0oID/fXvK9LDxS1x5CacH2sLL/7wvK/39fVecIp7ue5xy1udEyihy5BBRBQYA5R7VIuYUi/N+JVm3BMyUlhdatW5cpj4qK8hwvT15eHnl5eZ77GT6I5jZsND1wCxkNllEYkEFevhu3Ix93bhSZy28hc5+Nx2flsee0dHZHZjN5QMlj3513kEzHXjbPLiDiyC9ih0Oz2Ry7lbgwG/efU1L33+9vIcW+ke/eGEKD7CPvLySdTc2+p2lIc0b3Lqk7461f2Get4ZMD3YnONMEzy3mYDS0WEuVs5BU8J726gh2Fq2h1sDGNM0zwzA1MZ23LtwkLrM/wmUM9dZPzd5NXUER+wF5y0htjw4bbXkBhzA7s2U0JSe/uqfvQKyv4NfsLmqfcSGyaCZ4FAXn8ctoH2O1wa88bPSH1yXfW8/3hz4lNC6Z5igmeRTaLn9t8DcBdfRIIPHLie3XOLr5NXkWTw41omVzyKfzQdg0AY84qIOzIOef9T1JZvO8PvsloxQclv5Dyc+t9FNldjDurkOgjP48XLM7nq51p/JKZxZIDkJ0NO7bbKDjYiqLYPeTbMwkpCqUwIAN3gRN3eiyHD8Po0VDvyHP82tJGXiDsOcPN6UeumixdaufndTb+yIF1z5W0YX3zYFxBIexqD/FHroCu+i6IX3+IZIsrjD9eKKn7R9Nocp1Z7GoVxFlHrkCt/j6UDStasa0glhGlOlV2NIojx1nE7pj6cNqR1/qlPn8s68aOwoaMeL2k7t4GHcl2BrG3YSNoa8o2/RbG9qX92VUUxoi3SuoeDO9GjrM++8NbgPk42boxjN1fX4HdHcSId0vqptY7ixxnI5KDO0CXI+3aEsr+L2/BZgUw4v2Suhkh55AbFEd6QDvoZsp273SSuvBuAK79wO451eUEDcAVdDo5Rc3hzCPvYXcAGZ9PBmzc/IHT8+M8z3EBeY548i9qCEey9qFDkD13FmDjjg/CPXUL7RdREHA21vmh8Jcj7cqAjP8+D5aNuz6I9LTBbbsQt+08Avs74Nwjr5UHaf95DSwb930QUOrUfD5wPs6/AEdCdVhWdxwbriM/7gsy00NxR2whaPsgDn57M+7CW6AecGWpz/O1x8jLg6nvUEp3YLb5t72wVN23ppKTAY951e1s6rYDSn6PYOe7E8k5CE+9AyX9J22A2bhbAENK1f3vveTsgn+/AyX9Ms2B2YQ1Bi4vqZv00Why/hjNq8CrntLGwGwc4d7vLWneHeSsu4O3gZJ+pAhgNk4nUPK7IrsX3ELOT7cwB5jjKQ027w1wlq67+FpyvruWecA8T6mtpO5lpep+dQU5X1/BQmAhpb0FQNCgkpI9Sy8hZ+ElfA187VXX/AcM6l/qeZedT86881kOLPeqa04CQb2BI1dh9648h5z3z+EHwPsan+nVDOoOHLlSvu+nXuS8M5tfgRHPlK77MACBHYEjIx/2rYkn553ZbAJGPF+67iRT9zQ838v969qT885stgMjvDr+x5m6scCRH8n7NrYi553Z5AAjXi1d925TtyFw+pG6f8SU1PXqKE4AEgj8J9DpSN1tUeS8Yz6jEf8pXfc24DYct+I5n+zfVc9T9zqv7/sNwA0EXIfnfLJvj8NT9xavulcBV2EfBvQ0JYcOQdbb5mT2D6+6Q4Gh2C/Bcz7JyICUN80J9S6vuhcBF2E/H8/5JC8P9r5qTupjveqeB5yH/S94zicA21/6PwAmet
XtbW5nAqVyRFWdI4qv3odldScqqx+7nV8QikW+4yCNMgYRltWdimd4+E61XlMtDiWVOTZjxgymTp1aXU2qUPEHdSj8C+yHOkD4VoJ2DyTgUHeKgPpZ3emS9BzdI71/42+XPIbcwlxC8uM8ZfXy2nH63il0aOP9G1CrjBuIdKVTL6+ktyc4vxmtDo6mVax3b0bz3ItxZPWgnqutpyyoMIrY1GtpEuX9G1DjgrPITW9MSH7JOMYAdyiNMi4kol6wV13H4fYE7biUwrglWEEZ2PIjIDAbe344QTsvxVbqR25DqwPRGYWE5LfylNndTpocHkqgw/vza0p3YlNDqJ/bpaSu5aBF8l8Bu9dg5ha2s2l1sJFXewHa7B8L2DxjlgBaO/px+t7TCCr0HpBy+l7zHYkMLhnX0y7wPLokdSXA7d2DU++rN8nr+yCFp/0Pq6gx+Y6D1N97MYVLHsadHwRnlNTtkvQCNmy0Di/5Lb5jyLn02nIuR+u825xYTo8s9R5CetJ9+ztl6rbbP9HUjSgpax7cni67nitTt9Uhc4ny9PCSssbOFrTfO71M3di0awBoW6rTqEFQY9ocuLdM3cYZJrW0LvVVCwuMIC75b2XqRmX3JSq7L3GlvsLBASHEpl1Vpm54blfCc7sSW+q/hsPuIDrz/DJ1Q/NPIzT/NBqVGkJss9mIzDmzTF1nYWOchY2JPGooeL28tmXqOtz1cbjrU++owUNBhWUnytmtIOxWEIG2o8uPb1yzDRvBv99CQewy3GG7sBWGEvz7LV7/d0RE/KW413NP42W4QncR4Da9nTXlHGWzLMuqjifu06cPRUVF/PDDD17l69evp0uXLrz88sv8/e9/L/O48no8W7RoQXp6OuHh4WXqn6ytW2HECIiMhMJGq/m15e1YFGEjgK7bX8NxqAeHD8Ps2dC6NdjtEFRytcDrcsLRTqZuXp7p2i+PzYbpTahk3a1b4aqrIDzCYmfHu0mO+IJQV1tygrcQnT6IlhufJjPDxocfQps2kJ8Pbnf5zwsQXCrTVmVdp9O0G6CgAIqKTqzutm1w/fUln+3a027HshVhswKI3/EaAQe9P9uKnrew0NwqEhRkPr+qrhsYCMXz7ypTt6jI/FtUxOEwt8rWdbvNZ1fVdS3LfIerom5AgPm3qOq6djvs2lVyrggOsdgQdzcHGsyjSdowOiU9TU62jcOH4f33oWPHksfWpnME+O//vT/OEceq+2f/53SOKFv3VD9H+CIbHKvu9u1www3mHBVaz2Jdc3OOanp4GJ13lZyjin/GV7WMjAwiIiL+NK9VW49n165def/99yksLPQa57l27VoAunTpUu7jnE4nTqez3GPVLTzH9HrujzQfVKSrOzl288E7nd4nxmLllVWkMnUr809Qmbo2GwTYbcSl3kJa/WW4nOa3objUWwiwe/82VPrL/meqq25gYMl//MrWdTrNZ2e3Q6TL+7ONyP3zz7ZY6ZPan6kJdQMCSn7AVGVdu/34v8OVqWuz1a66cOT/UMqtuAOyiUu5lQC7zfNdO/r7XdvOETXh/72vzhHHUhP+L+scYdSE//eVPUf4o27p/zc2Ss5RLZJvrTG9nVCNs9qHDRtGVlYWH3/8sVf5f/7zH2JjYzn77LMreKR/5OZCTraN6F230iD1IqJ33UpOto3c3D9/bG2TmwsBh7oTntaPfHsa4Wn9CDjUvU6+Vzi1Plupfrm5ZvywI7kbbTe8hiO5G9nZ6PskIjVCTT9HVVuP5+DBg7nwwgtJSEggIyODtm3b8v7777N48WLefffdGrGGJ0B4ODRsCCkpR7rRD3ej6Z7XKAIOH6nTsKGpV9t5v1cb9f64lZzCbOptv5X0DPPbUF15r3BqfbZS/cp8n8qh75OI+EttOUdV2xhPMFtmPvDAA15bZk6YMKFSW2Ye75iBk3HoEMdc1yo8HL8utlqVTqX3Cqfe+5Xqpe+TiNRk/jxHHW9eq9bgWRV8ETxFRERE5MQdb16rtj
GeIiIiIiKlKXiKiIiIiE8oeIqIiIiITyh4ioiIiIhPKHiKiIiIiE8oeIqIiIiITyh4ioiIiIhPKHiKiIiIiE8oeIqIiIiITyh4ioiIiIhPKHiKiIiIiE8oeIqIiIiITyh4ioiIiIhPKHiKiIiIiE8oeIqIiIiITyh4ioiIiIhPKHiKiIiIiE8oeIqIiIiITyh4ioiIiIhPKHiKiIiIiE8oeIqIiIiITzj83YA/Y1kWABkZGX5uiYiIiIiUpzinFee2itT44JmZmQlAixYt/NwSERERETmWzMxMIiIiKjxus/4smvqZ2+1m79691K9fH5vNVu2vl5GRQYsWLdi1axfh4eHV/nr+dCq9Vzj13q9UL32fRKQm8/U5yrIsMjMziY2NxW6veCRnje/xtNvtNG/e3OevGx4efsr8MDmV3iuceu9Xqpe+TyJSk/nyHHWsns5imlwkIiIiIj6h4CkiIiIiPqHgeRSn08nkyZNxOp3+bkq1O5XeK5x671eql75PIlKT1dRzVI2fXCQiIiIidYN6PEVERETEJxQ8RURERMQnFDxFRERExCcUPEVERETEJ0654JmZmcn999/PRRddRKNGjbDZbEyZMqXcuqtXr+aCCy4gLCyMyMhIhg8fzrZt23zb4JOwZs0aLr30UuLi4ggJCSEqKoo+ffrw7rvvlqlb29/r0qVLsdls5d5WrVrlVbe2v1epetV1Xnjuuefo0KEDTqeT0047jalTp1JQUFCN70RE6ppvvvmG2267jQ4dOlCvXj2aNWvG5Zdfzs8//1ymbm04P51ywTMlJYVXXnmFvLw8rrjiigrrbdq0if79+5Ofn8+cOXN444032Lx5M/369ePQoUO+a/BJOHz4MC1atODRRx9l4cKFvP3227Rq1YqbbrqJhx9+2FOvLrzXYo8++igrV670unXp0sVzvC69V6k61XFeeOSRR/jXv/7F8OHD+eKLLxg5ciSPPvooo0aNquZ3IyJ1yYsvvsiOHTv417/+xcKFC3nmmWc4ePAgvXv35ptvvvHUqzXnJ+sU43a7LbfbbVmWZR06dMgCrMmTJ5epN2LECCs6OtpKT0/3lO3YscMKDAy07r//fl81t1qcffbZVosWLTz368J7TUxMtADrww8/PGa9uvBepepV9XkhOTnZCg4Otv7+9797Pf6RRx6xbDabtX79+up5IyJS5xw4cKBMWWZmptWkSRPr/PPP95TVlvPTKdfjWXz59VgKCwuZP38+V155pdf+pi1btmTAgAHMmzevuptZraKjo3E4HEDdf6+lnUrvVSqnqs8LixcvxuVyceutt3o9x6233oplWXzyySdV2n4RqbsaN25cpiwsLIxOnTqxa9cuoHadn0654Hk8tm7dSm5uLvHx8WWOxcfHs2XLFlwulx9admLcbjeFhYUcOnSIF154gS+++IJx48YBde+9jho1CofDQXh4OIMGDWL58uWeY3XtvYpvVeb7s27dOgC6du3qVS8mJobo6GjPcRGRE5Gens7q1avp3LkzULvOTwqe5UhJSQEgKiqqzLGoqCgsyyItLc3XzTphI0eOJDAwkMaNGzNmzBieffZZ/vGPfwB1571GRETwr3/9i5dffpnExESeeeYZdu3aRf/+/fniiy+AuvNexT8q8/1JSUnB6XRSr169cusWP5eIyIkYNWoU2dnZPPDAA0DtOj85qvXZa7ljXXr7s8tyNcnEiRO5/fbbOXjwIJ9//jmjR48mOzubsWPHeurU9vfavXt3unfv7rnfr18/hg0bRteuXbn//vsZNGiQ51htf6/iX8f7/dH3TESqw0MPPcR7773Hc889R8+ePb2O1Ybzk4JnORo2bAhQbupPTU3FZrMRGRnp41aduLi4OOLi4gC45JJLAJgwYQK33HJLnXuvpUVGRjJkyBBeeuklcnNz6/R7lepXme9Pw4YNcblc5OTkEBoaWqbu0T8sRESOx9SpU3n44Yd55JFHGD16tKe8Np2fdKm9HG3atC
EkJIS1a9eWObZ27Vratm1LcHCwH1pWNc466ywKCwvZtm1bnX+vlmUB5je4uv5epXpV5vtTPHbq6Lr79+8nOTnZa4kvEZHjMXXqVKZMmcKUKVOYOHGi17HadH5S8CyHw+Fg6NChzJ07l8zMTE95UlISiYmJDB8+3I+tO3mJiYnY7XZat25dp99rWloa8+fPp1u3bgQHB9fp9yrVrzLfn4svvpjg4GDeeustr+d46623sNlsx1wrVETkaNOnT2fKlCk8+OCDTJ48uczxWnV+qtbFmmqohQsXWh9++KH1xhtvWIA1YsQI68MPP7Q+/PBDKzs727Isy9q4caMVFhZmnXvuudbChQutuXPnWl26dLFiY2OtgwcP+vkdHJ877rjDuvfee60PPvjAWrp0qfXRRx9Z11xzjQVY9913n6deXXiv1113nTVu3Djrww8/tBITE61XXnnFat++veVwOKwlS5Z46tWF9yrVo6rPCw8//LBls9msiRMnWkuXLrWeeOIJy+l0WnfccYc/3p6I1FKzZs2yAOviiy+2Vq5cWeZWrLacn07J4NmyZUsLKPe2fft2T72ffvrJOv/8863Q0FArPDzcuuKKK6wtW7b4r+GV9MYbb1j9+vWzoqOjLYfDYUVGRlrnnXee9c4775SpW9vf64wZM6xu3bpZERERVkBAgNWoUSNr2LBh1g8//FCmbm1/r1I9quO88Mwzz1inn366FRQUZMXFxVmTJ0+28vPzffSORKQuOO+88yo8Nx3df1gbzk82yzoyCE5EREREpBppjKeIiIiI+ISCp4iIiIj4hIKniIiIiPiEgqeIiIiI+ISCp4iIiIj4hIKniIiIiPiEgqeIiIiI+ISCp4iIiIj4hIKniIiIiPiEgqeIiIiI+ISCp4iIiIj4xP8DEzWQQ2SkM5YAAAAASUVORK5CYII=",
|
69 |
+
"text/plain": [
|
70 |
+
"<Figure size 800x500 with 1 Axes>"
|
71 |
+
]
|
72 |
+
},
|
73 |
+
"metadata": {},
|
74 |
+
"output_type": "display_data"
|
75 |
+
}
|
76 |
+
],
|
77 |
+
"source": [
|
78 |
+
"# top1 accuracy\n",
|
79 |
+
"CompoundT5 = [0, 0, 0, 0, 0]\n",
|
80 |
+
"ReactionT5 = [20.8, 30.4, 34.8, 46.1, 54.7]\n",
|
81 |
+
"T5Chem = [0.1, 0.0, 0.1, 0.3, 0.3]\n",
|
82 |
+
"\n",
|
83 |
+
"\n",
|
84 |
+
"# plot\n",
|
85 |
+
"import matplotlib.pyplot as plt\n",
|
86 |
+
"fig, ax = plt.subplots(1, figsize=(8, 5))\n",
|
87 |
+
"\n",
|
88 |
+
"\n",
|
89 |
+
"ax.plot([10,30,50,100,200], ReactionT5, \"o-\", label='ReactionT5', color='red', alpha=0.7)\n",
|
90 |
+
"ax.plot([10,30,50,100,200], CompoundT5, \"s--\", label='CompoundT5', color='blue', alpha=0.7)\n",
|
91 |
+
"ax.plot([10,30,50,100,200], T5Chem, \"v:\", label='T5Chem', color='green', alpha=0.7)\n",
|
92 |
+
"\n",
|
93 |
+
"\n",
|
94 |
+
"plt.ylim(-5, 60)\n",
|
95 |
+
"ax.set_xticks([10,30,50,100,200])\n",
|
96 |
+
"ax.set_xticklabels([10,30,50,100,200], fontsize=12)\n",
|
97 |
+
"# ax.set_yticks([10,20,30,40,50,60])\n",
|
98 |
+
"ax.set_yticklabels([int(i) for i in ax.get_yticks()], fontsize=12)\n",
|
99 |
+
"# plt.tight_layout()\n",
|
100 |
+
"ax.legend(loc=\"best\", fontsize=12)\n"
|
101 |
+
]
|
102 |
+
},
|
103 |
+
{
|
104 |
+
"cell_type": "code",
|
105 |
+
"execution_count": 6,
|
106 |
+
"id": "d0f29837",
|
107 |
+
"metadata": {},
|
108 |
+
"outputs": [
|
109 |
+
{
|
110 |
+
"data": {
|
111 |
+
"text/plain": [
|
112 |
+
"<matplotlib.legend.Legend at 0x7f7834b445d0>"
|
113 |
+
]
|
114 |
+
},
|
115 |
+
"execution_count": 6,
|
116 |
+
"metadata": {},
|
117 |
+
"output_type": "execute_result"
|
118 |
+
},
|
119 |
+
{
|
120 |
+
"data": {
|
121 |
+
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAqkAAAGzCAYAAAAMg46nAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/TGe4hAAAACXBIWXMAAA9hAAAPYQGoP6dpAABrAklEQVR4nO3dd3gU1eLG8e8mm142HUIJXXokdNEgQaUJXgV7Q+yAXUSx0BHFcu0oYvnZ8IKKV7mgchUEpSiClyKgdKSlkoQ0Uub3x5hNlk2QQDa7Sd7P88yT7Jmzs2fCsnlz5sw5FsMwDEREREREPIiXuxsgIiIiInIihVQRERER8TgKqSIiIiLicRRSRURERMTjKKSKiIiIiMdRSBURERERj6OQKiIiIiIeRyFVRERERDyOQqqIiIiIeJwqh9Ts7GzGjx/PgAEDiI6OxmKxMHny5Arrrl+/ngsvvJDg4GDCwsIYPnw4u3btqrDuyy+/TLt27fDz86NFixZMmTKFwsLCqjZPREREROqAKofUtLQ05syZQ0FBAZdeemml9bZt20a/fv04fvw48+fP5+233+b3338nMTGRlJQUh7ozZszg3nvvZfjw4Xz99deMGTOGJ598krFjx1b5hERERESk9rMYhmFU5Qml1S0WC6mpqURHRzNp0iSn3tQrr7ySZcuWsXPnTkJDQwHYu3cvbdq04f777+fpp58GzNDbpEkTbrzxRt544w3785988kkef/xxNm/eTIcOHc7kHEVERESklqlyT6rFYsFisZy0TlFREYsWLWLEiBH2gArQrFkzkpKSWLhwob3sq6++Ij8/n1GjRjkcY9SoURiGweeff17VJoqIiIhILWd1xUF37txJXl4e8fHxTvvi4+NZunQp+fn5+Pv7s3nzZgA6d+7sUC82NpaoqCj7/ooUFBRQUFBgf1xSUkJ6ejqRkZF/G6RFREREpOYZhkF2djaNGjXCy6vy/lKXhNS0tDQAIiIinPZFRERgGAYZGRnExsaSlpaGn58fQUFBFdYtPVZFZs6cyZQpU6qv4SIiIiJSI/bv30+TJk0q3e+SkFrqZL2Z5fedar0TTZgwgQceeMD+ODMzk7i4OPbv3+8wzEBEREREPENWVhZNmzYlJCTkpPVcElIjIyMBKuwFTU9Px2KxEBYWZq+bn59Pbm4ugYGBTnW7detW6ev4+fnh5+fnVB4aGqqQKiIiIuLB/m5opksm82/VqhUBAQFs2rTJad+mTZto3bo1/v7+QNlY1BPrHj58mNTUVDp16uSKJoqIiIiIB3NJSLVarQwbNozPPvuM7Oxse/m+fftYtmwZw4cPt5cNGjQIf39/3n33XYdjvPvuu1gslpPOxSoiIiIiddNpXe5fsmQJOTk59gD622+/8cknnwAwZMgQAgMDmTJlCj169GDo0KE88sgj5OfnM3HiRKKionjwwQftx4qIiODxxx/niSeeICIiggEDBvDzzz8zefJkbr31Vs2RKiIiIlIPVXkyf4DmzZuzd+/eCvft3r2b5s2bA/DLL7/w8MMPs3r1aqxWK/379+fZZ5+lVatWTs976aWXePXVV9mzZw8NGzZk1KhRPPbYY/j4+Jxyu7KysrDZbGRmZmpMqoiIiIgHOtW8dloh1VMppIqIiIh4tlPNay6dgkpERESqV3FxMYWFhe5uhogTHx8fvL29q+14CqkiIiK1gGEYHD58mKNHj7q7KSKVCgsLo2HDhtWy8qdCqoiISC1QGlBjYmIIDAzU8t/iUQzDIDc3l+TkZMBc3v5MKaSKiIh4uOLiYntALV0wR8TTBAQEAJCcnExMTMwZX/p3yTypIiIiUn1Kx6CeuDKjiKcpfY9Wx7hphVQREZFaQpf4xdNV53tUIVVEREREPI5CqoiIiIh4HIVUERGR+qqkBDZtghUrzK8lJW5pxrvvvovFYrFvVquV2NhYrr76av744w+3tKm8J598ks8//9ypfPny5VgsFpYvX+
6S1508ebLDz6WyrV+/fiet7+/v75L2uZru7hcREamPVq2CV16BrVuhoAD8/KB9e7jrLujTxy1Neuedd2jXrh35+fn8+OOPzJgxg2XLlrFt2zbCw8Pd0iYwQ+rll1/OpZde6lDetWtXVq9eTYcOHVzyurfeeiuDBg2yPz506BDDhw/n7rvv5tprr7WXn7hq01dffYXNZrM/9vKqnX2SCqkiIiL1zapVMG4cpKdDbCwEBEBeHmzYYJY/+6xbgmqnTp3o3r07AP369aO4uJhJkybx+eefM2rUqBpvz98JDQ2ld+/eLjt+kyZNaNKkif3xnj17AIiLizvp63br1o2oqCiXtaum1M5oLSIiImAYkJ9ftS03F158EdLSoGVLCAwEi8X82qKFWf7SS2a9qh7bMKr19EoD65EjR+xl69at45JLLiEiIgJ/f38SEhKYP3++w/NSUlIYM2YMHTp0IDg4mJiYGPr378/KlSudXqOgoICpU6fSvn17/P39iYyMJCkpiVWrVgHm3eo5OTn83//9n9Pl9cou93/xxRecc845BAYGEhISwkUXXcTq1asd6pRemt+yZQvXXHMNNpuNBg0acPPNN5OZmXmmP7o6QT2pIiIitVVBAVxxRdWek5UFv/4KVitUtMRqUREsWQKDB8MJl5H/1oIFUI3jH3fv3g3AWWedBcCyZcsYNGgQvXr14vXXX8dms/Hxxx9z1VVXkZuby0033QRAeno6AJMmTaJhw4YcO3aMhQsX0q9fP7799lt7yCwqKmLw4MGsXLmS++67j/79+1NUVMSaNWvYt28fffr0YfXq1fTv35+kpCSeeOIJwPnyenkfffQR1113HQMGDGDevHkUFBQwa9Ys+2ufd955DvVHjBjBVVddxS233MKmTZuYMGECAG+//fZp/9w6d+5McnIyUVFRDBw4kOnTpxMXF3fax3MXhVQREZH6pLDQvEGqstWAvL3h+HGzXg0rLi6mqKjIPiZ1+vTp9O3bl0suuQSAMWPG0LFjR7777jusVjPCDBw4kNTUVB599FFuvPFGvLy8aNu2La+99prDcQcOHMiePXt46aWX7CF13rx5LFu2jDfffJNbb73VXn/YsGH273v37o2XlxfR0dF/e2m/pKSEhx56iM6dO7NkyRL7WNAhQ4bQqlUrHn74YX788UeH59xyyy089NBDAFx44YXs2LGDt99+m7feeqvKc462atWKGTNmkJCQgL+/Pz/99BOzZs3im2++4ZdffqFx48ZVOp67KaSKiIjUVn5+Zu9lVWzeDLfeCjYbBAc77z92DDIzzUv+nTpVvT1n4MQQ2L59e/79739jtVrZsWMH27Zt49lnnwXMXtBSQ4YMYdGiRWzfvp327dsD8PrrrzNnzhx+++03CgoK7HXbtWtn/37JkiX4+/tz8803n1G7S23fvp2DBw9y3333OdysFBwczIgRI3jjjTfIzc11WDmsNICXio+PJz8/n+TkZBo0aFCl17/hhhscHiclJZGUlMQ555zDrFmzePHFF0/jrNxHY1JFRERqK4vFvLxela1rV+jQAZKTzed7eZVtFotZ3rGjWa+qxz7D1Ybee+89fv75Z7777jvuuOMOtm7dyjXXXAOUjUsdN24cPj4+DtuYMWMASE1NBeD5559n9OjR9OrVi08//ZQ1a9bw888/M2jQIPLy8uyvl5KSQqNGjart7ve0tDQAYmNjnfY1atSIkpISMjIyHMojIyMdHvv9FfTLt/NM9OzZk7POOos1a9ZUy/FqknpSRURE6hMvL3OaqXHjYOdOaNiw7O7+w4chPBzGjjXr1bD27dvbb5ZKSkqiuLiYuXPn8sknn9C5c2cAJkyYwPDhwyt8ftu2bQH44IMP6NevH7Nnz3bYn52d7fA4OjqaH374gZKSkmoJqqWB89ChQ077Dh48iJeXl1um0jIMo1ZOQ1X7WiwiIiJnpk8fc5qphATz5qk9e8yvXbu6bfqpisyaNYvw8HAmTpxImzZtaN
OmDf/73//o3r17hVtISAhg3pHvd8LQg40bNzrdYT948GDy8/N59913T9oOPz+/U+rZbNu2LY0bN+ajjz7CKDfTQU5ODp9++qn9jv+atGbNGv744w+XTpXlKupJFRERqY/69IHevWHLFsjIMHtQO3Z0Sw9qZcLDw5kwYQLjx4/no48+4o033mDw4MEMHDiQm266icaNG5Oens7WrVtZv349C/4anzt06FCmTZvGpEmTOP/889m+fTtTp06lRYsWDmNZr7nmGt555x3uvPNOtm/fTlJSEiUlJaxdu5b27dtz9dVXA+bd8suXL+fLL78kNjaWkJAQe69teV5eXsyaNYvrrruOoUOHcscdd1BQUMAzzzzD0aNHeeqpp1z68zr77LO5/vrr7dNp/fTTTzzzzDM0bNiQ8ePHu/S1XUEhVUREpL7y8oK/LqN7qrvvvptXXnmFqVOnsnXrVn766SdmzJjBfffdR0ZGBpGRkXTo0IErr7zS/pzHHnuM3Nxc3nrrLWbNmkWHDh14/fXXWbhwocOcplarlcWLFzNz5kzmzZvHCy+8QEhICGeffbbDSk8vvvgiY8eO5eqrryY3N5fzzz+/0qVQr732WoKCgpg5cyZXXXUV3t7e9O7dm2XLltHHxT3UHTp0YM6cORw6dIjjx4/TqFEjrr76aiZOnFjhOFlPZzGMap55142ysrKw2WxkZmaedA4zERGR2iQ/P5/du3fTokWLWrsOu9QPp/JePdW85jl9+iIiIiIif1FIFRERERGPo5AqIiIiIh5HIVVEREREPI5CqoiIiIh4HIVUEREREfE4CqkiIiIi4nEUUkVERETE4yikioiIiIjHUUgVEREREY+jkCoiIiIeYePGjYwaNcq+pGZwcDBdu3Zl1qxZpKenu7t5tUq/fv3o16+f/XuLxfK32+TJk09af9CgQTV6DtYafTURERFxu5QUyMqqfH9oKERH11x7AN58803GjBlD27Zteeihh+jQoQOFhYWsW7eO119/ndWrV7Nw4cKabVQd8dprr5FV7h/8P//5D9OnT+edd96hXbt29vImTZrYv2/ZsiUffvihw3HCwsJc3tbyFFJFRETqkZQUuPZaSEurvE5kJHz0Uc0F1dWrVzN69GguuugiPv/8c/z8/Oz7LrroIh588EG++uqrmmlMHdShQweHx9u2bQOgU6dOdO/evcLnBAQE0Lt3b5e37WR0uV9ERKQeycoyA6qfH4SFOW9+fub+k/W0Vrcnn3wSi8XCnDlzHAJqKV9fXy655BIASkpKmDVrFu3atcPPz4+YmBhuvPFG/vzzT4fn9OvXj06dOrF69Wr69OlDQEAAzZs355133gHM3sSuXbsSGBhI586dnULw5MmTsVgsbNiwgeHDhxMaGorNZuP6668nJSXFoe6ptql58+bcdNNNTudX/tI8wPLly7FYLMybN4/HHnuMRo0aERoayoUXXsj27dsdnmsYBrNmzaJZs2b4+/vTtWtXlixZcvIfeC2hkCoiIlLL5edXvh0/7li3oABKSswwGhBQ8XY6xz1dxcXFfPfdd3Tr1o2mTZv+bf3Ro0fz8MMPc9FFF/HFF18wbdo0vvrqK/r06UNqaqpD3cOHDzNq1ChuvfVW/v3vf9O5c2duvvlmpk6dyoQJExg/fjyffvopwcHBXHrppRw8eNDp9S677DJat27NJ598wuTJk/n8888ZOHAghYWFp9Wmqnj00UfZu3cvc+fOZc6cOfzxxx8MGzaM4uJie50pU6bYX/vzzz9n9OjR3HbbbU5htqp27txJREQEVquVVq1a8dhjj5GXl3dGx6wqXe4XERGp5a64ovJ93bvDpElljx94AHbvBqvV3MoLCYG4uLLHt9xSeY9qmzbw/POn3+ZSqamp5Obm0qJFi7+tu23bNubMmcOYMWN4+eWX7eUJCQn06tWLf/7zn8yYMcNenpaWxtdff023bt0A6N69OzExMTz11FPs2LGDRo0aAdCoUSO6dOnCp5
9+yt133+3wmsOHD2fWrFkADBgwgAYNGnDdddcxf/58rrvuuiq3qSo6dOjABx98YH/s7e3NlVdeyc8//0zv3r05evQoTz/9NJdddhlz58611+vYsSPnnnsubdu2Pa3XPe+887jqqqto164deXl5LFmyhFmzZvHDDz+wbNkyvLxqpo9TPakiIiJSKyxbtgzA6ZJ5z549ad++Pd9++61DeWxsrD2gAkRERBATE0OXLl3sARWgffv2AOzdu9fpNa+77jqHx1deeSVWq9Xelqq2qSpKhziUio+Pd2jn6tWryc/Pd2pjnz59aNas2Wm/7vTp0xk9ejRJSUkMGTKEl19+maeeeooVK1bw73//+7SPW1XqSRUREanlFiyofN+JnV7PP2/eOBUWBkFBzvXLX9F9661TP+7pioqKIjAwkN27d/9t3bS/7vaKjY112teoUSOnkBkREeFUz9fX16nc19cXgPz8fKf6DRs2dHhstVqJjIy0t6WqbaqKyMhIh8el43VLL7uXvvaJbays7Excf/31jBs3jjVr1nDZZZdV67Ero55UERGRWs7fv/Ltr/xl5+dnBszKttM97uny9vbmggsu4JdffnG60ehEpaHt0KFDTvsOHjxIVFRU9TSqnMOHDzs8LioqIi0tzd6WqrTJ39+fgoICp3qnO2619LVPbGNlZdWhpi71g0KqiIhIvZSXBzk5zlsN3xsDwIQJEzAMg9tuu43jFdyRVVhYyJdffkn//v0BHMZpAvz8889s3bqVCy64oNrbduJcofPnz6eoqMh+N35V2tS8eXM2btzoUO/3338/7Zucevfujb+/v1MbV61adUY9uBX5v//7P/tr1hRd7hcREalHQkPNeVDT0sw7/SsSGWnWqynnnHMOs2fPZsyYMXTr1o3Ro0fTsWNHCgsL2bBhA3PmzKFTp04sXLiQ22+/nZdffhkvLy8GDx7Mnj17eOKJJ2jatCn3339/tbfts88+w2q1ctFFF7FlyxaeeOIJzj77bK688koA2rZte8ptuuGGG7j++usZM2YMI0aMYO/evcyaNYvo05yQNjw8nHHjxjF9+nRuvfVWrrjiCvbv38/kyZNP+3L/ypUrmTFjBpdddhktW7YkPz+fJUuWMGfOHPr378+wYcNO67inQyFVRESkHomONifq97QVp2677TZ69uzJP//5T55++mkOHz6Mj48PZ511Ftdeey133XUXALNnz6ZVq1a89dZbvPrqq9hsNgYNGsTMmTOdxnBWh88++4zJkycze/ZsLBYLw4YN44UXXrCPY61Km6699loOHjzI66+/zjvvvEOnTp2YPXs2U6ZMOe32TZ06laCgIF577TXef/992rVrx+uvv86zzz57WseLjY3F29ubadOmkZqaisVioU2bNkydOpUHH3ywRi/3WwzDMGrs1VwsKysLm81GZmYmoTX5J6CIiIgL5efns3v3bvua9uJ6kydPZsqUKaSkpLhkrGtddSrv1VPNaxqTKiIiIiIeRyFVRERERDyOQqqIiIjICSZPnoxhGLrU70YKqSIiIiLicRRSRURERMTjKKSKiIiIiMdRSBURERERj6OQKiIiIiIeRyFVRERERDyOQqqIiIiIeByFVBERERHxOAqpIiIi9Uxqbio703dWuqXmptZIOywWyylty5cvP2n9p556qsLjr1y5kiuvvJLGjRvj6+uLzWajT58+zJ49m5ycHId23HXXXTVxylIFVnc3QERERGpOYXEhdy66k10Zuyqt0zK8JfNGzMPH28elbVm9erXD42nTprFs2TK+++47h/IOHTrYv7/88st58MEHHfbHxcU5HXvSpElMnTqVPn36MG3aNFq1akVubi6rVq1i8uTJ/P777/zzn/+sxrOR6qaQKiIiUo9Yvaw0DmnMhsMbaGZr5rR/b+ZeGoc0xurl+ojQu3dvh8fR0dF4eXk5lZfXoEGDk+4HWLBgAVOnTuWWW27hzTffxGKx2PcNHjyY8ePHOwVk8Ty63C
8iIlLL5Rflk1+Uj2EY9rKikiLyi/IpLC50qFtQXMDVna7G5mejqKSIIN8gAn0CCbAGcLz4ODY/GyO7jMRisZz0uMeLj9fY+VXV1KlTCQ8P56WXXnIIqKVCQkIYMGCAU/n7779P+/btCQwM5Oyzz2bRokVOdf744w+uvfZaYmJi8PPzo3379rz66qsOdZYvX47FYuGjjz7i4YcfJjY2luDgYIYNG8aRI0fIzs7m9ttvJyoqiqioKEaNGsWxY8eq7wdQRyikioiI1HJXLLiCKxZcQVZBlr3ss62fccWCK3h93esOda//7HpmrJxBt9huJOckYxgGR3KO8PPBn/k97XcS4xJJaJgAwC1f3MIVC65gf9Z++/O/3fUtVyy4glk/zqqZkzvBRx99REBAAH5+fnTr1o133nnHYf+hQ4fYvHkzAwYMIDAw8JSP+5///IdXXnmFqVOn8umnnxIREcFll13Grl1lwyJ+++03evTowebNm3nuuedYtGgRF198Mffccw9TpkxxOuajjz5KcnIy7777Ls899xzLly/nmmuuYcSIEdhsNubNm8f48eN5//33efTRR0//h1JH6XK/iIhIPWOxWLiq01X8cugXe7AtKikiwCfA3ovqia699louvvhimjZtSnJyMm+99RY333wzu3btYtq0aQDs27cPgBYtWlTp2Hl5efz3v/8lJCQEgK5du9KoUSPmz5/PI488AsADDzxASEgIP/zwA6GhoQBcdNFFFBQU8NRTT3HPPfcQHh5uP2Z8fLxDiN62bRsvvPAC99xzD88884z9+atXr+bDDz/kpZdeOs2fTN3k0pC6YcMGpkyZwk8//cTRo0eJi4vj2muvZdy4cQ5/3axfv57x48ezZs0arFYr/fv359lnn6Vly5aubJ6IiEidsOCKBQD4efvZy4a3H84lbS/B2+LtUPeD4R8A4OvlS2JcIl/v/JpW4a3IzM9kQKsB9l5UgLcuecvpuBe0vIDzm5+Pl6XmL8Z++OGHDo9HjBjBsGHD7AExOjr6tI+dlJRkD6hgjn2NiYlh7969AOTn5/Ptt98yevRoAgMDKSoqstcdMmQIr7zyCmvWrGHw4MH28qFDhzq8Rvv27QG4+OKLnco///xzjh07RnBw8GmfQ13jsnfYb7/9Rp8+fdizZw8vvPACixYt4uqrr2bq1Klcc8019nrbtm2jX79+HD9+nPnz5/P222/z+++/k5iYSEpKiquaJyIiUmf4W/3xt/o79IBavaz4W/2d7tAvrevl5cXILiMJ9Ankz+w/CfYN5uaEmx2OcbLj+nr7uv7ETsH1119PUVER69atA8ru9N+9e3eVjhMZGelU5ufnR15eHgBpaWkUFRXx8ssv4+Pj47ANGTIEgNRUx6m7IiIiHB77+vqetDw/P79Kba7rXNaT+tFHH5Gfn8+nn35Kq1atAOjfvz+HDh1izpw5ZGRkEB4ezsSJE/Hz82PRokX2rvNu3brRpk0bnn32WZ5++mlXNVFERKReS2iYQGJcIgu3LeSydpc59KLWFqU3dXl5mf1usbGxdO7cmW+++Ybc3NwqjUs9mfDwcLy9vbnhhhsYO3ZshXWqOsRATs5lIdXHx/zLzWazOZSHhYXh5eWFr68vRUVFLFq0iBtvvNEeUAGaNWtGUlISCxcu9LiQmpICWVmV7w8NhTO42iAiIlJjLBYLoxJGkVOYw6iEUR47FvVk3n//fXx8fOjWrZu97IknnuDKK6/knnvucZqCCuDYsWOsWrWqwjv8KxMYGEhSUhIbNmwgPj7e3vspruOykDpy5EheeOEFRo8ezdNPP010dDTff/89b7zxBmPHjiUoKIjt27eTl5dHfHy80/Pj4+NZunQp+fn5+Pv7V/gaBQUFFBQU2B9nnSw9VoOUFLj2WkhLq7xOZCR89JGCqoiI1A5dGnZh7iVz3d2Mv/XMM8/w22+/ccEFF9CkSRP7jVPffPMNkydPJioqyl73iiuu4IknnmDatGls27aNW2
65xT6Z/9q1a3njjTe46qqrqhRSAV588UXOO+88EhMTGT16NM2bNyc7O5sdO3bw5ZdfOi1CIGfGZSG1efPmrF69mssuu8x+uR/gnnvu4YUXXgDM8R3gPDajtMwwDDIyMoiNja3wNWbOnFnhlA+ukpVlBlQ/PwgIcN6fl2fuz8pSSBUREalO7dq144svvuA///kPGRkZBAQE0KVLF+bNm8fVV1/tVH/q1KlceOGFvPzyyzz22GOkpqYSEBBAx44deeCBB7jjjjuq3IYOHTqwfv16pk2bxuOPP05ycjJhYWG0adPGPi5Vqo/FKD9DbzXas2cPF110EQ0aNOD+++8nOjqatWvXMn36dC6//HLeeustVq1axbnnnsvHH3/MVVdd5fD8mTNn8uijj3Lo0CEaNmxY4WtU1JPatGlTMjMzHYYPVJedO+GKKyAsDIKCnPfn5MDRo7BgAZTL5SIiImckPz+f3bt306JFi0qvLop4glN5r2ZlZWGz2f42r7msJ/WRRx4hKyuLX3/9laC/El3fvn2Jiori5ptv5sYbb7SHz7QKrp+np6djsVgICwur9DX8/Pzw8/OrdL+rlZTArl3QoAGUm7VCRERERM6Qy0Lqr7/+SocOHewBtVSPHj0A2Lx5M+eeey4BAQFs2rTJ6fmbNm2idevWHv0XY3IypKebW2goVDBqQUREREROg8vmSW3UqBFbtmxxWot29erVADRp0gSr1cqwYcP47LPPyM7OttfZt28fy5YtY/jw4a5qXrWIiICYGLBYzHGoO3bAwYPmVxERERE5fS4Lqffddx+pqalcdNFFzJ8/n++++44nn3ySBx54gA4dOthXZJgyZQq5ubkMHTqUJUuWsHDhQi6++GKioqJ48MEHXdW8auHrC82bw9lnl4XVvDx4+mmYNAnKLUYhIiIiIlXgspB6ySWX8O233xIaGsq9997L0KFD+b//+z/uuOMOVqxYYZ9frF27dixfvhwfHx8uv/xybrrpJlq3bs2KFSvOaHkzV8rLM2+SKt0KC827+Zs3Ny/7e3uDvz9YXbrorIiI1DcuutdZpNpU53vUpTEqKSmJpKSkv63XrVs3/vvf/7qyKdUiNNScBzUtDcpNKuCgXTt44QWzXqnkZJgzB666Ctq0qZGmiohIHWL9q9ejSJfoxMOVvket1dBTp76+KoiONifqr+qKU/Pnw9q15tazp7kggKaoEhGRU+Xt7Y23tzdZWVmEaDoZ8WBZWVn29+uZUkitoujoqk/UP3w4HD8Oy5fDTz+ZW69eZlht2dIlzRQRkTrEYrEQExPDoUOH8PPzIygoqFYuYSp1l2EY5OTkkJWVRWxsbLW8P102mb87nOrksO5y4AB8/DF8/z2U/tQvuADuu8+tzRIRkVrAMAwOHz5MZmamxqaKR7JYLNhsNho2bHjSkOr2yfzFWePG8OCDcOWVZlhdudJx7KqIiEhlLBYLsbGxxMTEUFhY6O7miDjx8fGplsv8pdST6kb79kF4eNlqVZs2weLF5jCApk3d2zYRERERV1BPai0QF+f4+KOPYPNm+PFHSEyEa66BJk3c0zYRERERd3LZPKlSdbffDuecY45XXbECxoyB5583x7KKiIiI1Ce63O+Bdu0ye1XXrjUfWyzmONbrr3dvu0RERETO1KnmNfWkeqCWLeHxx+Gf/4QePcye1caN3d0qERERkZqjMakerHVrmDgRdu40l1wt9fXXsH27uYJVgwZua56IiIiIyyik1gLlV6cqKjKnr0pNhe++M+dZveoqiIlxX/tEREREqpsu99cyVis8/DAkJEBxMXzzDdxxB7z6KqSkuLt1IiIiItVDN07VYlu3wocfwv/+Zz62Ws0ZAgYPdm+7RERERCqjG6fqgfbtYfp0eOopiI83hwKUHxogIiIiUltpTGod0LEjzJgBu3dDixZl5f/3f1BQAJdfDhER7mufiIiISFUppNYh5QNqZib8+99QWGjOBjB4sBlWw8Lc1jwRERGRU6
bL/XVUaKg5fVX79nD8uBlYb7kF3n7bDLAiIiIinkw3TtVxhgEbNpgrWG3fbpb5+cH48dCzp3vbJiIiIvXPqeY1Xe6v4ywW6NrVnLJq/XpzNoC9e6FNm7I6hmHWExEREfEUCqn1hMUC3bqZgfXAAQgPL9v39NPmsquXXgohIW5rooiIiIidQmo9Y7FAkyZlj//4A3780fz+yy/hkkvMsBocbC4OkJVV+bFCQyE62qXNFRERkXpKY1LrOcOAtWvNMau7d5tlgYGQlATz58PRo5U/NzLSfJ6CqoiIiJwqjUmVU2KxQO/e0KsXrF4N8+bBnj1mQN2wAZo3dxwaUCovD9LSzJ5WhVQRERGpbgqpAphhtU8fOOccWLUKZs82l1sNC4OgoIqfU1BQo00UERGRekQhVRxYLHDuudCggRlSS+/6Nwz4/Xdo3Rq8vd3bRhEREan7NJm/VMhiAR+fsseHD5uLAKSnu69NIiIiUn8opMopKb29TiFVREREaoJCqpySiAjza1YWFBa6ty0iIiJS92lMqpxUXl7Z976+kJsLhw6Z01SJiIiIuIp6UqVCoaHmPKgFBeZcqUePmmNUi4rgyBGzPDLSrCciIiJS3dSTKhWKjjYn6i+/4lRaGjzyiHlT1axZEBenOVJFRETENRRSpVLR0Y4htFUr6N4dtm6FnBwFVBEREXEdhVSpkjvvNC/xR0W5uyUiIiJSlymkSpW0bOnuFoiIiEh9oBun5LQVF7u7BSIiIlJXqSdVquzAAZgzB44dg+eec3drREREpC5SSJUqCwmBX3+FkhIzsDZu7O4WiYiISF2jy/1SZaGhkJBgfr9ypXvbIiIiInWTQqqclsRE86tCqoiIiLiCQqqclt69wWqFfftg7153t0ZERETqGoVUOS1BQdCtm/m9elNFRESkuimkymnr29f8umIFGIZ72yIiIiJ1i+7ul0ql5qaSmZ9Z6f6WnWx06RJFnz7mnf7e3jXYOBEREanTFFKlQoXFhdy56E52ZeyqtE7L8JbMmzwPH2+fGmyZiIiI1Ae63C8VsnpZaRzSmMyCTML8w5y2zIJMGoc0xuqlv3NERESk+imkSoUsFgsju4zE5mejqKSIIN8g+1ZUUoTNz8bILiOxWCxkZsLixbBnj7tbLSIiInWFQqpUKqFhAolxiRw+dpgDmQfYn7mfkpISknOSSYxLJKGhOaP/W2/B7NnwzTdubrCIiIjUGQqpUqnS3lR/qz97M/dy+NhhDh07RKBPoL0XFcom9v/hB/MGKhEREZEzpZAqTkqMEv5I+wMwe1OTmidhsViIs8WRW5jr0IsK5hKpwcGQkQFbtrir1SIiIlKXKKSKg5zjOdyz5B7G/3c8R44dsfemxtniKCgucOpFBXPlqXPOMb9fscJNDRcREZE6RSFVHAT5BhHuH46ftx/7MvcBZWNTM/IySIxLpHV4az7b+hlGuRn8Syf2X7UKiorc0XIRERGpSzR/UD33Z9affL7tc27vdju+3r4A3N3rbgJ9Agn2DQbMsamjEkaRU5jDDWffwLil4zh07BB+3n5cfNbFAHTuDDYbZGbCxo3QtavbTklERETqAPWk1mMlRgmTlk3i651fs/iPxfbymKAYe0At1aVhF+ZeMpfujboz9KyhNApuRNuotvb93t5w7rnm1/37a+wUREREpI6yGEbdWXU9KysLm81GZmYmoaGh7m6OR8ovysff6m9/vHTnUtYeWMvIs0fS1Nb0lI5hGAYFxQUOxwFITwcfHwgJqdYmi4iISB1yqnlNIbUe+WL7F8zbPI8J500gvkE8YAbO8jdBnY6c4zkE+QZVRxNFRESkjjvVvKbL/fXIoexDHDt+jKU7l9rLzjSgrvlzDbd9eRur9q9yKD927IwOKyIiIvWcbpyqowzD4KcDP9Emsg0RAREAXN3palpFtKJ/i/7V9jrbU7eTfTybJX8s4Zwm55CdbWHaNHOJ1A8+AD+/anspERERqUcUUuuoN9e/yZe/f8
mgVoMY23MsADZ/Gxe2vLBaX+e6+OsIDwhnSJshWCwWQkLMsan5+bBunXkzlYiIiEhV6XJ/HXVe3Hn4evti87fhymHHVi8rl7S9BKuX+feOxVI2Z6om9hcREZHTpZ7UOiA1N5WPNn1E28i2DGw9EIAO0R149x/vEuJXc7faG4bB0l1LadC5MXzSkXXrIC8PAgJqrAkiIiJSR6gntQ5Y8+calu5ayoebPqSwuNBeXpMBFWDJjiW8/NPL/Gvfs8Q0yeH4cVi7tkabICIiInWEy0PqDz/8wJAhQwgPDycgIIA2bdowbdo0hzrr16/nwgsvJDg4mLCwMIYPH86uXbtc3bRa63jxcVJyUuyPB7UeRL9m/Xgs8TF8vH3c1q6k5kk0DW3KsLbDSDo3ENAlfxERETk9Lg2pH330Eeeffz42m4333nuPxYsX8/DDDzuMkdy2bRv9+vXj+PHjzJ8/n7fffpvff/+dxMREUlJSTnL0+mlz8mZu//J2nl/9vP3naPWy8mCfBx1WgHKHAJ8AXhr8EsPbD+f8882prTZs0HRUIiIiUnUuG5N64MABbr/9du644w5ee+01e3lSUpJDvYkTJ+Ln58eiRYvsE7p269aNNm3a8Oyzz/L000+7qom1UsPghmQVZOFl8eJo/lHCA8Ld3SQHpTdQNW0KQy4uoWnrLHx9w9zbKBEREal1XNaTOnfuXHJycnj44YcrrVNUVMSiRYsYMWKEw4oDzZo1IykpiYULF7qqebXGluQtLP5jsf1xVGAUU5Om8vrQ1z0uoJaXnpfOn22e4OvjT4D3cXc3R0RERGoZl4XUFStWEBERwbZt2+jSpQtWq5WYmBjuvPNOsrKyANi5cyd5eXnEx8c7PT8+Pp4dO3aQn59f6WsUFBSQlZXlsNUlO9N38si3j/Dm+jdJzkm2l3eK6YSvt68bW/b3LFjYm7mXwzmH2ZWh8cUiIiJSNS4LqQcOHCA3N5crrriCq666iv/+97889NBDvPfeewwZMgTDMEhLSwMgIiLC6fkREREYhkFGRkalrzFz5kxsNpt9a9q0qatOp8YUlRTZv28V0Ypusd24sMWFHh9KTxQeEM6E8ybweLcX2Ly8HatW/f1zREREREq5bExqSUkJ+fn5TJo0iUceeQSAfv364evry3333ce3335LYKB5B/jJ1o8/2b4JEybwwAMP2B9nZWXV2qCaV5jHvM3z+OnAT7w0+CV7KJ14/kS8LLVzprCOMR1Z+CP83/9Bp07Qp4+7WyQiIiK1hcvST2RkJAADBw50KB88eDBgTjtVWqe0R7W89PR0LBYLYWFhlb6Gn58foaGhDltt5e3lzYq9KziQfYA1f66xl9fWgFrqvPPMr+t+P8Bz379OiVHi3gaJiIhIreCyntT4+HjWrFnjVF46bZKXlxetWrUiICCATZs2OdXbtGkTrVu3xt/f31VNdKvikmLWH1pPj8Y9APD19mV099FYvax0je3q5tZVn+hoOKv9cf6VN4HM9Rm0iIlhePvh7m6WiIiIeDiXddONGDECgCVLljiUL15s3qneu3dvrFYrw4YN47PPPiM7O9teZ9++fSxbtozhw+tmmCkqKeLer+5l6oqpbDpSFtB7NelFt0bdTjrEoTbql+hL07Sb8EqJp1/zfu5ujoiIiNQCLutJHTBgAMOGDWPq1KmUlJTQu3dv1q1bx5QpUxg6dCjn/XUdeMqUKfTo0YOhQ4fyyCOPkJ+fz8SJE4mKiuLBBx90VfPcyuplpWN0RzLyM8gqqFszElTk3HMh6s0kjE1JFGVbIMDdLRIRERFP59IBj//617+47777mDNnDoMHD2b27Nncf//9fPLJJ/Y67dq1Y/ny5fj4+HD55Zdz00030bp1a1asWEF0dLQrm1dj9hzdw5MrnyQ9L91edsPZN/DmsDc5N+5cN7asZkREQHxnCxYsrFxplh3MPuiw8piIiIhIeRajDiWFrKwsbDYbmZmZHnUT1cNLH+a31N8Y3HowY3
qMcXdz3OKrr2DuXLj0Ugju/jnv/vouY3uM5aJWF7m7aSIiIlKDTjWvuexyf12VmptKZn5mpftt/jZ8vX0J9Am0LxF649k38p8//sNl7S6rqWZ6nKQk6NcP/P3hk9+KKDaK2ZKyRSFVREREKqSe1CooLC7kmk+vOekKSgE+ATQKacQtCbcwqPWgam9DXWAYBj8d+ImejXvWuZvERERE5ORONa/V7kk4a5jVy0rjkMZkFmQS5h/mtGUWZBLhH0FeYR5r/1zr7uZ6rIMHLfRq0ksBVURERCqly/1VYLFYGNllJCv3raSopAibn430/HT8vP3sj5/o+wSFJYX0aarllU5UUgIPPgg7dsArr0CzZuZ0XO//7306xXSyzxkrIiIiop7UKkpomEBiXCLJOcn8mfUnO9J3sO/oPpJzkkmMS6RH4x6cG3euegkr4OUFfy0yZr/L/8vtX/LZts94Ye0L5Bbmuq9xIiIi4lEUUquotDc10CcQf6s/Vi8rVm8rAdYARnYZqXD6N/r2Nb+uWAGGAUPPGkp8TDx397ybQJ9A9zZOREREPIZC6mko7U3NyM/g7AZnA9C3WV8SGia4uWWer2dP8PWFQ4dg507w8fZhev/p9G7S291NExEREQ+ikHoayvemHsg+QKBPoHpRT5G/vxlUoeySf/mfW35RPgeyDrihZSIiIuJJFFJPk703NS+DxLhE9aJWQWKi+XXlSvOSf6kDWQe4d8m9TF4+WeNTRURE6jmF1NNksVgYlTCKAa0GMCphlHpRq6B7d7NHNSUFtm0rKw/zD6OopIgio4jknGT3NVBERETcTpP5i1ssWQJRUZCQANZyE6HtObqHyIBIQvxC3Nc4ERERcRktiyoebfDgisubhzWv0XaIiIiIZ9LlfvFY/zv8P6Z+P5WikiJ3N0VERERqmEKquM2RI/Dee/Dxx8778ovymbVqFj8f/JnPt31e420TERER99LlfnGbQ4dgwQIIDYXLL3ccm+pv9eeenvfw88GfGXbWMPc1UkRERNxCIVXcpnNnsNkgMxM2boSuXR3392rSi15NermncSIiIuJWutwvbuPtDeeea36/YsXf1994ZCN1aDIKEREROQmFVHGr0on916yBwsLK6835ZQ6PffeYxqeKiIjUEwqp4lYdO0JEBOTkwPr1ldeLs8VhwUJ+UX7NNU5ERETcRmNSxa0sFrM39d//Ni/596pkCOrAVgNpF9VO86iKiIjUE+pJFbdLTISQELNHtTIWi8UhoGpsqoiISN2mnlRxu7POMudLtZ7iuzG7IJsX177IeXHn0a95P5e2TURERNxDIVXczmI59YAKsHTXUtYeWMvW1K30btIbf6u/6xonIiIibqGQKh7DMGD7dmjeHPxPkjv/0fYf7M/cz7C2wxRQRURE6iiNSRWPMXEiPPSQOR3VyXh7eXNv73tpGd6yZhomIiIiNU4hVTxGu3bm11OZ2L+8lJwUtqdur/4GiYiIiNsopIrH6NvX/LphAxw7dmrP+SPtD+756h5mrJxBZn6m6xonIiIiNUohVTxG06bmeNSiIli9+tSeE2eLI9w/nOjAaAqKC1zaPhEREak5unFKPEpiIuzZY17yv+iiv6/vZ/VjatJUwvzDsHrp7SwiIlJXqCdVPEpiovl140bIPMWr91GBUQ4BtcQocUHLREREpCYppIpHiY2FNm2gpATWrq3acw3D4KsdX3H/V/eTX5TvmgaKiIhIjdD1UfE4I0eCtzd07Fi15+UW5jJv8zzS89L5esfX/KPdP1zTQBEREXE5hVTxOGeffXrPC/IN4sFzHmRn+k4uaXtJ9TZKREREapRCqtQp8Q3iiW8Q7+5miIiIyBnSmFTxSKmp8PrrMH366R+jxChh6c6lFJcUV1/DREREpEaoJ1U8krc3LF4MhgHJyRATU/VjzFw5kzUH1nAk5wjXx19f/Y0UERERl1FPqnik8HDo3Nn8fuXK0ztG32Z98bf60yikUfU1TERERGqEel
LFYyUmmvOlrlgBI0acxvObJRLfIB6bv636GyciIiIupZ5U8Vh9+oCXF+zaBQcOnN4xygfUwuJCDMOoptaJiIiIKymkiscKDYWEBPP7073kX+rPrD8Z9804luxYcuYNExEREZdTSBWPVrpM6ooVZ3acDYc2sOvoLhb8toDjxcfPvGEiIiLiUhqTKh6td29o1Ai6d4eiIrCe5jt26FlDyT6ezaDWg/D19q3eRoqIiEi1sxh1aJBeVlYWNpuNzMxMQkND3d0cqSaGARaLu1shIiIi1eFU85ou94vHc0VA3Z66nV8O/lL9BxYREZFqoZAqtUJREaxfD4cOnfmxNh3ZxMP/fZhnVj1DSk7KmR9QREREqp1CqtQKr74KkybBV1+d+bHaR7enVXgrusZ2JdAn8MwPKCIiItVOIVVqhR49zK8rV5pjVM+E1cvKtP7TeKjPQwT5Bp1540RERKTaKaRKrdC9OwQEQEoKbNt25scL9AnEUm6wa15h3pkfVERERKqNQqrUCr6+5nRUcOYT+5dXWFzIW+vfYuzisWQXZFffgUVEROSMKKRKrVE6sf8PP0BJSfUcs9goZu2BtaTkprDmzzXVc1ARERE5Y5rMX2qNhAQIDoaMDNiyBTp3PvNj+lv9GX/ueHam76RleEt2pu+ssJ7N30ZUYNSZv6CIiIicEoVUqTWsVjjnHFi6FH79tXpCKkAzWzMe+e8j7MrYVWmdluEtmTdiHj7ePtXzoiIiInJSCqlSq4wYAUOHQosW1XdMq5eVxiGN2XB4A01CmpCSk0JMcAxeFnM0zN7MvTQOaYzVS/9dREREaorGpEqt0rgxtGxZvatQWSwWRnYZic3Pxq6MXaTlp5Gel06QbxBFJUXY/GyM7DLSYTYAERERcS2FVKm1znS+1PISGiaQGJeIt5c3vl6+RAREYBgGyTnJtApvRVZ+Fhl5GdX3giIiInJSCqlS6+TkwPPPw803Q2Fh9RyztDc1IiCCZmHNCPELIasgi0CfQLwsXjy35jl2pO+w1997dC+zf57Nd7u/q54GiIiIiAOFVKl1AgNh40ZITYX166vvuKW9qam5qfZe1MS4RPq36E/H6I7E2eLsdX9P+53FOxazbPcyh2M8u+pZnv7haf7M+tNeZlRnl6+IiEg9oZAqtY7FAuedZ36/YkV1HtfsTQ30CWR/1n4CfQIZ2WUkd3S/g6cufIoGwQ3sdZuHNefy9pfTt1lfe5lhGKw9sJYf9v/gcNxle5Zx0+c38faGtx3KM/IyFGBFREQqoduVpVZKTIR//xt++gkKCsDPr3qOW9qbunDbQi5rdxkJDRMqrNcmsg1tIts4lBkYjO8znj+z/qRhcEN7+Z9Zf5KWl0ZBUUFZXcNg9H9GU1hSyCuDXyE2JBaA5JxksguyaRzaGH+rf/WclIiISC2kkCq10llnQUwMJCfDzz+X9ayeKYvFwqiEUeQU5jAqYVSV7uj3snjRo3EPejTu4VA+ov0IejXuRaBPoL3s2PFjFBQXUFxSTGRgpL186c6lfLzlYwa0HMDdve62ly/5YwkNgxvSKaaT5moVEZF6QSFVaiWLBfr2hU8+gZUrqy+kAnRp2IW5l8yttuMF+QbRNqqtQ1mIXwifXPEJqbmp+Hr7Ouyz+dloEtrE/ji7IJvX1r0GwIIrFuCDGVJX71/NroxddG/U3en4IiIitV2NjkmdO3cuFouF4OBgp33r16/nwgsvJDg4mLCwMIYPH86uXZWvACSSmGh+XbcOcnPd25bT4e3l7TDOFeC6+Ov4YPgHXNruUntZQXEBvRv3Jj4m3mEIwKr9q/h4y8dsSt5kL8stzOXx7x5n9s+zKTFKXH4OIiIirlJjPakHDhxg3LhxNGrUiMzMTId927Zto1+/fnTp0oX58+eTn5/PxIkTSUxM5NdffyU6Orqmmim1SIsW0KsXtG4NJXUsj5UfZhAVGMVjfR9zqtO9UXd8vX1pH9XeXnYg6wD/O/I/9mXuY3SP0fby2T
/PZmvqVq7udDV9mvYBoLikmMKSQo19FRERj1RjIfXOO++kb9++RERE8MknnzjsmzhxIn5+fixatIjQ0FAAunXrRps2bXj22Wd5+umna6qZUotYLPD44+5uhfuc3/x8zm9+vkNZTFAM9/W6j8ISxwlkd2bsZPfR3Q69q3sz93LvV/fSIqwFLw1+yV6+P3M//lZ/ogKjtMqWiIi4TY1c7v/ggw/4/vvvee2115z2FRUVsWjRIkaMGGEPqADNmjUjKSmJhQsX1kQTReoEm7+NC1pewKDWgxzKx/UZx8S+E+kU08ledij7EIBTT+qrP7/KzV/czIq9ZfN7peels2LvCvYc3eO6xouIiJTj8p7U5ORk7rvvPp566imaNGnitH/nzp3k5eURHx/vtC8+Pp6lS5eSn5+Pv78uSUrFCgrMO/xDQuDss93dGs/UMLihw7RYAOfGnctHDT4ipzDHqb7Vy0rj0Mb2x1tTtvLMqmdoG9mWZwc8ay//cvuX9mNFBES4qPUiIlIfuTykjhkzhrZt2zJ69OgK96elpQEQEeH8Cy4iwlw/PSMjg9jYWKf9BQUFFBSUzT2ZlZVVTa2W2uSLL+C996BLF4XUqgrxCyHEL8Sh7KkLn6K4pNjhUr+vty8dojrQOqK1Q91Ptn5Cel46Z0WeZQ+pm5M3s/iPxZzd4GwGth7o+pMQEZE6yaUh9dNPP+XLL79kw4YNfzu27WT7K9s3c+ZMpkyZckZtlNrvvPPMkLpxI2Rmgs3m7hbVft5e3g6PK5r/1TAM+jfvz59ZfzpMmbUtdRsr963Ey+LlEFIf+uYh/K3+3NXzLvusBkUlRXhbvDX2VUREnLgspB47doyxY8dy991306hRI44ePQrA8ePHATh69Cg+Pj5ERpoTmZf2qJaXnp6OxWIhLCyswteYMGECDzzwgP1xVlYWTZs2rd4TEY8XGwtt2sAff8CPP8KQIe5uUf1QuozsibrGdsXb4u0wXKCgqIBtadsACPAJsJcv+n0RH2z8gIvbXMyohFH28n2Z+4gJitHMAyIi9ZjLQmpqaipHjhzhueee47nnnnPaHx4ezj/+8Q8++eQTAgIC2LRpk1OdTZs20bp160rHo/r5+eFXXethSq2WmGiG1JUrFVLdrWV4S1qGt3Qo8/by5pmLnuFQ9iFC/cpukDyQdYCC4gKHVbQKiwu5a/FdGBi8f9n7hPmHAbDn6B6O5h+leVhze5mIiNRdLgupDRs2ZNmyZU7lTz31FN9//z1LliwhKioKq9XKsGHD+Oyzz5g1axYhIeb4uH379rFs2TLuv/9+VzVR6pDERHj7bdiyBdLSIDLy758jNcfqZaVdVDvaRbVzKL+j+x1c2u5Shx7TjPwMQvxCKCopwuZXNnZjyR9LWLxjMZe3v9zeg1tUUsT8LfNpEtqEc5ue6zRMQUREai+XhVR/f3/69evnVP7uu+/i7e3tsG/KlCn06NGDoUOH8sgjj9gn84+KiuLBBx90VROlDomKgvbtYetW+OEH+Mc/3N0iORUnziIA5lyvHw7/kNzCXIexqjZ/G01CmhBni7OXHTl2hHmb5+Hn7UdiXKK9fMkfS9ibuZe+zfrSIbqD609ERESqXY0ui1qZdu3asXz5cnx8fLj88su56aabaN26NStWrNBqU3LKSpdJ3bvXve2Q6hHoE+jw+NrO1zJ76GySWiTZy7wsXlzU8iIS4xIdAu2aP9fwnz/+w/7M/fay5Jxk7l1yLy+secHhuFo+VkTEM1kMwzDc3YjqkpWVhc1mIzMz02FhAKkfjh2D7GzzRiqp31btX8Xvab+T1DyJZmHNAFh3cB1Tvp9CM1szXhnyir3utO+nsevoLu7sdie9mvQCzBu9so9nExkQedozD6TmppKZn1npfpu/jajAqNM6tohIbXaqea3GlkUVcbXgYHMT6dO0D32a9nEoOyvyLB5PdF5Hd3/WflJzUx1mHdiaupUnlj1By7CWvDj4RXv5luQtBPgE0CS0Cb
7evpW+fmFxIXcuupNdGbsqrdMyvCXzRsxzuGlMRETKKKRKnZSXBwEBf19P6o9Qv1B7T2l5zw14jj+z/qR5WHN7WUZeBt4Wb6KDHIcbvfrzq+zP2s/UflNJiE0AzBkKNhzeQKvwVrSPbg/8NdY2pDEbDm+gma2Z02vuzdxL45DGWL30ESwiUhl9QkqdYhjw9NOwdi288go0bvz3z5H6LcQvxB4uSyW1SCKxWSJ5hXn2MsMwCPcP52j+UYebvTYe2cgbv7xB99juTOo3CTDnkA33D6e4pJj8onwiA8umm8jMz8TmZ2Nkl5FaxEBE5CQUUqVOsVggPx+Kisw5U6++2t0tktrK6mV1WDLWYrEw44IZgBlYS0UGRtKrcS+HWQQMw2Bj8kYsFgvJOclEBERgsVg4nH2Y7Wnb6dGoBwkNE+z1P978MVYvKwNbDbS/Zl5hHkUlRQT7BivMiki9pJAqdU5iIvzyC6xYoZAqrlE+NPZs3JOejXs67C8sKeT6+OtpFd6K/+7+L1kFWdj8bWQfz8ZisdCzcU/7MQzD4F9b/kVRSRHnNzvfHlKX7lrKm+vfJDEukfHnjrcf+4U1L+Bl8eL6+OuJCIgAzJu0UnNTiQqM0s1YIlJneMQUVCLVqXdvsFph/35NRyXu4evty/D2w5l10Sz6xvUlOScZwzAoMUoY0X4E9/W+z163xChh2FnDuKDFBQ6rceUW5gI4lBmGwfd7v2fprqUUlxTby3/c9yMPLX2Idza849COCf+dwCP/fYQjx47Yy/Zn7ufbXd+yPXW7Q906NNGLiNQR6kmVOicoCLp1M8elrlgBN9zg7hZJfWWxWBjZZSQr961kf9Z+gn2DuavnXTQIbmCv4+3lzc0JNzs99+pOV3N5h8spKimylxkY3NHtDnvPbCkfbx8aBDVw6EU1DINtadsoKilyWIlr/aH1zN0wl75xfXko6iF7+c1f3ExxSTFPXvAkTUKbALA9dTs/HfiJ1hGtOafpOfa6ablpBPoE4m/111AEEXEZhVSpk/r2NUPqypVw/fXmWFURd0homEBiXCILty3ksnaXOYxF/TtWL6vDDABeFi8GtR7kVG9ImyEMaTPEqXxKvylkFWQR5h9mL4sMjCShYQKtIlrZywzDICMvg2Kj2GGJ2m2p25j/23z6xvV1CKn3fX0fR/OP8tKgl2gR3gKADYc28M3Ob+gY05GhZw21192ashVfb1+a2pqedNouEZETKaRKndSzJ/j6wqFDsHMntG7t7hZJfWWxWBiVMIqcwhxGJYyqsZ5Hi8VCfIN4p/Lz4s7jvLjznMrnXjKXrIIswv3D7WXNw5oztM1QWkeU/QcyDIOCogLAcSjC3sy9/LD/B7y9vB1C6pM/POkUaFftX8W/Nv+LrrFdGdllpL3u8j3L8bJ4kdAwwT421zAM9daK1FMKqVIn+fvD8OEQEgIxMe5ujdR3XRp2Ye4lc93djEpZLJYKb7o6u+HZnN3wbKe6/7r8XxQUF+Dn7Wcvj28Qz+1dbyc2pGzJN8MwiAqIwtvi7TA84cixI+w6uos4W5zDseeun0tmQSYvD37ZHlK/2/0ds9fNpk/TPjxwzgP2uvO3zKfEKOGilhfZp/jKK8wjryiPEN8QLZIgUgcopEqddd117m6BSN1ksVgchgWAuYJWy/CWTvX+OeifTs9PbJZInC3OoScWzKB7NP+ow/CErIIsCooLnG7s+mL7F2QWZNK7SW97SP1x/4+8uPZFusV2Y3K/yfa6L699mcKSQq7pdI09RKfnpXMg6wBRgVEOwVpEPIdCqoiI1KjKpsoqP9VWqSFthnBO03Pwtng7lA9uPZiM/AwiA8oWSsgvysfL4uUUftccWENWQRYj2o+wl60/tL7CQDtx2URyjudwd6+77auQHcg6wKbkTTQKaeQwhEJDEURcSyFV6rRjx2D1anNy/8GD3d0aEakqP6sfDYMbOpVfF+98qWToWUO5uM3FFJYUOpTfknALmfmZDsHYx8
uHJiFNnI69I32HOZ8tZeHzt5TfePXnV+kW280hpI75zxiOFhxl8vmTaRvVFoBdGbtYvmc5LcJakNQiyV43OScZf6s/Ib4hCrYip0ghVeq0rVvhpZcgPBwGDgQvzQwsUqdZLBanWQT6t+jvVO/85udzfvPzncqf6PsEWQVZDtOERQRE0LNRT86KPMuhbmZBJseOHyPAJ8BetjN9Jwu3LaR7bHeHkDpx2UQOZB9g5gUz6RTTCYAtyVtYuG0hZ0WexZUdr7TX/S3lN7wsXjSzNXM4tkh9o5AqdVpCAgQHQ0YGbN4M8c43O4uI2LWPbu9U1q1RN7o16uZUPvvi2WQVZDn0xsbZ4ri07aX2uWZLlc53W34owsHsg6w9sJYSo8Sh7otrXuTgsYMOgfaXg7/w1oa36BTTiTE9xtjrfr/ne4pKiuga25XwAHNmhhKjBAsW9dhKraeQKnWa1QrnnANLl5pzpiqkikh1sfnbHGYtAGgb1dZ+6b+8uZfMNRdWKDe2tkN0B8b2GGtf3rZUTFAMxUaxww1kaXlp7M/a7zQ84cNNH3Lo2CGeuuApe0hd++danv7xabrGdmXi+RPtdT/57RPyCvO4sOWF9pvF8grz7ItDnHgznIi7KaRKnde3rxlSf/wR7rjDDK4iIjWt/MIMAI1DG9M4tLFTvWn9pzmV9Wzckxn9ZxBgdbz8f3aDs4kNjrXPcADmjAjFRrHDuFqAb3Z+w6Fjh+jWqJs9pP56+Fee/OFJ2kW245kBz9jrvr7udTLzM7my45X2+W2P5h9lV8YuogKjnKYPE3EF/bqWOq9zZ7DZIDMTNm6Erl3d3SIRkaoJ8w9z6FktNbbnWKeyC1peQPdG3TFwnLZrUOtBpOSkEBNUNnl0QXEBvt6+TjMirD+0nkPHDnFJ20vsZb+l/MbMH2bSPqo9sy6aZS+fvmI6yTnJjO4+2j5c4sixI/x04CdiQ2Lp3qi7vW5RSZFTWBepjN4pUud5e8O558LixbBihUKqiNRtVi+rQ89qqeHthzuV9Wvej37N+9nHzJa6qctNpOWm0Sikkb3Mx8uH5rbmTuNt92Xu49CxQw5lOzN2Mmf9HNpHtXcIqeOXjmdf5j4eS3yMhFhzieD9mftZsmMJTUKbOCzve+TYEaxeVmz+NgXbekr/6lIvJCbCkiWQl+fuloiIeJ4TQ2Cfpn2c6vRo3IMejXs4lT9y3iMczT/qMATA5mfj3KbnOgXa7IJsCooLHMa/7svcx5e/f0n7qPYOIfXpH5/mj/Q/eKLvE/Rs3BMwpwj7YOMHtAhr4bCk7m8pv1FcUkzzsOb21cqk9lNIlXqhY0d47z0IC3N3S0RE6pYTVxoD6BjTkY4xHZ3KXxz8IlkFWQ43izUObcyVHa6ssPf3xMUZjhw7wi+HfiGv0LHHYe76ufyR/gcT+060B+nfUn7jn6v/SZvINg4LRazcu5Kcwhy6xna1D30oLikGwNvLcdEIcS+FVKkXLBYFVBERdwv0CSTQJ9ChrHlYc/vqXuU9P/B5p+Vw20S24b5e9xHkG+RQ3iCoAXmFeQ7jdtPz0jmcc9gp/C7cttAeaEtD6paULTz23WO0jWzLswOeLau7dSEZ+Rn0b9Hf3sb8onxSc1Ox+dnUa+tiCqlS76Snm3On+vr+fV0REXGfE+d6jQmK4YKWFzjVe/i8h53KujTswqwLZzn1jp7d4GwiAiIcbiDLKsgCzHG35a3Yu4IdGTvoHNPZHlK3p27n8WWP0zS0Ka9d/Jq97tz1czl87DAj2o+w30CWXZDNbym/EREQQZvINlU4c9dKzU0lMz+z0v02f1uFSxfXNIVUqVdeeAG++w7Gj4fzznN3a0RExFWCfYMrXJyh/FjWUn2a9uGDyz5wuoFsQKsBdD7W2WFs7fHi4wT5BGHzc5wjd3PyZnZm7GRgq4H2sp0ZO5m+cjpxoXG8evGr9vJZP85id8Zubu16q32hiPS8dL7f8z3RQdGcF1f2C+p48XF8vHyqbX
GGwuJC7lx0J7sydlVap2V4S+aNmIePt0+ldWqCQqrUK+HhYBjmxP4KqSIiAubY1xMXZgAY3GawU1mPxj34+PKPnYYi3BB/Aym5KfZ5ZcG8Ia1NRBtig2Md6h7MPsif2X86TBO2L3Mfb//6Ns1szRxC6rTvp7EpeRPj+oyzlx8+dphPf/uUBsENuLzD5fa6h48dBsylfE9cHrh8mxqHNGbD4Q00szVz2r83cy+NQxp7xIwK7m+BSA1KTIRPPoF16yA3FwID//45IiIiJzqxZ7OipXM7xXTi+YHPO5U/1Och0vPSHcbihviG0K9ZP6cVyEoXZyi/kMPhY4f5audXNLM1cwipr/70Kr8e+ZUHej9AUoskwJzi6/V1r9MopBFje47FYrEwsstIlu5aytH8o8QExdh7TDPzM7H52RjZZaRHLKurkCr1SosW0LgxHDgAa9dCUpK7WyQiIvVNRauNtYpoxYN9HnSq+8yAZ8guyCbYN9heFhMUw3WdryPIx/EGMm8vb6fFGdLy0tiYvNE+7hYgoWECQb5BbE3Ziq+3L9FB0RiGQXJOMgNbDSShYUJ1neoZUUiVesViMXtTP/7YvOSvkCoiIp7M19vXaYaCRiGNuLrT1U51J/ebDOAwFCHOFsdDfR5yuHxvsVi4oPkF7Dm6h+PFxwGzxzbQJ9BjelEBvNzdAJGa1rev+XXDBsjOdm9bREREqlv5kBkREEHfZn2dFmiYeeFMru10LbmFufZe1MS4RI/pRQWFVKmHmjaF5s2hqAjWrHF3a0RERGpe6djUQJ9A9mft97heVNDlfqmnrrwSCgqgd293t0RERMQ9EhomkBiXyMJtC7ms3WUe1YsKCqlSTyUmursFIiIi7mWxWBiVMIqcwhxGJYzyqF5UAItx4kRftVhWVhY2m43MzExCQ0P//gkiIiIiUqNONa9pTKrUW8eOwb//DbNnu7slIiIiciKFVKm38vNh7lxYsgTS0tzdGhERESlPIVXqragoaN/eXCb1hx/c3RoREREpTyFV6rXSOVNXrnRvO0RERMSRQqrUa+eea65CtX07HDni7taIiIhIKYVUqdfCw6FzZ/N79aaKiIh4DoVUqfdK50xVSBUREfEcCqlS7/XpA/7+0LAhHD/u7taIiIgIaMUpEUJD4cMPwdfX3S0RERGRUupJFUEBVURExNMopIqUc/AgpKe7uxUiIiKikCryl7fegjvugP/8x90tEREREY1JFQFSUiAwEHJyzJB6zjnm/KmlQkMhOtp97RMREalvFFKl3ktJgWuvNb/u2WMuk7ppE/j5ldWJjISPPlJQFRERqSm63C/1XlYWpKVBQIAZRq1WKCmBsDBz8/Mz92dlubulIiIi9Yd6UkX+EhAAsbFw7Ji5BQWV7SsocF+7RERE6iP1pIqUY7OBt7c5qX9ysuM+w3BPm0REROoj9aSKlOPlBeHhkJoKPj5l5Tk5cPfd0KaN2dvasGHZ10aNICrKfK6IiIhUD4VUkRPExZmX/gMDy8oKC81L/rt3m9uJHnkEzj3X/H73bti40QyxsbHQoIEWCxAREakqhVSRE1itZrgsz2aDqVPNm6gOH4ZDh8q2I0cc6//6K7z9tuPzIyPLQutll0HTpma5YThOdSUiIiImhVSRv+TlVV5usZgBs1Ur5/0lJY5BMzbW7FU9dMgMtLm55uwAaWmweTMMGVJW94sv4F//ch4+UPo4PFwhVkRE6ieFVKn3QkPNns60tMrv4o+MNOtV5MSxqL17mxuYPaXZ2WW9rocPQ+PGZXUPHTL3Z2fDH384H/v5581xsABbtsDevWUBNjra7PUVERGpi/QrTuq96Ghzov6TzYN6uitOWSzmc0NDoW1b5/033QSDBpUF2PLDCFJSzEBa6scf4csvyx57eUFMTFloveYas+cVNIxARERqP4VUEcwA6o7VpPz9oXlzcztRUZFjT2mLFtCrV1mgPX7c/Hr4sDkO9vrry+q+9RasWFEWYE+ckSA0VC
FWREQ8m0KqiIc68VL+RReZG5g9pRkZjr2uISFldQ8eNPdnZMDWrc7H/uAD82YwgJ9/NuuVhtioKAVYERFxP4VUkVrIYoGICHPr2NF5/4MPmkG1dAhB+aEEeXmO42u//hrWri17bLWa02aV9sDedFPZFFoaRiAiIjVFIVWkDgoKMm+4Kr3pqryiIseg2a6dWXbokLnKVlERHDhgbv7+cNttZXWfegp+/70swJ44jKD8UrIiIiJnQiFVpJ45cRjB5ZebG5jTaaWmlvXClk6/VergQXN/aips2uR4HF9f+OSTsvorVpiLIJSG2bAw9cKKiMipc1lI/e677/jggw9YtWoV+/fvJywsjO7duzNx4kS6devmUHf9+vWMHz+eNWvWYLVa6d+/P88++ywtW7Z0VfNEpAKlMwbExFS8f/r0yocRnBhCP/nEcXUuf3+zt7VhQ3NVrxtuKNunYQQiIjUnJcU1M9pUN5eF1NmzZ5OWlsa9995Lhw4dSElJ4bnnnqN37958/fXX9O/fH4Bt27bRr18/unTpwvz588nPz2fixIkkJiby66+/Eu0JPyURAcybrWw2aN/eeV9hoePj+Hjzg670xq78fNizx9wOHHAMqePGmXPFlh8+UH44gZaVFRGpHikpcO215tzglYmMNKdmdHcEsxiGYbjiwMnJycSc0B1z7NgxWrduTadOnfjvf/8LwJVXXsmyZcvYuXMnoX/dzbF3717atGnD/fffz9NPP33Kr5mVlYXNZiMzM9N+LBFxv6Iic/nY0p5XX18YMMDcZxhw9dXmylwVadIEZs8ue/zNN2W9srGxjrMaiIjIye3cCVdcYS7zHRDgvD8vz1zYZsGCildZrA6nmtdc1pN6YkAFCA4OpkOHDuzfvx+AoqIiFi1axI033ujQyGbNmpGUlMTChQurFFJFxDNZreZKW+VX2yrv1VedFzMofVx+QQOAt9+GnJyyx0FBZT2u7drBP/5Rtk/DCEREKhYQUHaza0mJ4+qJla2+WNNq9MapzMxM1q9fb7/Uv3PnTvLy8oiPj3eqGx8fz9KlS8nPz8ff378mmykiNchiMedmjYqCTp0c9xmG4zCCoiJzydnSAJuebgbWHTvMLS/PMaSOGgWBgY4zEDRqZH5t0EDLyorUd4ZhBrTiYuctKMjsbQTzcyYlpeJ6xcXmOPuoKLNuejr873+VHzc+Hlq3NuseOQKLF5vlFdU/7zzo2dOse/AgvPGG4/7yzxk4EC6+2Kx76BA88ojz8UpKzLGoaWnmfQRgLgxz9Gjl9yK4U41+RI8dO5acnBwee+wxANL+GhARERHhVDciIgLDMMjIyCA2NrbC4xUUFFBQLu5nnWwUsIjUOhaL43hUqxXuu6/scUFB2apbhw6V/ZIA85dKWpq5/XXxxkHXrjBlStnjL74wn18aaCu6DFYVteXGBBEww5phmEHGai27ApGba27lw1BRUdnjZs3K/o8ePGj+PzyxTunWu3fZ8Jxt22Dz5sqD3CWXmH9IAvzyCyxbVnmQGzWqLPStWgXz5pW14cTn3H8/dO9u1l2+HJ57rvKfyUMPQd++5vf/+x/MnFl53bvuMkMiwN698Pzzlde97bay9qanw2efVV63SZOykJqfD+vXV143Pb3se8NwfFxe6b91KYvF8bEnqbGQ+sQTT/Dhhx/y8ssvO93dbznJ9biT7Zs5cyZTyv+WEZF6xc/P/CXZrJnzvsBAePPNUxtGkJtr1i3PZiu7eSshAZKSyvb93TCC2nRjglSsopBVuhmGY6/Tn3+afxRVVB+gR4+yuuvXO/bIlX+OYcCVV5bVXboUdu2qvOds3LiyqwHz58OGDRW3t6TEDGOll3bnzoVvv624rQDvvmu+P8F8j/7735X/nF5/vWwYz7ffmu2oTMuWZSF140Z4//3K6557bllIPXgQvv++8rrl/xjMyTFvzqzM8eNl35e/vH0iLy/Hn4m/P4SHg7e3uc/b23ErPzbeZjM/M8rvL/+cJk
3K6kZGwqWXmv+OpXXKf9+hQ1nd6Gjzj/QT65Ru5T/ToqPhxRcdj1najn374Oaby+r6+JT9rD1NjYTUKVOmMH36dGbMmMFdd91lL4/8639BWgWf5Onp6VgsFsJK+6MrMGHCBB544AH746ysLJo2bVp9DReRWstiKZvyqksXx30nDiM4fhzOP98MsAcPmjMNZGaa27Zt5i+o0pCalwcjRzrOQlD++6iosstpJ7sxIS3NrOfJIbW0x6U0sFksZZc/DcNc/KGiAFVUZAaiuLiyY/38s/kzr6h+eDj06lVWd+FC59670i06umxeXzBvqjt6tKzXrPxxY2Kg3K8IpkwxZ5YorVO+py0qCl55pazuuHHmDSYVCQtzDFgvvwy//VZxXX9/8waUUl98YfYMVsRicQyp69aZPYOVuf/+spB64IDZM1mZoqKy748fh2PHKq9bPpxVFohKt/J/rEVFmT2ElQW58v8XWrSACy4oC08nPqc0JIO5qt6tt5r7K6rfvHlZ3a5dzanyytcp/5zyx+3d21wiuqIweeIfoV27wnvvVf4zK69lS5g69dTqxsTALbecWt2QEPNndip8fMx2VCQz8+QB3ZO4PKROmTKFyZMnM3nyZB599FGHfa1atSIgIIBNJ84KDmzatInWrVufdDyqn58ffqWfmCIip+jEYQRhYWYoKZWT4zgXbPk7XEsXOdi923Ee2FKDBpkbmIEuO9v86u1dFvpKQ8KBA2XHzs6GlSsr7gUrLjZvCiu9CJWdbYakysbHde0KQ4aYdY8dM39hVlb3nHPKfknm55tTg5UPh+Wddx48/HDZ41tvrfxn3L07TJpU9vippxx7scrr1MkxpH76qfmLtCJt2jiG1HXrzLBckROHWyQnm/+mFQkMdHzs7e1cpzTA+Pg4lkdGmj1RFYWdE39FtW3rGJpOrF++l75PH2jatOJwWLqVGjLE7LGtrF751eCuvtq8nH7ia5eG0fI/i1GjzO1UDB5sbqeiRw/HHuaTadmy8sB1oshIxyB6Mr6+9Xt6u7y8qpW7g0tD6rRp05g8eTKPP/44k8p/WpW+uNXKsGHD+Oyzz5g1axYhf/WX79u3j2XLlnH//fe7snkiIhUKCjLDY0XTrzRtavbelR86UNoDe+SI4yW3ggLz0tqJiorMbd26sjFvR486TrV1oksvLQupBQWwZEnldcPDy743DNi6tfK6R4+Wfe/tbQbVypQPrRaL2TPm5VVxr1n5NoAZsgsLK+4JO3G4Rv/+5jlWFORODCDXX2+2uaJgdmLwvP9+sw0V1T0xeE6fbp5jaRu9vCof4jF+fOU/sxNdc82p1z3//FOv27atuZ2KiAhzk/opNNT8f5SWVvld/JGRZj13c9k8qc899xzjxo1j0KBBFQbU3r17A+Zk/j169KBr16488sgj9sn809PTqzyZv+ZJFRF3Kr2EvH+/OQ+hvz9kZJi/CEp7yCwWs05Bgdm7eNNN5nPT0807dyvrCevc2exZA/NS+BdfVH4JtmnTsrFspWG4skuwNltZsDYMM2hXNpautLdNRGo3d9/Yeap5zWUhtV+/fnx/kpHO5V/2l19+4eGHH2b16tUOy6K2quIssgqpIuIJSifLDgtzvMxaKifH7MF05WTZIiKeyu2T+S9fvvyU63br1s2+ApWIiIiIiC7ciIi4SG24MUFExFMppIqIVLPadGOCiIinUkgVEalm0dHmJOhacUpE5PQppIqIuEB0tEKoiMiZqCVrDoiIiIhIfaKQKiIiIiIeRyFVRERERDyOQqqIiIiIeByFVBERERHxOAqpIiIiIuJxFFJFRERExOMopIqIiIiIx1FIFRERERGPo5AqIiIiIh5HIVVEREREPI5CqoiIiIh4HIVUEREREfE4CqkiIiIi4nEUUkVERETE4yikioiIiIjHUUgVEREREY+jkCoiIiIiHkchVUREREQ8jkKqiIiIiHgchVQRERER8TgKqSIiIiLicRRSRURERMTjKKSKiI
iIiMdRSBURERERj6OQKiIiIiIeRyFVRERERDyOQqqIiIiIeByFVBERERHxOAqpIiIiIuJxFFJFRERExOMopIqIiIiIx1FIFRERERGPo5AqIiIiIh5HIVVEREREPI5CqoiIiIh4HIVUEREREfE4CqkiIiIi4nEUUkVERETE4yikioiIiIjHUUgVEREREY+jkCoiIiIiHkchVUREREQ8jkKqiIiIiHgchVQRERER8TgKqSIiIiLicRRSRURERMTjKKSKiIiIiMdRSBURERERj6OQKiIiIiIeRyFVRERERDyOQqqIiIiIeByFVBERERHxOAqpIiIiIuJxFFJFRERExOMopIqIiIiIx1FIFRERERGPY3V3A0RERETEDUpKYMsWyMiA8HDo2BG8PKf/0iNC6rFjx3j88ceZP38+6enptGvXjkceeYSrr77a3U2rnIf/w1Yrnau7WyW1md5TIuKJVq2CV16BrVuhoAD8/KB9e7jrLujTx92tAzwkpA4fPpyff/6Zp556irPOOouPPvqIa665hpKSEq699lp3N89ZLfiHrTY617p5rlIz9J4SEU+0ahWMGwfp6RAbCwEBkJcHGzaY5c8+6xGfURbDMAx3NmDx4sVcfPHF9mBaasCAAWzZsoV9+/bh7e19SsfKysrCZrORmZlJaGioaxpc2T/s4cNmL4mH/MNWC51r3TxXqRl6T4mIJyopgeuvh/XroXVrsFjK9hkG7NwJXbvC+++77KrPqeY1t4fU2267jY8//piMjAys1rKO3Xnz5nHttdfy448/0ucUP8hdHlLL/8M2aQJpaWX7DAOSk6F5c7jzTqgsWJd/M1T2/ak+p6p1qvL6JSXwwguwezc0bOj8Jj58GFq1gvvuq/2XLktK4J//hF276v65Ss04lfdUy5Zl76kz/Rj+u+efbL87X/tMj+/utp/J67vz53amx3fnz83Vx/fktlfXax84AO+9Z/7h7OdXtj88HGw2OHYMjh4163TufGavWYlTzWtuv9y/efNm2rdv7xBQAeLj4+37KwupBQUFFBQU2B9nZWW5rqFgjivbutXsFSkshIMHHfcXFZl13noLXNWTW1OysmDjRrBaYf9+5/1FRfDrr2aQrQvn+r//1Y9zlZpxKu+p//0PXnxR7ykRqVlpaWYILShw/APaz88MqQEBcOSIOY7ezdweUtPS0mjZsqVTeUREhH1/ZWbOnMmUKVNc1jYnGRnmP2pAgBlSGzRw3F9SYv7j9+gBbdqYZeX/8qnsr6BTqXOyeqfznL+rs3MnbNsGERFlvYfl65WUmD+Pjh3NXsbarPRcw8Mr7imtS+cqNaOi91T5XwYlJeYwgE6dTv09dbIrJKey313Hro7j19Vzc/fP9UyO7+6219X3hKuPbbHAvn1mJ1tQEPj7l+0LCTG/5uWZgTU8/PTbWU3cHlIBLCf5oZ5s34QJE3jggQfsj7OysmjatGm1ts1BeLj5D5eXB8HB0KyZ4/5jx8DHB0aOdFkXeY3ZtMkcUxcWZp7riY4dM8P66NF141xXr64f5yo141TeU/7+5tAgvadEpCaVlJhXBzdsMK8MVzQcqWtXs2PGzdw+wC4yMrLC3tL09HSgrEe1In5+foSGhjpsLtWxo3ln7uHDzr2Spf+wHTp4xD/sGdO5murauUrN0HtKRDyVl5c5w0h4uHnV59gxKC42v+7caZaPHesR92C4vQWdO3dm69atFBUVOZRv2rQJgE6dOrmjWRWrRf+wZ0znWjfPVWqG3lMi4sn69DFnGElIMMen7tljfu3a1aNmHnH73f1LlixhyJAhfPzxx1x11VX28sGDB7Nx40bPm4IKKp77sEMH85eOh/zDVhuda908V6kZek+JiCdz02IjtWYKKjDnRF23bh1PP/00rVu3Zt68ebz55pt88MEHXHfddad8nBoLqVC/VpHRubq7VVKb6T0lIuKgVoXUY8eO8dhjjzksizphwoQqL4taoyFVRERERKqsVoXU6qKQKi
IiIuLZTjWv6ZqTiIiIiHgchVQRERER8TgKqSIiIiLicRRSRURERMTjKKSKiIiIiMexursB1al0ooKsrCw3t0REREREKlKa0/5ugqk6FVKzs7MBaNq0qZtbIiIiIiInk52djc1mq3R/nZontaSkhIMHDxISEoLFYnH562VlZdG0aVP2799f5+dl1bmKnD69p0TEU7nj88kwDLKzs2nUqBFeJ1mBr071pHp5edGkSZMaf93Q0NB684tH5ypy+vSeEhFPVdOfTyfrQS2lG6dERERExOMopIqIiIiIx1FIPQN+fn5MmjQJPz8/dzfF5XSuIqdP7ykR8VSe/PlUp26cEhEREZG6QT2pIiIiIuJxFFJFRERExOMopIqIiIiIx1FIFRERERGPo5B6EtnZ2YwfP54BAwYQHR2NxWJh8uTJFdZdv349F154IcHBwYSFhTF8+HB27dpVsw0+A7/++isXX3wxcXFxBAQEEBERwTnnnMMHH3zgVLe2n+vy5cuxWCwVbmvWrHGoW9vPVaqfqz4XXn75Zdq1a4efnx8tWrRgypQpFBYWuvBMRKSu+e6777j55ptp164dQUFBNG7cmH/84x/88ssvTnVrw+eTQupJpKWlMWfOHAoKCrj00ksrrbdt2zb69evH8ePHmT9/Pm+//Ta///47iYmJpKSk1FyDz8DRo0dp2rQpTz75JIsXL+a9996jefPm3HDDDUyfPt1ery6ca6knn3yS1atXO2ydOnWy769L5yrVxxWfCzNmzODee+9l+PDhfP3114wZM4Ynn3ySsWPHuvhsRKQumT17Nnv27OHee+9l8eLFvPjiiyQnJ9O7d2++++47e71a8/lkSKVKSkqMkpISwzAMIyUlxQCMSZMmOdW74oorjKioKCMzM9NetmfPHsPHx8cYP358TTXXJXr16mU0bdrU/rgunOuyZcsMwFiwYMFJ69WFc5XqV92fC6mpqYa/v79x++23Ozx/xowZhsViMbZs2eKaExGROufIkSNOZdnZ2UaDBg2MCy64wF5WWz6f1JN6EqWXgE+mqKiIRYsWMWLECIc1b5s1a0ZSUhILFy50dTNdKioqCqvVCtT9cy2vPp2rVE11fy589dVX5OfnM2rUKIdjjBo1CsMw+Pzzz6u1/SJSd8XExDiVBQcH06FDB/bv3w/Urs8nhdQztHPnTvLy8oiPj3faFx8fz44dO8jPz3dDy05PSUkJRUVFpKSk8Nprr/H111/z8MMPA3XvXMeOHYvVaiU0NJSBAwfyww8/2PfVtXOVmlWV98/mzZsB6Ny5s0O92NhYoqKi7PtFRE5HZmYm69evp2PHjkDt+nxSSD1DaWlpAERERDjti4iIwDAMMjIyarpZp23MmDH4+PgQExPD/fffz0svvcQdd9wB1J1ztdls3HvvvbzxxhssW7aMF198kf3799OvXz++/vproO6cq7hHVd4/aWlp+Pn5ERQUVGHd0mOJiJyOsWPHkpOTw2OPPQbUrs8nq0uPXo+c7PLf310a9CSPPvoot956K8nJyXz55Zfcdddd5OTkMG7cOHud2n6uCQkJJCQk2B8nJiZy2WWX0blzZ8aPH8/AgQPt+2r7uYp7ner7R+8zEXGFJ554gg8//JCXX36Zbt26OeyrDZ9PCqlnKDIyEqDCvybS09OxWCyEhYXVcKtOX1xcHHFxcQAMGTIEgAkTJjBy5Mg6d67lhYWFMXToUF5//XXy8vLq9LmK61Xl/RMZGUl+fj65ubkEBgY61T3xF4uIyKmYMmUK06dPZ8aMGdx111328tr0+aTL/WeoVatWBAQEsGnTJqd9mzZtonXr1vj7+7uhZdWjZ8+eFBUVsWvXrjp/roZhAOZfhnX9XMW1qvL+KR3rdWLdw4cPk5qa6jAtmojIqZgyZQqTJ09m8uTJPProow77atPnk0LqGbJarQwbNozPPvuM7Oxse/m+fftYtmwZw4cPd2PrztyyZcvw8vKiZcuWdfpcMzIyWLRoEV26dM
Hf379On6u4XlXeP4MGDcLf3593333X4RjvvvsuFovlpHOxioicaNq0aUyePJnHH3+cSZMmOe2vVZ9PLp3gqg5YvHixsWDBAuPtt982AOOKK64wFixYYCxYsMDIyckxDMMwtm7dagQHBxt9+/Y1Fi9ebHz22WdGp06djEaNGhnJycluPoNTc9tttxkPPvig8a9//ctYvny58cknnxhXXXWVARgPPfSQvV5dONdrrrnGePjhh40FCxYYy5YtM+bMmWO0bdvWsFqtxtKlS+316sK5imtU9+fC9OnTDYvFYjz66KPG8uXLjWeeecbw8/MzbrvtNnecnojUUs8++6wBGIMGDTJWr17ttJWqLZ9PCql/o1mzZgZQ4bZ79257vXXr1hkXXHCBERgYaISGhhqXXnqpsWPHDvc1vIrefvttIzEx0YiKijKsVqsRFhZmnH/++cb777/vVLe2n+vMmTONLl26GDabzfD29jaio6ONyy67zPjpp5+c6tb2cxXXcMXnwosvvmicddZZhq+vrxEXF2dMmjTJOH78eA2dkYjUBeeff36ln00n9kvWhs8ni2H8NRBPRERERMRDaEyqiIiIiHgchVQRERER8TgKqSIiIiLicRRSRURERMTjKKSKiIiIiMdRSBURERERj6OQKiIiIiIeRyFVRERERDyOQqqIiIiIeByFVBERERHxOAqpIiIiIuJx/h8y7FJaDxO7zgAAAABJRU5ErkJggg==",
|
122 |
+
"text/plain": [
|
123 |
+
"<Figure size 800x500 with 1 Axes>"
|
124 |
+
]
|
125 |
+
},
|
126 |
+
"metadata": {},
|
127 |
+
"output_type": "display_data"
|
128 |
+
}
|
129 |
+
],
|
130 |
+
"source": [
|
131 |
+
"# Top5 invalidity\n",
|
132 |
+
"CompoundT5 = [79.28, 71.2, 24.4, 18.9, 20.2]\n",
|
133 |
+
"ReactionT5 = [0.08, 0.06, 0.06, 0.06, 0.1]\n",
|
134 |
+
"T5Chem = [56.56, 65.17, 43.11, 36.95, 30.74]\n",
|
135 |
+
"\n",
|
136 |
+
"\n",
|
137 |
+
"# plot\n",
|
138 |
+
"import matplotlib.pyplot as plt\n",
|
139 |
+
"fig, ax = plt.subplots(1, figsize=(8, 5))\n",
|
140 |
+
"\n",
|
141 |
+
"\n",
|
142 |
+
"ax.plot([10,30,50,100,200], ReactionT5, \"o-\", label='ReactionT5', color='red', alpha=0.7)\n",
|
143 |
+
"ax.plot([10,30,50,100,200], CompoundT5, \"s--\", label='CompoundT5', color='blue', alpha=0.7)\n",
|
144 |
+
"ax.plot([10,30,50,100,200], T5Chem, \"v:\", label='T5Chem', color='green', alpha=0.7)\n",
|
145 |
+
"\n",
|
146 |
+
"\n",
|
147 |
+
"# plt.ylim(0, 35)\n",
|
148 |
+
"ax.set_xticks([10,30,50,100,200])\n",
|
149 |
+
"ax.set_xticklabels([10,30,50,100,200], fontsize=12)\n",
|
150 |
+
"ax.set_yticks([0, 20, 40, 60, 80, 100])\n",
|
151 |
+
"ax.set_yticklabels([0, 20, 40, 60, 80, 100], fontsize=12)\n",
|
152 |
+
"# plt.tight_layout()\n",
|
153 |
+
"ax.legend(loc=\"best\", fontsize=12)\n"
|
154 |
+
]
|
155 |
+
}
|
156 |
+
],
|
157 |
+
"metadata": {
|
158 |
+
"kernelspec": {
|
159 |
+
"display_name": "reactiont5",
|
160 |
+
"language": "python",
|
161 |
+
"name": "python3"
|
162 |
+
},
|
163 |
+
"language_info": {
|
164 |
+
"codemirror_mode": {
|
165 |
+
"name": "ipython",
|
166 |
+
"version": 3
|
167 |
+
},
|
168 |
+
"file_extension": ".py",
|
169 |
+
"mimetype": "text/x-python",
|
170 |
+
"name": "python",
|
171 |
+
"nbconvert_exporter": "python",
|
172 |
+
"pygments_lexer": "ipython3",
|
173 |
+
"version": "3.11.8"
|
174 |
+
},
|
175 |
+
"varInspector": {
|
176 |
+
"cols": {
|
177 |
+
"lenName": 16,
|
178 |
+
"lenType": 16,
|
179 |
+
"lenVar": 40
|
180 |
+
},
|
181 |
+
"kernels_config": {
|
182 |
+
"python": {
|
183 |
+
"delete_cmd_postfix": "",
|
184 |
+
"delete_cmd_prefix": "del ",
|
185 |
+
"library": "var_list.py",
|
186 |
+
"varRefreshCmd": "print(var_dic_list())"
|
187 |
+
},
|
188 |
+
"r": {
|
189 |
+
"delete_cmd_postfix": ") ",
|
190 |
+
"delete_cmd_prefix": "rm(",
|
191 |
+
"library": "var_list.r",
|
192 |
+
"varRefreshCmd": "cat(var_dic_list()) "
|
193 |
+
}
|
194 |
+
},
|
195 |
+
"types_to_exclude": [
|
196 |
+
"module",
|
197 |
+
"function",
|
198 |
+
"builtin_function_or_method",
|
199 |
+
"instance",
|
200 |
+
"_Feature"
|
201 |
+
],
|
202 |
+
"window_display": false
|
203 |
+
}
|
204 |
+
},
|
205 |
+
"nbformat": 4,
|
206 |
+
"nbformat_minor": 5
|
207 |
+
}
|
task_retrosynthesis/calculate_accuracy.py
ADDED
@@ -0,0 +1,134 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import argparse
|
2 |
+
import os
|
3 |
+
import sys
|
4 |
+
import warnings
|
5 |
+
|
6 |
+
import pandas as pd
|
7 |
+
import rdkit
|
8 |
+
from rdkit import Chem
|
9 |
+
from transformers import AutoTokenizer
|
10 |
+
|
11 |
+
rdkit.RDLogger.DisableLog("rdApp.*")
|
12 |
+
|
13 |
+
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
|
14 |
+
from utils import canonicalize, seed_everything
|
15 |
+
|
16 |
+
warnings.filterwarnings("ignore")
|
17 |
+
|
18 |
+
|
19 |
+
def parse_args():
    """Build and parse the command-line options for retrosynthesis accuracy scoring."""
    parser = argparse.ArgumentParser(
        description="Script for reaction retrosynthesis prediction."
    )
    add = parser.add_argument
    add("--input_data", type=str, required=True, help="Path to the input data.")
    add("--target_data", type=str, required=True, help="Path to the target data.")
    add("--target_col", type=str, required=True, help="Name of target column.")
    add(
        "--model_name_or_path",
        type=str,
        default="sagawa/ReactionT5v2-retrosynthesis",
        help="Name or path of the finetuned model for prediction. Can be a local model or one from Hugging Face.",
    )
    add(
        "--num_beams", type=int, default=5, help="Number of beams used for beam search."
    )
    add("--seed", type=int, default=42, help="Seed for reproducibility.")
    return parser.parse_args()
|
54 |
+
|
55 |
+
|
56 |
+
def remove_space(row):
    """Strip spaces from every generated-SMILES column of *row*.

    Beam-search predictions are stored in columns named "0th", "1th", ... .
    The original implementation hard-coded ``range(5)``, which raises
    KeyError when ``--num_beams`` < 5 and silently skips columns when it is
    larger; cleaning every column whose name ends with "th" generalizes to
    any beam count while behaving identically for the default of five.
    """
    for col in row.index:
        if col.endswith("th"):
            # Tokenized model output separates tokens with spaces; a valid
            # SMILES string contains none, so remove them all.
            row[col] = row[col].replace(" ", "")
    return row
|
60 |
+
|
61 |
+
|
62 |
+
if __name__ == "__main__":
    CFG = parse_args()

    seed_everything(seed=CFG.seed)

    # NOTE(review): the tokenizer is loaded but never used below — presumably
    # kept for parity with the sibling task scripts; confirm it can be dropped.
    tokenizer = AutoTokenizer.from_pretrained(
        os.path.abspath(CFG.model_name_or_path)
        if os.path.exists(CFG.model_name_or_path)
        else CFG.model_name_or_path,
        return_tensors="pt",
    )

    # Predictions CSV: one column per beam, named "0th" .. f"{num_beams-1}th".
    df = pd.read_csv(CFG.input_data)
    # Replace missing predictions with a single space so the string operations
    # in remove_space() below do not fail on NaN.
    df[[f"{i}th" for i in range(CFG.num_beams)]] = df[
        [f"{i}th" for i in range(CFG.num_beams)]
    ].fillna(" ")
    # Ground truth comes from a separate CSV, aligned with predictions by row order.
    df["target"] = pd.read_csv(CFG.target_data)[CFG.target_col].values
    df = df.apply(remove_space, axis=1)

    top_k_invalidity = CFG.num_beams

    # Per-row hit flags for top-1/2/3/5 exact-match accuracy (canonical SMILES
    # comparison) and a per-row count of chemically invalid predictions.
    top1, top2, top3, top5 = [], [], [], []
    invalidity = []

    for idx, row in df.iterrows():
        target = canonicalize(row["target"])
        # NOTE(review): this ladder hard-codes five beam columns ("0th".."4th"),
        # i.e. it assumes num_beams == 5 — confirm before using other values.
        if canonicalize(row["0th"]) == target:
            top1.append(1)
            top2.append(1)
            top3.append(1)
            top5.append(1)
        elif canonicalize(row["1th"]) == target:
            top1.append(0)
            top2.append(1)
            top3.append(1)
            top5.append(1)
        elif canonicalize(row["2th"]) == target:
            top1.append(0)
            top2.append(0)
            top3.append(1)
            top5.append(1)
        elif canonicalize(row["3th"]) == target:
            top1.append(0)
            top2.append(0)
            top3.append(0)
            top5.append(1)
        elif canonicalize(row["4th"]) == target:
            top1.append(0)
            top2.append(0)
            top3.append(0)
            top5.append(1)
        else:
            top1.append(0)
            top2.append(0)
            top3.append(0)
            top5.append(0)

        # NOTE(review): input_compound is assigned but never used — confirm
        # whether this leftover can be removed.
        input_compound = row["input"]
        output = [row[f"{i}th"] for i in range(top_k_invalidity)]
        inval_score = 0
        for ith, out in enumerate(output):
            # A trailing "." (empty fragment separator) would make RDKit
            # reject otherwise valid SMILES, so strip it before parsing.
            mol = Chem.MolFromSmiles(out.rstrip("."))
            if not isinstance(mol, Chem.rdchem.Mol):
                inval_score += 1
        invalidity.append(inval_score)
    print(CFG.input_data)
    print(f"Top 1 accuracy: {sum(top1) / len(top1)}")
    print(f"Top 2 accuracy: {sum(top2) / len(top2)}")
    print(f"Top 3 accuracy: {sum(top3) / len(top3)}")
    print(f"Top 5 accuracy: {sum(top5) / len(top5)}")
    # Invalidity is reported as a percentage over all generated candidates.
    print(
        f"Top {top_k_invalidity} Invalidity: {sum(invalidity) / (len(invalidity) * top_k_invalidity) * 100}"
    )
|
task_retrosynthesis/finetune.py
ADDED
@@ -0,0 +1,278 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import argparse
|
2 |
+
import os
|
3 |
+
import sys
|
4 |
+
import warnings
|
5 |
+
|
6 |
+
import datasets
|
7 |
+
import pandas as pd
|
8 |
+
import torch
|
9 |
+
from datasets import Dataset, DatasetDict
|
10 |
+
from transformers import (
|
11 |
+
AutoModelForSeq2SeqLM,
|
12 |
+
AutoTokenizer,
|
13 |
+
DataCollatorForSeq2Seq,
|
14 |
+
EarlyStoppingCallback,
|
15 |
+
Seq2SeqTrainer,
|
16 |
+
Seq2SeqTrainingArguments,
|
17 |
+
)
|
18 |
+
|
19 |
+
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
|
20 |
+
from train import preprocess_df
|
21 |
+
from utils import filter_out, get_accuracy_score, preprocess_dataset, seed_everything
|
22 |
+
|
23 |
+
# Suppress warnings and disable progress bars
|
24 |
+
warnings.filterwarnings("ignore")
|
25 |
+
datasets.utils.logging.disable_progress_bar()
|
26 |
+
|
27 |
+
|
28 |
+
def parse_args():
    """Build and parse command-line options for retrosynthesis fine-tuning."""
    parser = argparse.ArgumentParser()
    add = parser.add_argument

    # Data paths.
    add(
        "--train_data_path",
        type=str,
        required=True,
        help="The path to data used for training. CSV file that contains ['REACTANT', 'PRODUCT'] columns is expected.",
    )
    add(
        "--valid_data_path",
        type=str,
        required=True,
        help="The path to data used for validation. CSV file that contains ['REACTANT', 'PRODUCT'] columns is expected.",
    )
    add(
        "--similar_reaction_data_path",
        type=str,
        help="Path to similar data CSV.",
    )
    add("--output_dir", type=str, default="t5", help="Path of the output directory.")
    add(
        "--model_name_or_path",
        type=str,
        default="sagawa/ReactionT5v2-retrosynthesis",
        help="The name of a pretrained model or path to a model which you want to finetune on your dataset. You can use your local models or models uploaded to hugging face.",
    )
    add("--debug", action="store_true", help="Use debug mode.")

    # Optimization hyper-parameters.
    add("--epochs", type=int, default=20, help="Number of epochs for training.")
    add("--lr", type=float, default=2e-5, help="Learning rate.")
    add("--batch_size", type=int, default=32, help="Batch size.")
    add("--input_max_length", type=int, default=150, help="Max input token length.")
    add("--target_max_length", type=int, default=150, help="Max target token length.")
    add(
        "--eval_beams",
        type=int,
        default=5,
        help="Number of beams used for beam search during evaluation.",
    )
    add("--target_column", type=str, default="REACTANT", help="Target column name.")
    add(
        "--weight_decay",
        type=float,
        default=0.01,
        help="weight_decay used for trainer",
    )

    # Trainer evaluation/saving/logging cadence.
    add(
        "--evaluation_strategy",
        type=str,
        default="epoch",
        help="Evaluation strategy used during training. Select from 'no', 'steps', or 'epoch'. If you select 'steps', also give --eval_steps.",
    )
    add(
        "--eval_steps",
        type=int,
        help="Number of update steps between two evaluations",
    )
    add(
        "--save_strategy",
        type=str,
        default="epoch",
        help="Save strategy used during training. Select from 'no', 'steps', or 'epoch'. If you select 'steps', also give --save_steps.",
    )
    add(
        "--save_steps",
        type=int,
        default=500,
        help="Number of steps between two saving",
    )
    add(
        "--logging_strategy",
        type=str,
        default="epoch",
        help="Logging strategy used during training. Select from 'no', 'steps', or 'epoch'. If you select 'steps', also give --logging_steps.",
    )
    add(
        "--logging_steps",
        type=int,
        default=500,
        help="Number of steps between two logging",
    )
    add(
        "--save_total_limit",
        type=int,
        default=3,
        help="Limit of the number of saved checkpoints. If limit is reached, the oldest checkpoint will be deleted.",
    )

    # Runtime behavior.
    add("--fp16", action="store_true", help="Use fp16 during training")
    add("--disable_tqdm", action="store_true", help="Disable tqdm during training")
    add("--seed", type=int, default=42, help="Set seed for reproducibility.")
    add(
        "--sampling_num",
        type=int,
        default=-1,
        help="Number of samples used for training. If you want to use all samples, set -1.",
    )

    return parser.parse_args()
|
184 |
+
|
185 |
+
|
186 |
+
if __name__ == "__main__":
    CFG = parse_args()
    # NOTE(review): this unconditionally overrides the --disable_tqdm CLI flag;
    # confirm whether that is intentional.
    CFG.disable_tqdm = True
    # NOTE(review): `device` is computed but not referenced below (the Trainer
    # handles device placement itself) — confirm it can be removed.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    seed_everything(seed=CFG.seed)

    # Drop rows missing REACTANT/PRODUCT, then build the model input column.
    train = preprocess_df(
        filter_out(pd.read_csv(CFG.train_data_path), ["REACTANT", "PRODUCT"])
    )
    valid = preprocess_df(
        filter_out(pd.read_csv(CFG.valid_data_path), ["REACTANT", "PRODUCT"])
    )

    # Optionally subsample the training set for quicker experiments.
    if CFG.sampling_num > 0:
        train = train.sample(n=CFG.sampling_num, random_state=CFG.seed).reset_index(
            drop=True
        )

    # Optionally augment training data with retrieved similar reactions.
    if CFG.similar_reaction_data_path:
        similar = preprocess_df(
            filter_out(
                pd.read_csv(CFG.similar_reaction_data_path), ["REACTANT", "PRODUCT"]
            )
        )
        # Debug prints showing the training-set size before/after augmentation.
        print(len(train))
        train = pd.concat([train, similar], ignore_index=True)
        print(len(train))

    # Only the model input and the REACTANT target are needed downstream.
    dataset = DatasetDict(
        {
            "train": Dataset.from_pandas(train[["input", "REACTANT"]]),
            "validation": Dataset.from_pandas(valid[["input", "REACTANT"]]),
        }
    )

    # load tokenizer (local path takes precedence over a Hub model id)
    tokenizer = AutoTokenizer.from_pretrained(
        os.path.abspath(CFG.model_name_or_path)
        if os.path.exists(CFG.model_name_or_path)
        else CFG.model_name_or_path,
        return_tensors="pt",
    )
    # The tokenizer is stashed on CFG so preprocess_dataset/get_accuracy_score
    # (shared utils) can access it through the config object.
    CFG.tokenizer = tokenizer

    # load model
    model = AutoModelForSeq2SeqLM.from_pretrained(
        os.path.abspath(CFG.model_name_or_path) if os.path.exists(CFG.model_name_or_path) else CFG.model_name_or_path
    )
    tokenized_datasets = dataset.map(
        lambda examples: preprocess_dataset(examples, CFG),
        batched=True,
        remove_columns=dataset["train"].column_names,
        load_from_cache_file=False,
    )

    data_collator = DataCollatorForSeq2Seq(tokenizer, model=model)

    args = Seq2SeqTrainingArguments(
        CFG.output_dir,
        evaluation_strategy=CFG.evaluation_strategy,
        eval_steps=CFG.eval_steps,
        save_strategy=CFG.save_strategy,
        save_steps=CFG.save_steps,
        logging_strategy=CFG.logging_strategy,
        logging_steps=CFG.logging_steps,
        learning_rate=CFG.lr,
        per_device_train_batch_size=CFG.batch_size,
        per_device_eval_batch_size=CFG.batch_size,
        weight_decay=CFG.weight_decay,
        save_total_limit=CFG.save_total_limit,
        num_train_epochs=CFG.epochs,
        predict_with_generate=True,
        fp16=CFG.fp16,
        disable_tqdm=CFG.disable_tqdm,
        push_to_hub=False,
        load_best_model_at_end=True,
    )

    # Generation settings used by predict_with_generate during evaluation.
    model.config.eval_beams = CFG.eval_beams
    model.config.max_length = CFG.target_max_length
    trainer = Seq2SeqTrainer(
        model,
        args,
        train_dataset=tokenized_datasets["train"],
        eval_dataset=tokenized_datasets["validation"],
        data_collator=data_collator,
        tokenizer=tokenizer,
        compute_metrics=lambda eval_preds: get_accuracy_score(eval_preds, CFG),
        # Stop early after 10 evaluations without improvement.
        callbacks=[EarlyStoppingCallback(early_stopping_patience=10)],
    )

    trainer.train(resume_from_checkpoint=False)
    trainer.save_model("./best_model")
|
task_retrosynthesis/generate_embedding.py
ADDED
@@ -0,0 +1,131 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import argparse
|
2 |
+
import os
|
3 |
+
import sys
|
4 |
+
import warnings
|
5 |
+
|
6 |
+
import numpy as np
|
7 |
+
import pandas as pd
|
8 |
+
import torch
|
9 |
+
from torch.utils.data import DataLoader
|
10 |
+
from transformers import AutoTokenizer, T5EncoderModel
|
11 |
+
|
12 |
+
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
|
13 |
+
from generation_utils import ReactionT5Dataset
|
14 |
+
from train import preprocess_df, preprocess_USPTO
|
15 |
+
from utils import filter_out, seed_everything
|
16 |
+
|
17 |
+
warnings.filterwarnings("ignore")
|
18 |
+
|
19 |
+
|
20 |
+
def parse_args():
    """Build and parse command-line options for embedding generation."""
    parser = argparse.ArgumentParser()
    add = parser.add_argument
    add("--input_data", type=str, required=True, help="Path to the input data.")
    add(
        "--test_data",
        type=str,
        help="Path to the test data. If provided, the duplicates will be removed from the input data.",
    )
    add(
        "--input_max_length",
        type=int,
        default=400,
        help="Maximum token length of input.",
    )
    add(
        "--model_name_or_path",
        type=str,
        default="sagawa/ReactionT5v2-retrosynthesis",
        help="Name or path of the finetuned model for prediction. Can be a local model or one from Hugging Face.",
    )
    add("--batch_size", type=int, default=5, help="Batch size for prediction.")
    add(
        "--output_dir",
        type=str,
        default="./",
        help="Directory where predictions are saved.",
    )
    add("--debug", action="store_true", help="Use debug mode.")
    add("--seed", type=int, default=42, help="Seed for reproducibility.")
    return parser.parse_args()
|
62 |
+
|
63 |
+
|
64 |
+
def create_embedding(dataloader, model, device):
    """Mean-pool the encoder's last hidden states for every batch.

    Bug fix: the original moved inputs with the global ``CFG.device`` instead
    of the ``device`` argument, which breaks standalone use of this function
    (NameError without a module-level CFG) and silently ignores the caller's
    device choice when the two differ.

    Args:
        dataloader: Iterable of dicts of tensors (must include
            ``input_ids`` and ``attention_mask``).
        model: Encoder model; ``model(**inputs)[0]`` is treated as the last
            hidden states of shape (batch, seq_len, hidden_dim).
        device: torch.device to run inference on.

    Returns:
        List of numpy arrays, one (batch_size, hidden_dim) array per batch,
        containing attention-mask-weighted mean embeddings.
    """
    outputs_mean = []
    model.eval()
    model.to(device)
    for inputs in dataloader:
        # Use the function argument, not a global config, for placement.
        inputs = {k: v.to(device) for k, v in inputs.items()}
        with torch.no_grad():
            output = model(**inputs)
        last_hidden_states = output[0]
        # Expand the attention mask over the hidden dimension so padding
        # positions contribute nothing to the pooled embedding.
        input_mask_expanded = (
            inputs["attention_mask"]
            .unsqueeze(-1)
            .expand(last_hidden_states.size())
            .float()
        )
        sum_embeddings = torch.sum(last_hidden_states * input_mask_expanded, 1)
        sum_mask = input_mask_expanded.sum(1)
        # Guard against division by zero for all-padding rows.
        sum_mask = torch.clamp(sum_mask, min=1e-6)
        mean_embeddings = sum_embeddings / sum_mask
        outputs_mean.append(mean_embeddings.detach().cpu().numpy())

    return outputs_mean
|
86 |
+
|
87 |
+
|
88 |
+
if __name__ == "__main__":
    CFG = parse_args()
    CFG.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    if not os.path.exists(CFG.output_dir):
        os.makedirs(CFG.output_dir)

    seed_everything(seed=CFG.seed)

    # Tokenizer is stashed on CFG for ReactionT5Dataset to use.
    CFG.tokenizer = AutoTokenizer.from_pretrained(
        os.path.abspath(CFG.model_name_or_path)
        if os.path.exists(CFG.model_name_or_path)
        else CFG.model_name_or_path,
        return_tensors="pt",
    )
    # NOTE(review): unlike the tokenizer above, the model load does not apply
    # the os.path.abspath/os.path.exists fallback — confirm local-path loading
    # works here or align the two.
    model = T5EncoderModel.from_pretrained(CFG.model_name_or_path).to(CFG.device)
    model.eval()

    # Keep duplicates: embeddings must stay row-aligned with the input CSV.
    input_data = filter_out(pd.read_csv(CFG.input_data), ["REACTANT", "PRODUCT"])
    input_data = preprocess_df(input_data, drop_duplicates=False)

    # If a test set is given, drop any input reaction that also appears in it
    # (comparison is done on the preprocessed "pair" key).
    if CFG.test_data:
        input_data_copy = preprocess_USPTO(input_data.copy())
        test_data = filter_out(pd.read_csv(CFG.test_data), ["REACTANT", "PRODUCT"])
        USPTO_test = preprocess_USPTO(test_data)
        input_data = input_data[
            ~input_data_copy["pair"].isin(USPTO_test["pair"])
        ].reset_index(drop=True)

    # Persist the (possibly filtered) inputs so rows can be matched to the
    # saved embedding matrix later.
    input_data.to_csv(os.path.join(CFG.output_dir, "input_data.csv"), index=False)
    dataset = ReactionT5Dataset(CFG, input_data)
    dataloader = DataLoader(
        dataset,
        batch_size=CFG.batch_size,
        shuffle=False,  # keep ordering aligned with input_data.csv
        num_workers=4,
        pin_memory=True,
        drop_last=False,
    )

    outputs = create_embedding(dataloader, model, CFG.device)
    outputs = np.concatenate(outputs, axis=0)

    np.save(os.path.join(CFG.output_dir, "embedding_mean.npy"), outputs)
|
task_retrosynthesis/get_distance.py
ADDED
@@ -0,0 +1,74 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import argparse
|
2 |
+
import os
|
3 |
+
import sys
|
4 |
+
import warnings
|
5 |
+
|
6 |
+
import numpy as np
|
7 |
+
import pandas as pd
|
8 |
+
import torch
|
9 |
+
|
10 |
+
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
|
11 |
+
from utils import seed_everything
|
12 |
+
|
13 |
+
warnings.filterwarnings("ignore")
|
14 |
+
|
15 |
+
|
16 |
+
def parse_args():
    """Build and parse command-line options for similar-reaction search."""
    parser = argparse.ArgumentParser(description="Search for similar reactions.")
    add = parser.add_argument
    add("--input_data", type=str, required=True, help="Path to the input data.")
    add(
        "--target_embedding",
        type=str,
        required=True,
        help="Path to the target embedding.",
    )
    # Fix: help text was a copy-paste of --target_embedding's
    # ("Path to the target embedding."), which made --help misleading.
    add(
        "--query_embedding",
        type=str,
        required=True,
        help="Path to the query embedding.",
    )
    add("--batch_size", type=int, default=64, help="Batch size.")
    add(
        "--output_dir",
        type=str,
        default="./",
        help="Directory where results are saved.",
    )

    return parser.parse_args()
|
45 |
+
|
46 |
+
|
47 |
+
if __name__ == "__main__":
    config = parse_args()
    seed_everything(42)

    target_embedding = np.load(config.target_embedding)
    query_embedding = np.load(config.query_embedding)

    # NOTE(review): .cuda() makes a GPU mandatory for this script — confirm
    # whether a CPU fallback is needed.
    target_embedding = torch.tensor(target_embedding, dtype=torch.float32).cuda()
    query_embedding = torch.tensor(query_embedding, dtype=torch.float32).cuda()

    # L2-normalize both sides so the matmul below yields cosine similarity.
    target_embedding = torch.nn.functional.normalize(target_embedding, p=2, dim=1)
    query_embedding = torch.nn.functional.normalize(query_embedding, p=2, dim=1)

    batch_size = config.batch_size
    distances = []

    # Process queries in batches to bound GPU memory for the full
    # (batch x num_targets) similarity matrix.
    for i in range(0, query_embedding.shape[0], batch_size):
        print(f"Processing batch {i // batch_size}...")
        batch = query_embedding[i : i + batch_size]
        similarity = torch.matmul(batch, target_embedding.T)
        # For each query, keep the best (maximum) similarity over all targets.
        # NOTE(review): despite the name, this is a cosine *similarity*
        # (higher = closer), not a distance.
        distance, _ = torch.max(similarity, dim=1)
        distances.append(distance.cpu().tolist())

    # Flatten the per-batch lists into one array aligned with the query rows.
    distances = np.concatenate(distances)

    df = pd.read_csv(config.input_data)
    df["distance"] = distances
    df.to_csv(os.path.join(config.output_dir, "distance.csv"), index=False)
|
task_retrosynthesis/prediction.py
ADDED
@@ -0,0 +1,143 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import argparse
|
2 |
+
import gc
|
3 |
+
import os
|
4 |
+
import sys
|
5 |
+
import warnings
|
6 |
+
|
7 |
+
import pandas as pd
|
8 |
+
import torch
|
9 |
+
from torch.utils.data import DataLoader
|
10 |
+
from tqdm import tqdm
|
11 |
+
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
|
12 |
+
|
13 |
+
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
|
14 |
+
from generation_utils import (
|
15 |
+
ReactionT5Dataset,
|
16 |
+
decode_output,
|
17 |
+
save_multiple_predictions,
|
18 |
+
)
|
19 |
+
from train import preprocess_df
|
20 |
+
from utils import seed_everything
|
21 |
+
|
22 |
+
warnings.filterwarnings("ignore")
|
23 |
+
|
24 |
+
|
def parse_args():
    """Build and parse the command-line options for retrosynthesis prediction."""
    p = argparse.ArgumentParser(
        description="Script for reaction retrosynthesis prediction."
    )
    p.add_argument("--input_data", type=str, required=True, help="Path to the input data.")
    p.add_argument("--input_max_length", type=int, default=400, help="Maximum token length of input.")
    p.add_argument("--output_min_length", type=int, default=1, help="Minimum token length of output.")
    p.add_argument("--output_max_length", type=int, default=300, help="Maximum token length of output.")
    p.add_argument(
        "--model_name_or_path",
        type=str,
        default="sagawa/ReactionT5v2-retrosynthesis",
        help="Name or path of the finetuned model for prediction. Can be a local model or one from Hugging Face.",
    )
    p.add_argument("--num_beams", type=int, default=5, help="Number of beams used for beam search.")
    p.add_argument(
        "--num_return_sequences",
        type=int,
        default=5,
        help="Number of predictions returned. Must be less than or equal to num_beams.",
    )
    p.add_argument("--batch_size", type=int, default=5, help="Batch size for prediction.")
    p.add_argument("--output_dir", type=str, default="./", help="Directory where predictions are saved.")
    p.add_argument("--debug", action="store_true", default=False, help="Use debug mode.")
    p.add_argument("--seed", type=int, default=42, help="Seed for reproducibility.")
    return p.parse_args()
84 |
+
|
85 |
+
|
if __name__ == "__main__":
    CFG = parse_args()
    # Run generation on GPU when available; CPU works but is much slower.
    CFG.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    if not os.path.exists(CFG.output_dir):
        os.makedirs(CFG.output_dir)

    seed_everything(seed=CFG.seed)

    # Resolve a local checkpoint directory if one exists; otherwise treat the
    # value as a Hugging Face Hub model identifier.
    CFG.tokenizer = AutoTokenizer.from_pretrained(
        os.path.abspath(CFG.model_name_or_path)
        if os.path.exists(CFG.model_name_or_path)
        else CFG.model_name_or_path,
        return_tensors="pt",
    )
    model = AutoModelForSeq2SeqLM.from_pretrained(
        os.path.abspath(CFG.model_name_or_path)
        if os.path.exists(CFG.model_name_or_path)
        else CFG.model_name_or_path
    ).to(CFG.device)
    model.eval()

    # Keep duplicates (drop_duplicates=False) so predictions stay aligned
    # row-for-row with the input CSV.
    input_data = pd.read_csv(CFG.input_data)
    input_data = preprocess_df(input_data, drop_duplicates=False)
    dataset = ReactionT5Dataset(CFG, input_data)
    dataloader = DataLoader(
        dataset,
        batch_size=CFG.batch_size,
        shuffle=False,  # preserve input order so outputs align with rows
        num_workers=4,
        pin_memory=True,
        drop_last=False,
    )

    all_sequences, all_scores = [], []
    for inputs in tqdm(dataloader, total=len(dataloader)):
        inputs = {k: v.to(CFG.device) for k, v in inputs.items()}
        with torch.no_grad():
            # Beam search producing num_return_sequences candidates per input;
            # scores are requested so candidates can be ranked downstream.
            output = model.generate(
                **inputs,
                min_length=CFG.output_min_length,
                max_length=CFG.output_max_length,
                num_beams=CFG.num_beams,
                num_return_sequences=CFG.num_return_sequences,
                return_dict_in_generate=True,
                output_scores=True,
            )
        sequences, scores = decode_output(output, CFG)
        all_sequences.extend(sequences)
        if scores:
            all_scores.extend(scores)
        # Free generation buffers between batches to bound GPU memory growth.
        del output
        torch.cuda.empty_cache()
        gc.collect()

    output_df = save_multiple_predictions(input_data, all_sequences, all_scores, CFG)

    output_df.to_csv(os.path.join(CFG.output_dir, "output.csv"), index=False)
task_retrosynthesis/train.py
ADDED
@@ -0,0 +1,305 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import argparse
|
2 |
+
import os
|
3 |
+
import sys
|
4 |
+
import warnings
|
5 |
+
from pathlib import Path
|
6 |
+
|
7 |
+
import datasets
|
8 |
+
import pandas as pd
|
9 |
+
import torch
|
10 |
+
from datasets import Dataset, DatasetDict
|
11 |
+
from transformers import (
|
12 |
+
AutoModelForSeq2SeqLM,
|
13 |
+
AutoTokenizer,
|
14 |
+
DataCollatorForSeq2Seq,
|
15 |
+
EarlyStoppingCallback,
|
16 |
+
Seq2SeqTrainer,
|
17 |
+
Seq2SeqTrainingArguments,
|
18 |
+
)
|
19 |
+
|
20 |
+
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
|
21 |
+
from utils import (
|
22 |
+
add_new_tokens,
|
23 |
+
filter_out,
|
24 |
+
get_accuracy_score,
|
25 |
+
preprocess_dataset,
|
26 |
+
seed_everything,
|
27 |
+
)
|
28 |
+
|
29 |
+
# Suppress warnings and disable progress bars
|
30 |
+
warnings.filterwarnings("ignore")
|
31 |
+
datasets.utils.logging.disable_progress_bar()
|
32 |
+
|
33 |
+
|
def parse_args():
    """Define and parse the command-line options for retrosynthesis training."""
    p = argparse.ArgumentParser(
        description="Training script for reaction prediction model."
    )
    p.add_argument("--train_data_path", type=str, required=True, help="Path to training data CSV.")
    p.add_argument("--valid_data_path", type=str, required=True, help="Path to validation data CSV.")
    p.add_argument("--test_data_path", type=str, help="Path to test data CSV.")
    p.add_argument(
        "--USPTO_test_data_path",
        type=str,
        help="The path to data used for USPTO testing. CSV file that contains ['REACTANT', 'PRODUCT'] columns is expected.",
    )
    p.add_argument("--output_dir", type=str, default="t5", help="Path of the output directory.")
    p.add_argument(
        "--pretrained_model_name_or_path",
        type=str,
        required=True,
        help="Pretrained model path or name.",
    )
    p.add_argument("--debug", action="store_true", default=False, help="Enable debug mode.")
    p.add_argument("--epochs", type=int, default=5, help="Number of epochs.")
    p.add_argument("--lr", type=float, default=1e-3, help="Learning rate.")
    p.add_argument("--batch_size", type=int, default=16, help="Batch size.")
    p.add_argument("--input_max_length", type=int, default=400, help="Max input token length.")
    p.add_argument("--target_max_length", type=int, default=150, help="Max target token length.")
    p.add_argument(
        "--eval_beams",
        type=int,
        default=5,
        help="Number of beams used for beam search during evaluation.",
    )
    p.add_argument("--target_column", type=str, default="REACTANT", help="Target column name.")
    p.add_argument("--weight_decay", type=float, default=0.01, help="Weight decay.")
    p.add_argument(
        "--evaluation_strategy",
        type=str,
        default="epoch",
        help="Evaluation strategy used during training. Select from 'no', 'steps', or 'epoch'. If you select 'steps', also give --eval_steps.",
    )
    p.add_argument("--eval_steps", type=int, help="Evaluation steps.")
    p.add_argument(
        "--save_strategy",
        type=str,
        default="epoch",
        help="Save strategy used during training. Select from 'no', 'steps', or 'epoch'. If you select 'steps', also give --save_steps.",
    )
    p.add_argument("--save_steps", type=int, default=500, help="Save steps.")
    p.add_argument(
        "--logging_strategy",
        type=str,
        default="epoch",
        help="Logging strategy used during training. Select from 'no', 'steps', or 'epoch'. If you select 'steps', also give --logging_steps.",
    )
    p.add_argument("--logging_steps", type=int, default=500, help="Logging steps.")
    p.add_argument("--save_total_limit", type=int, default=2, help="Limit of saved checkpoints.")
    p.add_argument("--fp16", action="store_true", default=False, help="Enable fp16 training.")
    p.add_argument("--disable_tqdm", action="store_true", default=False, help="Disable tqdm.")
    p.add_argument("--seed", type=int, default=42, help="Random seed.")
    return p.parse_args()
165 |
+
|
166 |
+
|
def preprocess_df(df, drop_duplicates=True):
    """Fill missing reaction columns, optionally deduplicate, and set the model input.

    Ensures the five reaction-role columns exist (NaNs become a single space),
    optionally keeps only unique rows over those columns, and copies PRODUCT
    into an ``input`` column used as the model's source text.
    """
    roles = ["REACTANT", "PRODUCT", "CATALYST", "REAGENT", "SOLVENT"]
    for role in roles:
        if role not in df.columns:
            df[role] = None
        df[role] = df[role].fillna(" ")

    if drop_duplicates:
        df = df[roles].drop_duplicates().reset_index(drop=True)

    # Retrosynthesis direction: the product is the model input.
    df["input"] = df["PRODUCT"]

    return df
183 |
+
|
184 |
+
|
def preprocess_USPTO(df):
    """Canonicalize REACTANT/PRODUCT component order and add a 'pair' key.

    The 'pair' column is used to detect overlap between splits regardless of
    the order in which dot-separated components are written.
    """

    def _canonical(smiles):
        # Sort the dot-separated components so ordering differences do not
        # defeat duplicate detection.
        return str(sorted(smiles.split(".")))

    df["REACTANT"] = df["REACTANT"].apply(_canonical)
    df["PRODUCT"] = df["PRODUCT"].apply(_canonical)

    df["pair"] = df["REACTANT"] + " - " + df["PRODUCT"].astype(str)

    return df
192 |
+
|
193 |
+
|
if __name__ == "__main__":
    CFG = parse_args()
    CFG.disable_tqdm = True
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    seed_everything(seed=CFG.seed)

    # Load and clean train/validation data, keeping only rows whose REACTANT
    # and PRODUCT columns pass the filter.
    train = preprocess_df(
        filter_out(pd.read_csv(CFG.train_data_path), ["REACTANT", "PRODUCT"])
    )
    valid = preprocess_df(
        filter_out(pd.read_csv(CFG.valid_data_path), ["REACTANT", "PRODUCT"])
    )
    if CFG.USPTO_test_data_path:
        # Drop training reactions that also appear in the USPTO test split so
        # the benchmark does not leak into training.
        train_copy = preprocess_USPTO(train.copy())
        USPTO_test = preprocess_USPTO(pd.read_csv(CFG.USPTO_test_data_path))
        train = train[~train_copy["pair"].isin(USPTO_test["pair"])].reset_index(
            drop=True
        )
    train["pair"] = train["REACTANT"] + " - " + train["PRODUCT"]
    valid["pair"] = valid["REACTANT"] + " - " + valid["PRODUCT"]
    # Remove validation rows that overlap with training pairs.
    valid = valid[~valid["pair"].isin(train["pair"])].reset_index(drop=True)
    train.to_csv("train.csv", index=False)
    valid.to_csv("valid.csv", index=False)

    if CFG.test_data_path:
        test = preprocess_df(
            filter_out(pd.read_csv(CFG.test_data_path), ["REACTANT", "PRODUCT"])
        )
        test["pair"] = test["REACTANT"] + " - " + test["PRODUCT"]
        test = test[~test["pair"].isin(train["pair"])].reset_index(drop=True)
        test = test.drop_duplicates(subset=["pair"]).reset_index(drop=True)
        test.to_csv("test.csv", index=False)

    dataset = DatasetDict(
        {
            "train": Dataset.from_pandas(train[["input", "REACTANT"]]),
            "validation": Dataset.from_pandas(valid[["input", "REACTANT"]]),
        }
    )

    # Load tokenizer (local path if it exists, otherwise from the Hub) and
    # extend it with reaction-specific tokens.
    tokenizer = AutoTokenizer.from_pretrained(
        os.path.abspath(CFG.pretrained_model_name_or_path)
        if os.path.exists(CFG.pretrained_model_name_or_path)
        else CFG.pretrained_model_name_or_path,
        return_tensors="pt",
    )
    tokenizer = add_new_tokens(
        tokenizer,
        Path(__file__).resolve().parent.parent / "data" / "additional_tokens.txt",
    )
    tokenizer.add_special_tokens(
        {
            "additional_special_tokens": tokenizer.additional_special_tokens
            + ["REACTANT:", "REAGENT:"]
        }
    )
    CFG.tokenizer = tokenizer

    model = AutoModelForSeq2SeqLM.from_pretrained(
        os.path.abspath(CFG.pretrained_model_name_or_path)
        if os.path.exists(CFG.pretrained_model_name_or_path)
        else CFG.pretrained_model_name_or_path
    )
    # The embedding matrix must grow to cover the newly added tokens.
    model.resize_token_embeddings(len(tokenizer))

    tokenized_datasets = dataset.map(
        lambda examples: preprocess_dataset(examples, CFG),
        batched=True,
        remove_columns=dataset["train"].column_names,
        load_from_cache_file=False,
    )

    data_collator = DataCollatorForSeq2Seq(tokenizer, model=model)

    args = Seq2SeqTrainingArguments(
        CFG.output_dir,
        evaluation_strategy=CFG.evaluation_strategy,
        eval_steps=CFG.eval_steps,
        save_strategy=CFG.save_strategy,
        save_steps=CFG.save_steps,
        logging_strategy=CFG.logging_strategy,
        logging_steps=CFG.logging_steps,
        learning_rate=CFG.lr,
        per_device_train_batch_size=CFG.batch_size,
        per_device_eval_batch_size=CFG.batch_size,
        weight_decay=CFG.weight_decay,
        save_total_limit=CFG.save_total_limit,
        num_train_epochs=CFG.epochs,
        predict_with_generate=True,
        fp16=CFG.fp16,
        disable_tqdm=CFG.disable_tqdm,
        push_to_hub=False,
        load_best_model_at_end=True,
    )

    model.config.eval_beams = CFG.eval_beams
    model.config.max_length = CFG.target_max_length
    trainer = Seq2SeqTrainer(
        model,
        args,
        train_dataset=tokenized_datasets["train"],
        eval_dataset=tokenized_datasets["validation"],
        data_collator=data_collator,
        tokenizer=tokenizer,
        compute_metrics=lambda eval_preds: get_accuracy_score(eval_preds, CFG),
        callbacks=[EarlyStoppingCallback(early_stopping_patience=10)],
    )

    # Resume from the latest checkpoint when one exists. Trainer raises
    # ValueError when no checkpoint is found in output_dir; in that case start
    # from scratch. (The previous bare `except:` also swallowed
    # KeyboardInterrupt and genuine training failures.)
    try:
        trainer.train(resume_from_checkpoint=True)
    except ValueError:
        trainer.train(resume_from_checkpoint=None)
    trainer.save_model("./best_model")
task_retrosynthesis/visualize_embedding.ipynb
ADDED
The diff for this file is too large to render.
See raw diff
|
|
task_yield/calculate_score.ipynb
ADDED
The diff for this file is too large to render.
See raw diff
|
|
task_yield/convert_to_PreTrainedModel.py
ADDED
@@ -0,0 +1,77 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import argparse
|
2 |
+
import glob
|
3 |
+
import os
|
4 |
+
import sys
|
5 |
+
|
6 |
+
import torch
|
7 |
+
from transformers import AutoConfig, AutoTokenizer
|
8 |
+
|
9 |
+
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
|
10 |
+
from models import ReactionT5Yield
|
11 |
+
|
12 |
+
|
def parse_args():
    """Parse the command-line options for converting a checkpoint to a PreTrainedModel."""
    p = argparse.ArgumentParser(
        description="ReactionT5Yield model impremented with nn.Module with transformers' PreTrainedModel"
    )
    p.add_argument(
        "--model_name_or_path",
        type=str,
        help="The name of a finetuned model or path to a model which you want to convert. You can use your local models or models uploaded to hugging face.",
    )
    p.add_argument(
        "--base_model_name_or_path",
        type=str,
        help="The name of the base model of the finetuned model",
    )
    p.add_argument("--output_dir", type=str, default="./", help="Directory to save the prediction.")
    p.add_argument("--fc_dropout", type=float, default=0.0)
    return p.parse_args()
43 |
+
|
44 |
+
|
if __name__ == "__main__":
    CFG = parse_args()

    # Ensure the output directory exists before writing any artifacts.
    if not os.path.exists(CFG.output_dir):
        os.makedirs(CFG.output_dir)

    CFG.tokenizer = AutoTokenizer.from_pretrained(
        CFG.model_name_or_path, return_tensors="pt"
    )

    # Build the model skeleton from the saved config; weights are loaded below.
    model = ReactionT5Yield(
        CFG,
        config_path=os.path.join(CFG.model_name_or_path, "config.pth"),
        pretrained=False,
    )
    # Try each *.pth file until one matches the architecture. config.pth is
    # among the candidates, so a mismatch is expected for some files — but only
    # state-dict-shaped failures are tolerated, and we refuse to proceed if
    # nothing loads (otherwise random weights would be saved silently).
    pth_files = glob.glob(os.path.join(CFG.model_name_or_path, "*.pth"))
    loaded = False
    for pth_file in pth_files:
        state = torch.load(
            pth_file,
            map_location=torch.device("cpu"),
        )
        try:
            model.load_state_dict(state)
            loaded = True
            break
        except (RuntimeError, TypeError, AttributeError, KeyError):
            # Not a compatible state dict (e.g. config.pth); try the next file.
            continue
    if not loaded:
        raise FileNotFoundError(
            f"No loadable model state dict (*.pth) found in {CFG.model_name_or_path}"
        )

    config = AutoConfig.from_pretrained(CFG.base_model_name_or_path)
    # The fine-tuned tokenizer may contain extra tokens; keep the config in sync.
    config.vocab_size = len(CFG.tokenizer)

    CFG.tokenizer.save_pretrained(CFG.output_dir)
    torch.save(model.state_dict(), os.path.join(CFG.output_dir, "pytorch_model.bin"))
    config.save_pretrained(CFG.output_dir)
task_yield/finetune.py
ADDED
@@ -0,0 +1,219 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import argparse
|
2 |
+
import os
|
3 |
+
import subprocess
|
4 |
+
import sys
|
5 |
+
import warnings
|
6 |
+
|
7 |
+
import pandas as pd
|
8 |
+
import torch
|
9 |
+
from datasets.utils.logging import disable_progress_bar
|
10 |
+
from transformers import AutoTokenizer
|
11 |
+
|
12 |
+
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
|
13 |
+
from train import preprocess_df, train_loop
|
14 |
+
from utils import get_logger, seed_everything
|
15 |
+
|
16 |
+
# Suppress warnings and logging
|
17 |
+
warnings.filterwarnings("ignore")
|
18 |
+
disable_progress_bar()
|
19 |
+
os.environ["TOKENIZERS_PARALLELISM"] = "false"
|
20 |
+
|
21 |
+
|
def parse_args():
    """Define and parse the command-line options for yield fine-tuning."""
    p = argparse.ArgumentParser(
        description="Training script for ReactionT5Yield model."
    )

    p.add_argument("--train_data_path", type=str, required=True, help="Path to training data CSV file.")
    p.add_argument("--valid_data_path", type=str, required=True, help="Path to validation data CSV file.")
    p.add_argument("--similar_reaction_data_path", type=str, required=False, help="Path to similar data CSV.")
    p.add_argument(
        "--pretrained_model_name_or_path",
        type=str,
        default="sagawa/CompoundT5",
        help="Pretrained model name or path.",
    )
    p.add_argument(
        "--model_name_or_path",
        type=str,
        help="The model's name or path used for fine-tuning.",
    )
    p.add_argument(
        "--download_pretrained_model",
        action="store_true",
        default=False,
        required=False,
        help="Download pretrained model from hugging face hub and use it for fine-tuning.",
    )
    p.add_argument("--debug", action="store_true", help="Enable debug mode.")
    p.add_argument("--epochs", type=int, default=200, help="Number of training epochs.")
    p.add_argument("--patience", type=int, default=10, help="Early stopping patience.")
    p.add_argument("--lr", type=float, default=1e-5, help="Learning rate.")
    p.add_argument("--batch_size", type=int, default=32, help="Batch size.")
    p.add_argument("--input_max_length", type=int, default=300, help="Maximum input token length.")
    p.add_argument("--num_workers", type=int, default=4, help="Number of data loading workers.")
    p.add_argument(
        "--fc_dropout",
        type=float,
        default=0.0,
        help="Dropout rate after fully connected layers.",
    )
    p.add_argument("--eps", type=float, default=1e-6, help="Epsilon for Adam optimizer.")
    p.add_argument("--weight_decay", type=float, default=0.05, help="Weight decay for optimizer.")
    p.add_argument("--max_grad_norm", type=int, default=1000, help="Maximum gradient norm for clipping.")
    p.add_argument("--gradient_accumulation_steps", type=int, default=1, help="Gradient accumulation steps.")
    p.add_argument("--num_warmup_steps", type=int, default=0, help="Number of warmup steps.")
    p.add_argument("--batch_scheduler", action="store_true", help="Use batch scheduler.")
    p.add_argument("--print_freq", type=int, default=100, help="Logging frequency.")
    p.add_argument(
        "--use_amp",
        action="store_true",
        help="Use automatic mixed precision for training.",
    )
    p.add_argument("--output_dir", type=str, default="./", help="Directory to save the trained model.")
    p.add_argument("--seed", type=int, default=42, help="Random seed for reproducibility.")
    p.add_argument(
        "--sampling_num",
        type=int,
        default=-1,
        help="Number of samples used for training. If you want to use all samples, set -1.",
    )
    p.add_argument(
        "--sampling_frac",
        type=float,
        default=-1.0,
        help="Ratio of samples used for training. If you want to use all samples, set -1.0.",
    )
    p.add_argument(
        "--checkpoint",
        type=str,
        help="Path to the checkpoint file for resuming training.",
    )

    return p.parse_args()
147 |
+
|
148 |
+
|
def download_pretrained_model():
    """Download the pretrained yield-model artifacts from the Hugging Face Hub.

    Files are saved into the current working directory.

    Raises:
        subprocess.CalledProcessError: if any download fails (check=True).
    """
    base_url = "https://huggingface.co/sagawa/ReactionT5v2-yield/resolve/main"
    filenames = [
        "CompoundT5_best.pth",
        "config.pth",
        "special_tokens_map.json",
        "tokenizer.json",
        "tokenizer_config.json",
    ]
    for filename in filenames:
        # Argument-list form (shell=False) avoids shell quoting/injection
        # issues; check=True surfaces failed downloads instead of silently
        # continuing with missing files.
        subprocess.run(["wget", f"{base_url}/{filename}"], check=True)
173 |
+
|
174 |
+
|
if __name__ == "__main__":
    CFG = parse_args()
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    CFG.device = device
    if not os.path.exists(CFG.output_dir):
        os.makedirs(CFG.output_dir)
    seed_everything(seed=CFG.seed)

    if CFG.download_pretrained_model:
        # Fetch the pretrained weights/tokenizer into the current directory
        # and point model loading at it.
        download_pretrained_model()
        CFG.model_name_or_path = "."

    train = pd.read_csv(CFG.train_data_path).drop_duplicates().reset_index(drop=True)
    valid = pd.read_csv(CFG.valid_data_path).drop_duplicates().reset_index(drop=True)
    train = preprocess_df(train, CFG)
    valid = preprocess_df(valid, CFG)

    # Optional subsampling of the training set: absolute count takes priority
    # over fraction; both are disabled by their -1 defaults.
    if CFG.sampling_num > 0:
        train = train.sample(n=CFG.sampling_num, random_state=CFG.seed).reset_index(
            drop=True
        )
    elif CFG.sampling_frac > 0 and CFG.sampling_frac < 1:
        train = train.sample(frac=CFG.sampling_frac, random_state=CFG.seed).reset_index(
            drop=True
        )

    if CFG.similar_reaction_data_path:
        # Augment training data with retrieved similar reactions; the prints
        # show the size before and after augmentation.
        similar = preprocess_df(pd.read_csv(CFG.similar_reaction_data_path), CFG)
        print(len(train))
        train = pd.concat([train, similar], ignore_index=True)
        print(len(train))

    LOGGER = get_logger(os.path.join(CFG.output_dir, "train"))
    CFG.logger = LOGGER

    # Resolve a local path when it exists; otherwise load from the Hub.
    tokenizer = AutoTokenizer.from_pretrained(
        os.path.abspath(CFG.model_name_or_path)
        if os.path.exists(CFG.model_name_or_path)
        else CFG.model_name_or_path,
        return_tensors="pt",
    )
    tokenizer.save_pretrained(CFG.output_dir)
    CFG.tokenizer = tokenizer

    train_loop(train, valid, CFG)
task_yield/generate_embedding.py
ADDED
@@ -0,0 +1,138 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import argparse
|
2 |
+
import os
|
3 |
+
import sys
|
4 |
+
|
5 |
+
import numpy as np
|
6 |
+
import pandas as pd
|
7 |
+
import torch
|
8 |
+
from torch.utils.data import DataLoader
|
9 |
+
from transformers import AutoTokenizer
|
10 |
+
|
11 |
+
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
|
12 |
+
from generation_utils import ReactionT5Dataset
|
13 |
+
from models import ReactionT5Yield2
|
14 |
+
from train import preprocess_df
|
15 |
+
from utils import filter_out, seed_everything
|
16 |
+
|
17 |
+
|
def parse_args():
    """Parse the command-line options for yield-embedding generation."""
    p = argparse.ArgumentParser(
        description="Prediction script for ReactionT5Yield model."
    )

    p.add_argument(
        "--input_data",
        type=str,
        required=True,
        help="Data as a string or CSV file that contains an 'input' column. The format of the string or contents of the column are like 'REACTANT:{reactants of the reaction}PRODUCT:{products of the reaction}'. If there are multiple reactants, concatenate them with '.'.",
    )
    p.add_argument(
        "--test_data",
        type=str,
        required=False,
        help="Path to the test data. If provided, the duplicates will be removed from the input data.",
    )
    p.add_argument(
        "--model_name_or_path",
        type=str,
        default="sagawa/ReactionT5v2-yield",
        help="Name or path of the finetuned model for prediction. Can be a local model or one from Hugging Face.",
    )
    p.add_argument("--debug", action="store_true", help="Use debug mode.")
    p.add_argument("--input_max_length", type=int, default=400, help="Maximum token length of input.")
    p.add_argument("--batch_size", type=int, default=5, required=False, help="Batch size.")
    p.add_argument("--num_workers", type=int, default=4, help="Number of data loading workers.")
    p.add_argument(
        "--fc_dropout",
        type=float,
        default=0.0,
        help="Dropout rate after fully connected layers.",
    )
    p.add_argument("--output_dir", type=str, default="./", help="Directory where predictions are saved.")
    p.add_argument("--seed", type=int, default=42, help="Random seed for reproducibility.")

    return p.parse_args()
74 |
+
|
75 |
+
|
76 |
+
def create_embedding(dataloader, model, device):
    """Collect embeddings for every batch produced by *dataloader*.

    The model is switched to eval mode, moved to *device*, and queried batch
    by batch with gradients disabled.

    Args:
        dataloader: Iterable of dicts of tensors (tokenized inputs).
        model: Model exposing ``generate_embedding(inputs)``.
        device: Target device for the model and the batches.

    Returns:
        list[np.ndarray]: One array per batch, on the CPU.
    """
    model.eval()
    model.to(device)
    collected = []
    for batch in dataloader:
        batch = {key: tensor.to(device) for key, tensor in batch.items()}
        with torch.no_grad():
            embedding = model.generate_embedding(batch)

        collected.append(embedding.detach().cpu().numpy())

    return collected
|
88 |
+
|
89 |
+
|
90 |
+
if __name__ == "__main__":
    CFG = parse_args()

    # Run on GPU when available; model and batches are moved accordingly.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    CFG.device = device

    if not os.path.exists(CFG.output_dir):
        os.makedirs(CFG.output_dir)

    seed_everything(seed=CFG.seed)

    # Use an absolute path for local checkpoints so the tokenizer loader does
    # not mistake a relative path for a Hugging Face Hub model id.
    CFG.tokenizer = AutoTokenizer.from_pretrained(
        os.path.abspath(CFG.model_name_or_path)
        if os.path.exists(CFG.model_name_or_path)
        else CFG.model_name_or_path,
        return_tensors="pt",
    )

    model = ReactionT5Yield2.from_pretrained(CFG.model_name_or_path).to(CFG.device)
    model.eval()

    # Drop rows missing required columns, then canonicalize/format the inputs.
    input_data = filter_out(
        pd.read_csv(CFG.input_data), ["YIELD", "REACTANT", "PRODUCT"]
    )
    input_data = preprocess_df(input_data, CFG, drop_duplicates=False)
    if CFG.test_data:
        test_data = filter_out(
            pd.read_csv(CFG.test_data), ["YIELD", "REACTANT", "PRODUCT"]
        )
        test_data = preprocess_df(test_data, CFG, drop_duplicates=False)
        # Remove duplicates from the input data
        # (i.e. drop rows whose 'input' also appears in the held-out test set).
        input_data = input_data[
            ~input_data["input"].isin(test_data["input"])
        ].reset_index(drop=True)
    # Persist the (possibly filtered) inputs so each embedding row can be
    # matched back to its reaction.
    input_data.to_csv(os.path.join(CFG.output_dir, "input_data.csv"), index=False)
    dataset = ReactionT5Dataset(CFG, input_data)
    dataloader = DataLoader(
        dataset,
        batch_size=CFG.batch_size,
        shuffle=False,  # order must stay aligned with input_data.csv rows
        num_workers=CFG.num_workers,
        pin_memory=True,
        drop_last=False,
    )

    outputs = create_embedding(dataloader, model, CFG.device)
    outputs = np.concatenate(outputs, axis=0)

    # File name suggests mean-pooled embeddings — actual pooling depends on
    # ReactionT5Yield2.generate_embedding; confirm in models.py.
    np.save(os.path.join(CFG.output_dir, "embedding_mean.npy"), outputs)
|
task_yield/get_distance.py
ADDED
@@ -0,0 +1,80 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import argparse
|
2 |
+
import os
|
3 |
+
import sys
|
4 |
+
import warnings
|
5 |
+
|
6 |
+
import numpy as np
|
7 |
+
import pandas as pd
|
8 |
+
import torch
|
9 |
+
|
10 |
+
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
|
11 |
+
from utils import seed_everything
|
12 |
+
|
13 |
+
warnings.filterwarnings("ignore")
|
14 |
+
|
15 |
+
|
16 |
+
def parse_args():
    """
    Parse command line arguments for the similarity-search script.

    Returns:
        argparse.Namespace: Parsed arguments.
    """
    parser = argparse.ArgumentParser(description="Search for similar reactions.")
    parser.add_argument(
        "--input_data",
        type=str,
        required=True,
        help="Path to the input data.",
    )
    parser.add_argument(
        "--target_embedding",
        type=str,
        required=True,
        help="Path to the target embedding.",
    )
    parser.add_argument(
        "--query_embedding",
        type=str,
        required=True,
        # Fixed copy-paste: this previously repeated the target-embedding help.
        help="Path to the query embedding.",
    )
    parser.add_argument(
        "--top_k",
        type=int,
        default=1,
        help="Number of similar reactions to retrieve.",
    )
    parser.add_argument("--batch_size", type=int, default=64, help="Batch size.")
    parser.add_argument(
        "--output_dir",
        type=str,
        default="./",
        help="Directory where results are saved.",
    )

    return parser.parse_args()
|
51 |
+
|
52 |
+
|
53 |
+
if __name__ == "__main__":
    config = parse_args()
    seed_everything(42)

    # Fall back to CPU when no GPU is present (previously `.cuda()` was
    # hard-coded and the script crashed on CPU-only machines).
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # NOTE(review): --top_k is parsed but never consumed below — only the
    # single best match per query is kept. Confirm whether top-k retrieval
    # was intended.
    target_embedding = np.load(config.target_embedding)
    query_embedding = np.load(config.query_embedding)

    target_embedding = torch.tensor(target_embedding, dtype=torch.float32).to(device)
    query_embedding = torch.tensor(query_embedding, dtype=torch.float32).to(device)

    # L2-normalize rows so the matmul below computes cosine similarity.
    target_embedding = torch.nn.functional.normalize(target_embedding, p=2, dim=1)
    query_embedding = torch.nn.functional.normalize(query_embedding, p=2, dim=1)

    batch_size = config.batch_size
    distances = []

    # Batch the queries to bound the size of the similarity matrix in memory.
    for i in range(0, query_embedding.shape[0], batch_size):
        print(f"Processing batch {i // batch_size}...")
        batch = query_embedding[i : i + batch_size]
        similarity = torch.matmul(batch, target_embedding.T)
        # Highest cosine similarity to any target. Despite the column name
        # "distance", larger values mean more similar.
        distance, _ = torch.max(similarity, dim=1)
        distances.append(distance.cpu().tolist())

    distances = np.concatenate(distances)

    df = pd.read_csv(config.input_data)
    df["distance"] = distances
    df.to_csv(os.path.join(config.output_dir, "distance.csv"), index=False)
|
task_yield/prediction.py
ADDED
@@ -0,0 +1,173 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import argparse
|
2 |
+
import glob
|
3 |
+
import logging
|
4 |
+
import os
|
5 |
+
import sys
|
6 |
+
import warnings
|
7 |
+
|
8 |
+
import numpy as np
|
9 |
+
import pandas as pd
|
10 |
+
import torch
|
11 |
+
from datasets.utils.logging import disable_progress_bar
|
12 |
+
from torch.utils.data import DataLoader
|
13 |
+
from tqdm import tqdm
|
14 |
+
from transformers import AutoTokenizer
|
15 |
+
|
16 |
+
# Suppress warnings and logging
|
17 |
+
warnings.filterwarnings("ignore")
|
18 |
+
logging.disable(logging.WARNING)
|
19 |
+
disable_progress_bar()
|
20 |
+
os.environ["TOKENIZERS_PARALLELISM"] = "false"
|
21 |
+
|
22 |
+
# Append the utils module path
|
23 |
+
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
|
24 |
+
from finetune import download_pretrained_model
|
25 |
+
from generation_utils import ReactionT5Dataset
|
26 |
+
from models import ReactionT5Yield
|
27 |
+
from train import preprocess_df
|
28 |
+
from utils import seed_everything
|
29 |
+
|
30 |
+
|
31 |
+
def parse_args():
    """
    Parse command line arguments for the yield-prediction script.

    Returns:
        argparse.Namespace: Parsed arguments.
    """
    parser = argparse.ArgumentParser(
        description="Prediction script for ReactionT5Yield model."
    )

    parser.add_argument(
        "--input_data",
        type=str,
        required=True,
        help="Data as a CSV file that contains an 'input' column. The format of the contents of the column are like 'REACTANT:{reactants of the reaction}PRODUCT:{products of the reaction}'. If there are multiple reactants, concatenate them with '.'.",
    )
    # Model selection: give a local/Hub model, or set --download_pretrained_model
    # to fetch the published checkpoint (see __main__ below).
    parser.add_argument(
        "--model_name_or_path",
        type=str,
        help="Name or path of the finetuned model for prediction. Can be a local model or one from Hugging Face.",
    )
    parser.add_argument(
        "--download_pretrained_model",
        action="store_true",
        help="Download finetuned model from hugging face hub and use it for prediction.",
    )
    parser.add_argument("--debug", action="store_true", help="Use debug mode.")
    parser.add_argument(
        "--input_max_length",
        type=int,
        default=300,
        help="Maximum token length of input.",
    )
    parser.add_argument(
        "--batch_size", type=int, default=5, required=False, help="Batch size."
    )
    parser.add_argument(
        "--num_workers", type=int, default=4, help="Number of data loading workers."
    )
    parser.add_argument(
        "--fc_dropout",
        type=float,
        default=0.0,
        help="Dropout rate after fully connected layers.",
    )
    parser.add_argument(
        "--output_dir",
        type=str,
        default="./",
        help="Directory where predictions are saved.",
    )
    parser.add_argument(
        "--seed", type=int, default=42, help="Random seed for reproducibility."
    )

    return parser.parse_args()
|
85 |
+
|
86 |
+
|
87 |
+
def inference_fn(test_loader, model, cfg):
    """
    Run the model over *test_loader* and gather predictions.

    Args:
        test_loader (DataLoader): DataLoader for test data.
        model (nn.Module): Model for inference.
        cfg (argparse.Namespace): Configuration object (uses ``cfg.device``).

    Returns:
        np.ndarray: Concatenated predictions for the whole loader.
    """
    model.eval()
    model.to(cfg.device)
    batch_preds = []

    for batch in tqdm(test_loader, total=len(test_loader)):
        batch = {name: tensor.to(cfg.device) for name, tensor in batch.items()}
        with torch.no_grad():
            outputs = model(batch)
        batch_preds.append(outputs.to("cpu").numpy())

    return np.concatenate(batch_preds)
|
110 |
+
|
111 |
+
|
112 |
+
if __name__ == "__main__":
    CFG = parse_args()

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    CFG.device = device

    if not os.path.exists(CFG.output_dir):
        os.makedirs(CFG.output_dir)

    seed_everything(seed=CFG.seed)

    # Fall back to the published checkpoint when no model path is supplied.
    if CFG.model_name_or_path is None:
        CFG.download_pretrained_model = True

    if CFG.download_pretrained_model:
        download_pretrained_model()
        CFG.model_name_or_path = "."

    # Use an absolute path for local checkpoints so the tokenizer loader does
    # not mistake a relative path for a Hugging Face Hub model id.
    CFG.tokenizer = AutoTokenizer.from_pretrained(
        os.path.abspath(CFG.model_name_or_path)
        if os.path.exists(CFG.model_name_or_path)
        else CFG.model_name_or_path,
        return_tensors="pt",
    )

    model = ReactionT5Yield(
        CFG,
        config_path=os.path.join(CFG.model_name_or_path, "config.pth"),
        pretrained=False,
    )
    # The directory may contain several .pth files (including config.pth);
    # try each until one matches the architecture.
    pth_files = glob.glob(os.path.join(CFG.model_name_or_path, "*.pth"))
    loaded = False
    for pth_file in pth_files:
        state = torch.load(
            pth_file,
            map_location=torch.device("cpu"),
            weights_only=False,  # consistent with train.py's checkpoint loading
        )
        try:
            model.load_state_dict(state)
            loaded = True
            break
        except (RuntimeError, TypeError, KeyError):
            # Not a state dict for this architecture (e.g. config.pth); was a
            # bare `except: pass` before — keep the best-effort scan but narrow it.
            continue
    if not loaded:
        # Previously this failure was silent and predictions came from
        # randomly initialized weights; fail loudly instead.
        raise FileNotFoundError(
            f"No compatible model weights (*.pth) found in {CFG.model_name_or_path}"
        )

    test_ds = pd.read_csv(CFG.input_data)
    test_ds = preprocess_df(test_ds, CFG, drop_duplicates=False)

    test_dataset = ReactionT5Dataset(CFG, test_ds)
    test_loader = DataLoader(
        test_dataset,
        batch_size=CFG.batch_size,
        shuffle=False,
        num_workers=CFG.num_workers,
        pin_memory=True,
        drop_last=False,
    )

    prediction = inference_fn(test_loader, model, CFG)

    # The model is trained on yields normalized to [0, 1]; rescale to percent.
    test_ds["prediction"] = prediction * 100
    test_ds["prediction"] = test_ds["prediction"].clip(0, 100)
    test_ds.to_csv(
        os.path.join(CFG.output_dir, "yield_prediction_output.csv"), index=False
    )
|
task_yield/prediction_with_PreTrainedModel.py
ADDED
@@ -0,0 +1,119 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import argparse
|
2 |
+
import logging
|
3 |
+
import os
|
4 |
+
import sys
|
5 |
+
import warnings
|
6 |
+
|
7 |
+
import pandas as pd
|
8 |
+
import torch
|
9 |
+
from datasets.utils.logging import disable_progress_bar
|
10 |
+
from torch.utils.data import DataLoader
|
11 |
+
from transformers import AutoTokenizer
|
12 |
+
|
13 |
+
# Suppress warnings and logging
|
14 |
+
warnings.filterwarnings("ignore")
|
15 |
+
logging.disable(logging.WARNING)
|
16 |
+
disable_progress_bar()
|
17 |
+
os.environ["TOKENIZERS_PARALLELISM"] = "false"
|
18 |
+
|
19 |
+
# Append the utils module path
|
20 |
+
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
|
21 |
+
from generation_utils import ReactionT5Dataset
|
22 |
+
from models import ReactionT5Yield2
|
23 |
+
from prediction import inference_fn
|
24 |
+
from train import preprocess_df
|
25 |
+
from utils import seed_everything
|
26 |
+
|
27 |
+
|
28 |
+
def parse_args():
|
29 |
+
"""
|
30 |
+
Parse command line arguments.
|
31 |
+
"""
|
32 |
+
parser = argparse.ArgumentParser(
|
33 |
+
description="Prediction script for ReactionT5Yield model."
|
34 |
+
)
|
35 |
+
|
36 |
+
parser.add_argument(
|
37 |
+
"--input_data",
|
38 |
+
type=str,
|
39 |
+
required=True,
|
40 |
+
help="Data as a CSV file that contains an 'input' column. The format of the contents of the column are like 'REACTANT:{reactants of the reaction}PRODUCT:{products of the reaction}'. If there are multiple reactants, concatenate them with '.'.",
|
41 |
+
)
|
42 |
+
parser.add_argument(
|
43 |
+
"--model_name_or_path",
|
44 |
+
type=str,
|
45 |
+
default="sagawa/ReactionT5v2-yield",
|
46 |
+
help="Name or path of the finetuned model for prediction. Can be a local model or one from Hugging Face.",
|
47 |
+
)
|
48 |
+
parser.add_argument("--debug", action="store_true", help="Use debug mode.")
|
49 |
+
parser.add_argument(
|
50 |
+
"--input_max_length",
|
51 |
+
type=int,
|
52 |
+
default=400,
|
53 |
+
help="Maximum token length of input.",
|
54 |
+
)
|
55 |
+
parser.add_argument(
|
56 |
+
"--batch_size", type=int, default=5, required=False, help="Batch size."
|
57 |
+
)
|
58 |
+
parser.add_argument(
|
59 |
+
"--num_workers", type=int, default=4, help="Number of data loading workers."
|
60 |
+
)
|
61 |
+
parser.add_argument(
|
62 |
+
"--fc_dropout",
|
63 |
+
type=float,
|
64 |
+
default=0.0,
|
65 |
+
help="Dropout rate after fully connected layers.",
|
66 |
+
)
|
67 |
+
parser.add_argument(
|
68 |
+
"--output_dir",
|
69 |
+
type=str,
|
70 |
+
default="./",
|
71 |
+
help="Directory where predictions are saved.",
|
72 |
+
)
|
73 |
+
parser.add_argument(
|
74 |
+
"--seed", type=int, default=42, help="Random seed for reproducibility."
|
75 |
+
)
|
76 |
+
|
77 |
+
return parser.parse_args()
|
78 |
+
|
79 |
+
|
80 |
+
if __name__ == "__main__":
    CFG = parse_args()

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    CFG.device = device

    if not os.path.exists(CFG.output_dir):
        os.makedirs(CFG.output_dir)

    seed_everything(seed=CFG.seed)

    # Use an absolute path for local checkpoints so the tokenizer loader does
    # not mistake a relative path for a Hugging Face Hub model id.
    CFG.tokenizer = AutoTokenizer.from_pretrained(
        os.path.abspath(CFG.model_name_or_path)
        if os.path.exists(CFG.model_name_or_path)
        else CFG.model_name_or_path,
        return_tensors="pt",
    )

    # inference_fn moves the model to CFG.device, so no explicit .to() here.
    model = ReactionT5Yield2.from_pretrained(CFG.model_name_or_path)

    test_ds = pd.read_csv(CFG.input_data)
    test_ds = preprocess_df(test_ds, CFG, drop_duplicates=False)

    test_dataset = ReactionT5Dataset(CFG, test_ds)
    test_loader = DataLoader(
        test_dataset,
        batch_size=CFG.batch_size,
        shuffle=False,
        num_workers=CFG.num_workers,
        pin_memory=True,
        drop_last=False,
    )

    prediction = inference_fn(test_loader, model, CFG)

    # NOTE(review): unlike prediction.py, the raw output is used without the
    # *100 rescaling — presumably ReactionT5Yield2 already predicts percent
    # yield; confirm against the model definition in models.py.
    test_ds["prediction"] = prediction
    test_ds["prediction"] = test_ds["prediction"].clip(0, 100)
    test_ds.to_csv(
        os.path.join(CFG.output_dir, "yield_prediction_output.csv"), index=False
    )
|
task_yield/train.py
ADDED
@@ -0,0 +1,570 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import argparse
|
2 |
+
import gc
|
3 |
+
import glob
|
4 |
+
import os
|
5 |
+
import sys
|
6 |
+
import time
|
7 |
+
import warnings
|
8 |
+
from pathlib import Path
|
9 |
+
|
10 |
+
import numpy as np
|
11 |
+
import pandas as pd
|
12 |
+
import torch
|
13 |
+
import torch.nn as nn
|
14 |
+
from datasets.utils.logging import disable_progress_bar
|
15 |
+
from sklearn.metrics import mean_squared_error, r2_score
|
16 |
+
from torch.optim import AdamW
|
17 |
+
from torch.utils.data import DataLoader, Dataset
|
18 |
+
from transformers import AutoTokenizer, get_linear_schedule_with_warmup
|
19 |
+
|
20 |
+
# Append the utils module path
|
21 |
+
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
|
22 |
+
from generation_utils import prepare_input
|
23 |
+
from models import ReactionT5Yield
|
24 |
+
from rdkit import RDLogger
|
25 |
+
from utils import (
|
26 |
+
AverageMeter,
|
27 |
+
add_new_tokens,
|
28 |
+
canonicalize,
|
29 |
+
filter_out,
|
30 |
+
get_logger,
|
31 |
+
get_optimizer_params,
|
32 |
+
seed_everything,
|
33 |
+
space_clean,
|
34 |
+
timeSince,
|
35 |
+
)
|
36 |
+
|
37 |
+
# Suppress warnings and logging
|
38 |
+
warnings.filterwarnings("ignore")
|
39 |
+
RDLogger.DisableLog("rdApp.*")
|
40 |
+
disable_progress_bar()
|
41 |
+
os.environ["TOKENIZERS_PARALLELISM"] = "false"
|
42 |
+
|
43 |
+
|
44 |
+
def parse_args():
|
45 |
+
"""
|
46 |
+
Parse command line arguments.
|
47 |
+
"""
|
48 |
+
parser = argparse.ArgumentParser(
|
49 |
+
description="Training script for ReactionT5Yield model."
|
50 |
+
)
|
51 |
+
|
52 |
+
parser.add_argument(
|
53 |
+
"--train_data_path",
|
54 |
+
type=str,
|
55 |
+
required=True,
|
56 |
+
help="Path to training data CSV file.",
|
57 |
+
)
|
58 |
+
parser.add_argument(
|
59 |
+
"--valid_data_path",
|
60 |
+
type=str,
|
61 |
+
required=True,
|
62 |
+
help="Path to validation data CSV file.",
|
63 |
+
)
|
64 |
+
parser.add_argument(
|
65 |
+
"--test_data_path",
|
66 |
+
type=str,
|
67 |
+
help="Path to testing data CSV file.",
|
68 |
+
)
|
69 |
+
parser.add_argument(
|
70 |
+
"--CN_test_data_path",
|
71 |
+
type=str,
|
72 |
+
help="Path to CN testing data CSV file.",
|
73 |
+
)
|
74 |
+
parser.add_argument(
|
75 |
+
"--pretrained_model_name_or_path",
|
76 |
+
type=str,
|
77 |
+
default="sagawa/CompoundT5",
|
78 |
+
help="Pretrained model name or path.",
|
79 |
+
)
|
80 |
+
parser.add_argument(
|
81 |
+
"--model_name_or_path",
|
82 |
+
type=str,
|
83 |
+
help="The model's name or path used for fine-tuning.",
|
84 |
+
)
|
85 |
+
parser.add_argument("--debug", action="store_true", help="Enable debug mode.")
|
86 |
+
parser.add_argument(
|
87 |
+
"--epochs", type=int, default=5, help="Number of training epochs."
|
88 |
+
)
|
89 |
+
parser.add_argument(
|
90 |
+
"--patience", type=int, default=10, help="Early stopping patience."
|
91 |
+
)
|
92 |
+
parser.add_argument("--lr", type=float, default=5e-4, help="Learning rate.")
|
93 |
+
parser.add_argument("--batch_size", type=int, default=5, help="Batch size.")
|
94 |
+
parser.add_argument(
|
95 |
+
"--input_max_length", type=int, default=400, help="Maximum input token length."
|
96 |
+
)
|
97 |
+
parser.add_argument(
|
98 |
+
"--num_workers", type=int, default=4, help="Number of data loading workers."
|
99 |
+
)
|
100 |
+
parser.add_argument(
|
101 |
+
"--fc_dropout",
|
102 |
+
type=float,
|
103 |
+
default=0.0,
|
104 |
+
help="Dropout rate after fully connected layers.",
|
105 |
+
)
|
106 |
+
parser.add_argument(
|
107 |
+
"--eps", type=float, default=1e-6, help="Epsilon for Adam optimizer."
|
108 |
+
)
|
109 |
+
parser.add_argument(
|
110 |
+
"--weight_decay", type=float, default=0.05, help="Weight decay for optimizer."
|
111 |
+
)
|
112 |
+
parser.add_argument(
|
113 |
+
"--max_grad_norm",
|
114 |
+
type=int,
|
115 |
+
default=1000,
|
116 |
+
help="Maximum gradient norm for clipping.",
|
117 |
+
)
|
118 |
+
parser.add_argument(
|
119 |
+
"--gradient_accumulation_steps",
|
120 |
+
type=int,
|
121 |
+
default=1,
|
122 |
+
help="Gradient accumulation steps.",
|
123 |
+
)
|
124 |
+
parser.add_argument(
|
125 |
+
"--num_warmup_steps", type=int, default=0, help="Number of warmup steps."
|
126 |
+
)
|
127 |
+
parser.add_argument(
|
128 |
+
"--batch_scheduler", action="store_true", help="Use batch scheduler."
|
129 |
+
)
|
130 |
+
parser.add_argument(
|
131 |
+
"--print_freq", type=int, default=100, help="Logging frequency."
|
132 |
+
)
|
133 |
+
parser.add_argument(
|
134 |
+
"--use_amp",
|
135 |
+
action="store_true",
|
136 |
+
help="Use automatic mixed precision for training.",
|
137 |
+
)
|
138 |
+
parser.add_argument(
|
139 |
+
"--output_dir",
|
140 |
+
type=str,
|
141 |
+
default="./",
|
142 |
+
help="Directory to save the trained model.",
|
143 |
+
)
|
144 |
+
parser.add_argument(
|
145 |
+
"--seed", type=int, default=42, help="Random seed for reproducibility."
|
146 |
+
)
|
147 |
+
parser.add_argument(
|
148 |
+
"--sampling_num",
|
149 |
+
type=int,
|
150 |
+
default=-1,
|
151 |
+
help="Number of samples used for training. If you want to use all samples, set -1.",
|
152 |
+
)
|
153 |
+
parser.add_argument(
|
154 |
+
"--sampling_frac",
|
155 |
+
type=float,
|
156 |
+
default=-1.0,
|
157 |
+
help="Ratio of samples used for training. If you want to use all samples, set -1.0.",
|
158 |
+
)
|
159 |
+
parser.add_argument(
|
160 |
+
"--checkpoint",
|
161 |
+
type=str,
|
162 |
+
help="Path to the checkpoint file for resuming training.",
|
163 |
+
)
|
164 |
+
|
165 |
+
return parser.parse_args()
|
166 |
+
|
167 |
+
|
168 |
+
def preprocess_df(df, cfg, drop_duplicates=True):
    """
    Preprocess the input DataFrame for training.

    Normalizes yields to [0, 1], fills and canonicalizes the SMILES columns,
    and builds the 'input' string fed to the model.

    Args:
        df (pd.DataFrame): Input DataFrame.
        cfg (argparse.Namespace): Configuration object (uses ``cfg.debug``).
        drop_duplicates (bool): If True, keep only unique (input, YIELD) rows.

    Returns:
        pd.DataFrame: Preprocessed DataFrame.
    """
    if "YIELD" in df.columns:
        # if max yield is 100, then normalize to [0, 1]
        # (data already scaled to [0, 1] is left untouched)
        if df["YIELD"].max() >= 100:
            df["YIELD"] = df["YIELD"].clip(0, 100) / 100
    else:
        # No labels available (e.g. pure inference input).
        df["YIELD"] = None

    # Ensure every component column exists; a single space marks "absent".
    for col in ["REACTANT", "PRODUCT", "CATALYST", "REAGENT", "SOLVENT"]:
        if col not in df.columns:
            df[col] = None
        df[col] = df[col].fillna(" ")

    # Catalysts are folded into the reagent field; SOLVENT is filled above but
    # not used in the 'input' string below.
    df["REAGENT"] = df["CATALYST"] + "." + df["REAGENT"]

    for col in ["REAGENT", "REACTANT", "PRODUCT"]:
        df[col] = df[col].apply(lambda x: space_clean(x))
        df[col] = df[col].apply(lambda x: canonicalize(x) if x != " " else " ")
        # Rows whose SMILES failed to canonicalize (presumably left as NaN by
        # canonicalize — confirm in utils.py) are dropped here.
        df = df[~df[col].isna()].reset_index(drop=True)
        # Sort '.'-separated fragments so equivalent mixtures compare equal.
        df[col] = df[col].apply(lambda x: ".".join(sorted(x.split("."))))

    df["input"] = (
        "REACTANT:"
        + df["REACTANT"]
        + "REAGENT:"
        + df["REAGENT"]
        + "PRODUCT:"
        + df["PRODUCT"]
    )
    if drop_duplicates:
        df = df.loc[df[["input", "YIELD"]].drop_duplicates().index].reset_index(
            drop=True
        )

    if cfg.debug:
        # Keep a small slice for quick debugging runs.
        df = df.head(1000)

    return df
|
216 |
+
|
217 |
+
|
218 |
+
def preprocess_CN(df):
    """
    Preprocess the CN test DataFrame.

    Sorts the '.'-separated fragments of each component column into a
    canonical order and assembles the model input string.

    Args:
        df (pd.DataFrame): Input DataFrame.

    Returns:
        pd.DataFrame: Preprocessed DataFrame.
    """
    # Canonical ordering of fragments makes equivalent mixtures identical.
    for column in ("REACTANT", "REAGENT", "PRODUCT"):
        df[column] = df[column].apply(lambda s: ".".join(sorted(s.split("."))))

    df["input"] = (
        "REACTANT:"
        + df["REACTANT"]
        + "REAGENT:"
        + df["REAGENT"]
        + "PRODUCT:"
        + df["PRODUCT"]
    )
    df["pair"] = df["input"]
    return df
|
241 |
+
|
242 |
+
|
243 |
+
class TrainDataset(Dataset):
    """
    Torch dataset pairing reaction input strings with yield labels.
    """

    def __init__(self, cfg, df):
        self.cfg = cfg
        self.inputs = df["input"].values
        self.labels = df["YIELD"].values

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, item):
        # Tokenize lazily, one example per access.
        encoded = prepare_input(self.cfg, self.inputs[item])
        target = torch.tensor(self.labels[item], dtype=torch.float)
        return encoded, target
|
260 |
+
|
261 |
+
|
262 |
+
def save_checkpoint(state, filename="checkpoint.pth.tar"):
    """
    Save model checkpoint.

    Thin wrapper around ``torch.save`` so all checkpoints go through one
    place.

    Args:
        state (dict): Checkpoint state.
        filename (str): Filename to save the checkpoint.
    """
    torch.save(state, filename)
|
271 |
+
|
272 |
+
|
273 |
+
def train_fn(train_loader, model, criterion, optimizer, epoch, scheduler, cfg):
    """
    Training function for one epoch.

    Args:
        train_loader (DataLoader): DataLoader for training data.
        model (nn.Module): Model to be trained.
        criterion (nn.Module): Loss function.
        optimizer (Optimizer): Optimizer.
        epoch (int): Current epoch.
        scheduler (Scheduler): Learning rate scheduler.
        cfg (argparse.Namespace): Configuration object.

    Returns:
        float: Average training loss.
    """
    model.train()
    # NOTE(review): torch.autocast expects a device-type string ("cuda"/"cpu");
    # if cfg.device is a torch.device, cfg.device.type should be passed —
    # confirm how cfg.device is set in __main__.
    scaler = torch.amp.GradScaler(enabled=cfg.use_amp)
    losses = AverageMeter()
    start = time.time()
    grad_norm = float("nan")  # defined after the first optimizer step

    for step, (inputs, labels) in enumerate(train_loader):
        inputs = {k: v.to(cfg.device) for k, v in inputs.items()}
        labels = labels.to(cfg.device)
        batch_size = labels.size(0)

        with torch.autocast(cfg.device, enabled=cfg.use_amp):
            y_preds = model(inputs)
            loss = criterion(y_preds.view(-1, 1), labels.view(-1, 1))

        if cfg.gradient_accumulation_steps > 1:
            loss /= cfg.gradient_accumulation_steps

        losses.update(loss.item(), batch_size)
        scaler.scale(loss).backward()

        if (step + 1) % cfg.gradient_accumulation_steps == 0:
            # Gradients must be unscaled before clipping under AMP, otherwise
            # the clip threshold is applied to scaled gradients (previously the
            # clip ran on scaled grads, and on every micro-step instead of only
            # at the accumulation boundary).
            scaler.unscale_(optimizer)
            grad_norm = torch.nn.utils.clip_grad_norm_(
                model.parameters(), cfg.max_grad_norm
            )
            scaler.step(optimizer)
            scaler.update()
            optimizer.zero_grad()

            if cfg.batch_scheduler:
                scheduler.step()

        if step % cfg.print_freq == 0 or step == (len(train_loader) - 1):
            print(
                f"Epoch: [{epoch + 1}][{step}/{len(train_loader)}] "
                f"Elapsed {timeSince(start, float(step + 1) / len(train_loader))} "
                f"Loss: {losses.val:.4f}({losses.avg:.4f}) "
                f"Grad: {grad_norm:.4f} "
                # get_last_lr() replaces the deprecated get_lr() accessor.
                f"LR: {scheduler.get_last_lr()[0]:.8f}"
            )

    return losses.avg
|
331 |
+
|
332 |
+
|
333 |
+
def valid_fn(valid_loader, model, cfg):
    """
    Validation function.

    Args:
        valid_loader (DataLoader): DataLoader for validation data.
        model (nn.Module): Model to be validated.
        cfg (argparse.Namespace): Configuration object.

    Returns:
        tuple: Validation loss (MSE) and R^2 score.
    """
    model.eval()
    start = time.time()
    all_labels = []
    all_preds = []

    for step, (inputs, labels) in enumerate(valid_loader):
        inputs = {k: v.to(cfg.device) for k, v in inputs.items()}
        with torch.no_grad():
            outputs = model(inputs)
        all_labels.extend(labels.tolist())
        all_preds.extend(outputs.tolist())

        if step % cfg.print_freq == 0 or step == (len(valid_loader) - 1):
            print(
                f"EVAL: [{step}/{len(valid_loader)}] "
                f"Elapsed {timeSince(start, float(step + 1) / len(valid_loader))} "
                f"RMSE Loss: {np.sqrt(mean_squared_error(all_labels, all_preds)):.4f} "
                f"R^2 Score: {r2_score(all_labels, all_preds):.4f}"
            )

    return mean_squared_error(all_labels, all_preds), r2_score(all_labels, all_preds)
|
366 |
+
|
367 |
+
|
368 |
+
def train_loop(train_ds, valid_ds, cfg):
    """
    Full training loop: builds loaders, model, optimizer and scheduler, then
    trains with early stopping, best-model saving and resumable checkpointing.

    Args:
        train_ds (pd.DataFrame): Training data.
        valid_ds (pd.DataFrame): Validation data.
        cfg (argparse.Namespace): Configuration object (batch_size, lr, epochs,
            patience, output_dir, logger, ...).
    """
    train_dataset = TrainDataset(cfg, train_ds)
    valid_dataset = TrainDataset(cfg, valid_ds)

    train_loader = DataLoader(
        train_dataset,
        batch_size=cfg.batch_size,
        shuffle=True,
        num_workers=cfg.num_workers,
        pin_memory=True,
        drop_last=True,
    )
    valid_loader = DataLoader(
        valid_dataset,
        batch_size=cfg.batch_size,
        shuffle=False,
        num_workers=cfg.num_workers,
        pin_memory=True,
        drop_last=False,
    )

    if not cfg.model_name_or_path:
        # Fresh model from the pretrained backbone.
        model = ReactionT5Yield(cfg, config_path=None, pretrained=True)
        torch.save(model.config, os.path.join(cfg.output_dir, "config.pth"))
    else:
        # Rebuild the architecture from a saved config, then try each .pth
        # file in the directory until one matches the model's state dict.
        model = ReactionT5Yield(
            cfg,
            config_path=os.path.join(cfg.model_name_or_path, "config.pth"),
            pretrained=False,
        )
        torch.save(model.config, os.path.join(cfg.output_dir, "config.pth"))
        pth_files = glob.glob(os.path.join(cfg.model_name_or_path, "*.pth"))
        for pth_file in pth_files:
            # NOTE(security): weights_only=False unpickles arbitrary objects;
            # only load checkpoints from trusted sources.
            state = torch.load(
                pth_file, map_location=torch.device("cpu"), weights_only=False
            )
            try:
                model.load_state_dict(state)
                break
            except Exception:
                # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
                # are no longer swallowed; incompatible files are simply skipped.
                continue
    model.to(cfg.device)

    optimizer_parameters = get_optimizer_params(
        model, encoder_lr=cfg.lr, decoder_lr=cfg.lr, weight_decay=cfg.weight_decay
    )
    optimizer = AdamW(optimizer_parameters, lr=cfg.lr, eps=cfg.eps, betas=(0.9, 0.999))

    num_train_steps = int(len(train_ds) / cfg.batch_size * cfg.epochs)
    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=cfg.num_warmup_steps,
        num_training_steps=num_train_steps,
    )

    criterion = nn.MSELoss(reduction="mean")
    best_loss = float("inf")
    start_epoch = 0
    es_count = 0

    # Resume training state (model/optimizer/scheduler/early-stop counters)
    # from a checkpoint if one was supplied.
    if cfg.checkpoint:
        checkpoint = torch.load(cfg.checkpoint)
        model.load_state_dict(checkpoint["state_dict"])
        optimizer.load_state_dict(checkpoint["optimizer"])
        scheduler.load_state_dict(checkpoint["scheduler"])
        best_loss = checkpoint["loss"]
        start_epoch = checkpoint["epoch"] + 1
        es_count = checkpoint["es_count"]
        del checkpoint

    for epoch in range(start_epoch, cfg.epochs):
        start_time = time.time()

        avg_loss = train_fn(
            train_loader, model, criterion, optimizer, epoch, scheduler, cfg
        )
        val_loss, val_r2_score = valid_fn(valid_loader, model, cfg)

        elapsed = time.time() - start_time

        cfg.logger.info(
            f"Epoch {epoch + 1} - avg_train_loss: {avg_loss:.4f} val_rmse_loss: {val_loss:.4f} val_r2_score: {val_r2_score:.4f} time: {elapsed:.0f}s"
        )

        if val_loss < best_loss:
            # New best validation loss: reset early stopping and save weights.
            es_count = 0
            best_loss = val_loss
            cfg.logger.info(
                f"Epoch {epoch + 1} - Save Lowest Loss: {best_loss:.4f} Model"
            )
            torch.save(
                model.state_dict(),
                os.path.join(
                    cfg.output_dir,
                    f"{cfg.pretrained_model_name_or_path.split('/')[-1]}_best.pth",
                ),
            )
        else:
            es_count += 1
            if es_count >= cfg.patience:
                print("Early stopping")
                break

        # Checkpoint after every finished epoch so training can be resumed.
        save_checkpoint(
            {
                "epoch": epoch,
                "state_dict": model.state_dict(),
                "optimizer": optimizer.state_dict(),
                "scheduler": scheduler.state_dict(),
                "loss": best_loss,
                "es_count": es_count,
            },
            filename=os.path.join(cfg.output_dir, "checkpoint.pth.tar"),
        )

    torch.cuda.empty_cache()
    gc.collect()
|
492 |
+
|
493 |
+
|
494 |
+
if __name__ == "__main__":
    # Entry point: parse config, prepare train/valid/test CSVs, build the
    # tokenizer, and launch the yield-prediction training loop.
    CFG = parse_args()
    CFG.batch_scheduler = True
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    CFG.device = device
    if not os.path.exists(CFG.output_dir):
        os.makedirs(CFG.output_dir)
    seed_everything(seed=CFG.seed)

    # Load train/valid sets, dropping rows missing YIELD/REACTANT/PRODUCT.
    train = preprocess_df(
        filter_out(pd.read_csv(CFG.train_data_path), ["YIELD", "REACTANT", "PRODUCT"]),
        CFG,
    )
    valid = preprocess_df(
        filter_out(pd.read_csv(CFG.valid_data_path), ["YIELD", "REACTANT", "PRODUCT"]),
        CFG,
    )

    # Optionally remove training rows that overlap the C-N coupling test set
    # (matched on the preprocessed "pair" key) to avoid leakage.
    if CFG.CN_test_data_path:
        train_copy = preprocess_CN(train.copy())
        CN_test = preprocess_CN(pd.read_csv(CFG.CN_test_data_path))

        print(len(train))
        train = train[~train_copy["pair"].isin(CN_test["pair"])].reset_index(drop=True)
        print(len(train))

    # "pair" uniquely identifies an (input, yield) example; drop validation
    # rows that also appear in training.
    train["pair"] = train["input"] + " - " + train["YIELD"].astype(str)
    valid["pair"] = valid["input"] + " - " + valid["YIELD"].astype(str)
    valid = valid[~valid["pair"].isin(train["pair"])].reset_index(drop=True)

    # Optional subsampling of the training set (absolute count or fraction).
    if CFG.sampling_num > 0:
        train = train.sample(n=CFG.sampling_num, random_state=CFG.seed).reset_index(
            drop=True
        )
    elif CFG.sampling_frac > 0:
        train = train.sample(frac=CFG.sampling_frac, random_state=CFG.seed).reset_index(
            drop=True
        )

    train.to_csv("train.csv", index=False)
    valid.to_csv("valid.csv", index=False)

    # Optional test set: same filtering, deduplicated and leakage-free
    # with respect to the training pairs.
    if CFG.test_data_path:
        test = filter_out(
            pd.read_csv(CFG.test_data_path), ["YIELD", "REACTANT", "PRODUCT"]
        )
        test = preprocess_df(test, CFG)
        test["pair"] = test["input"] + " - " + test["YIELD"].astype(str)
        test = test[~test["pair"].isin(train["pair"])].reset_index(drop=True)
        test = test.drop_duplicates(subset=["pair"]).reset_index(drop=True)
        test.to_csv("test.csv", index=False)

    LOGGER = get_logger(os.path.join(CFG.output_dir, "train"))
    CFG.logger = LOGGER

    # load tokenizer
    tokenizer = AutoTokenizer.from_pretrained(
        os.path.abspath(CFG.model_name_or_path)
        if os.path.exists(CFG.model_name_or_path)
        else CFG.model_name_or_path,
        return_tensors="pt",
    )
    # Extend the vocabulary with chemistry tokens from the shared token file,
    # plus the role-marker special tokens used to format reaction inputs.
    tokenizer = add_new_tokens(
        tokenizer,
        Path(__file__).resolve().parent.parent / "data" / "additional_tokens.txt",
    )

    tokenizer.add_special_tokens(
        {
            "additional_special_tokens": tokenizer.additional_special_tokens
            + ["REACTANT:", "PRODUCT:", "REAGENT:"]
        }
    )
    tokenizer.save_pretrained(CFG.output_dir)
    CFG.tokenizer = tokenizer

    train_loop(train, valid, CFG)
|
task_yield/visualize_embedding.ipynb
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5187f4fb3a6d7fc19873902ca53a1699152cdc5cb50e79bd946bb430b7be154d
|
3 |
+
size 10491206
|
utils.py
ADDED
@@ -0,0 +1,277 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import math
|
2 |
+
import os
|
3 |
+
import pickle
|
4 |
+
import random
|
5 |
+
import time
|
6 |
+
|
7 |
+
import numpy as np
|
8 |
+
import torch
|
9 |
+
from rdkit import Chem
|
10 |
+
|
11 |
+
|
12 |
+
def seed_everything(seed=42):
    """Seed every RNG in use (python, numpy, torch) for reproducible runs."""
    os.environ["PYTHONHASHSEED"] = str(seed)
    for seeder in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed):
        seeder(seed)
    # Force deterministic cuDNN kernels (may be slower than the default).
    torch.backends.cudnn.deterministic = True
|
19 |
+
|
20 |
+
|
21 |
+
def space_clean(row):
    """Remove spaces adjacent to '.' separators and collapse double spaces."""
    for pattern, replacement in ((". ", ""), (" .", ""), ("  ", " ")):
        row = row.replace(pattern, replacement)
    return row
|
24 |
+
|
25 |
+
|
26 |
+
def canonicalize(smiles):
    """
    Return the canonical SMILES for *smiles*, or None when RDKit fails to
    parse or serialize it (e.g. an invalid SMILES string).
    """
    try:
        return Chem.MolToSmiles(Chem.MolFromSmiles(smiles), canonical=True)
    except Exception:
        # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
        # are not swallowed; RDKit parse failures still yield None.
        return None
|
32 |
+
|
33 |
+
|
34 |
+
def canonicalize_str(smiles):
    """
    Try to canonicalize the molecule, return empty string if it fails.

    Strings containing '%' (e.g. yield-percentage tokens) are returned
    unchanged without attempting canonicalization.
    """
    if "%" in smiles:
        return smiles
    result = canonicalize(smiles)
    # Bug fix: canonicalize() signals failure by returning None (it catches
    # its own exceptions), so the previous try/except here never fired and
    # invalid SMILES leaked out as None instead of the documented "".
    return result if result is not None else ""
|
43 |
+
|
44 |
+
|
45 |
+
def uncanonicalize(smiles):
    """
    Re-root each '.'-separated fragment at a randomly chosen atom to produce
    a non-canonical SMILES; returns None if RDKit fails on any fragment.
    """
    try:
        fragments = []
        for fragment in smiles.split("."):
            mol = Chem.MolFromSmiles(fragment)
            order = list(range(mol.GetNumAtoms()))
            random.shuffle(order)
            fragments.append(
                Chem.MolToSmiles(mol, rootedAtAtom=order[0], canonical=False)
            )
        return ".".join(fragments)
    except:
        return None
|
60 |
+
|
61 |
+
|
62 |
+
def remove_atom_mapping(smi):
    """Strip atom-map numbers from a SMILES and return its canonical form."""
    mol = Chem.MolFromSmiles(smi)
    for atom in mol.GetAtoms():
        atom.SetAtomMapNum(0)
    unmapped = Chem.MolToSmiles(mol, canonical=True)
    return canonicalize(unmapped)
|
67 |
+
|
68 |
+
|
69 |
+
def get_logger(filename="train"):
    """
    Build a logger that writes bare messages both to stdout and to
    ``<filename>.log``.

    Args:
        filename (str): Path prefix of the log file (without extension).

    Returns:
        logging.Logger: Configured logger at INFO level.
    """
    from logging import INFO, FileHandler, Formatter, StreamHandler, getLogger

    logger = getLogger(__name__)
    logger.setLevel(INFO)
    handler1 = StreamHandler()
    handler1.setFormatter(Formatter("%(message)s"))
    # Bug fix: the file-handler path previously ignored the ``filename``
    # argument and always wrote to a hard-coded "(unknown).log".
    handler2 = FileHandler(filename=f"{filename}.log")
    handler2.setFormatter(Formatter("%(message)s"))
    logger.addHandler(handler1)
    logger.addHandler(handler2)
    return logger
|
81 |
+
|
82 |
+
|
83 |
+
class AverageMeter(object):
    """Tracks the most recent value and a running average of a metric."""

    def __init__(self):
        self.reset()

    def reset(self):
        """Clear all accumulated statistics."""
        self.val = 0
        self.sum = 0
        self.count = 0
        self.avg = 0

    def update(self, val, n=1):
        """Record *val* observed *n* times and refresh the running average."""
        self.val = val
        self.count += n
        self.sum += val * n
        self.avg = self.sum / self.count
|
98 |
+
|
99 |
+
|
100 |
+
def asMinutes(s):
    """Format a duration in seconds as 'Xm Ys'."""
    minutes, seconds = divmod(s, 60)
    return "%dm %ds" % (minutes, seconds)


def timeSince(since, percent):
    """
    Return elapsed time since *since* and a linear estimate of the
    remaining time, given the fraction *percent* of work already done.
    """
    elapsed = time.time() - since
    total_estimate = elapsed / percent
    remaining = total_estimate - elapsed
    return "%s (remain %s)" % (asMinutes(elapsed), asMinutes(remaining))
|
112 |
+
|
113 |
+
|
114 |
+
def get_optimizer_params(model, encoder_lr, decoder_lr, weight_decay=0.0):
    """
    Build three optimizer parameter groups: backbone (``model.model``)
    weights with weight decay, backbone bias/LayerNorm parameters without
    decay, and everything outside the backbone at the decoder learning rate.
    """
    no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"]

    def _skip_decay(name):
        # bias and LayerNorm parameters are conventionally excluded from decay
        return any(token in name for token in no_decay)

    decayed, undecayed = [], []
    for name, param in model.model.named_parameters():
        (undecayed if _skip_decay(name) else decayed).append(param)
    head_params = [p for n, p in model.named_parameters() if "model" not in n]

    return [
        {"params": decayed, "lr": encoder_lr, "weight_decay": weight_decay},
        {"params": undecayed, "lr": encoder_lr, "weight_decay": 0.0},
        {"params": head_params, "lr": decoder_lr, "weight_decay": 0.0},
    ]
|
142 |
+
|
143 |
+
|
144 |
+
def to_cpu(obj):
    """
    Recursively move all tensors in a (possibly nested) container to the CPU.

    Dicts keep their keys; lists, tuples and sets all come back as lists.
    Non-tensor leaves are returned unchanged.
    """
    if torch.is_tensor(obj):
        return obj.to("cpu")
    if isinstance(obj, dict):
        return {k: to_cpu(v) for k, v in obj.items()}
    # NOTE: the `isinstance(obj, torch.Tensor)` check that used to sit in the
    # container branch was unreachable dead code -- tensors are always caught
    # by the torch.is_tensor branch above -- so it has been removed.
    if isinstance(obj, (list, tuple, set)):
        return [to_cpu(v) for v in obj]
    return obj
|
158 |
+
|
159 |
+
|
160 |
+
def get_accuracy_score(eval_preds, cfg):
    """
    Exact-match accuracy between decoded predictions and labels, compared
    after whitespace removal and SMILES canonicalization.
    """
    preds, labels = eval_preds
    if isinstance(preds, tuple):
        preds = preds[0]

    pred_texts = cfg.tokenizer.batch_decode(preds, skip_special_tokens=True)

    # -100 marks ignored positions in the labels; map them back to pad ids
    # so they can be decoded.
    labels = np.where(labels != -100, labels, cfg.tokenizer.pad_token_id)
    label_texts = cfg.tokenizer.batch_decode(labels, skip_special_tokens=True)

    pred_smiles = [canonicalize_str(t.strip().replace(" ", "")) for t in pred_texts]
    label_smiles = [[canonicalize_str(t.strip().replace(" ", ""))] for t in label_texts]

    matches = sum(
        1 for pred, label in zip(pred_smiles, label_smiles) if pred == label[0]
    )
    return {"accuracy": matches / len(pred_smiles)}
|
183 |
+
|
184 |
+
|
185 |
+
def get_accuracy_score_multitask(eval_preds, cfg):
    """
    Exact-match accuracy for multitask outputs: special tokens are stripped
    manually (the yield-percentage tokens like '50%' are kept, as they carry
    task information), then predictions and labels are canonicalized and
    compared.
    """
    preds, labels = eval_preds
    if isinstance(preds, tuple):
        preds = preds[0]

    token_map = cfg.tokenizer.special_tokens_map
    yield_tokens = [
        "0%",
        "10%",
        "20%",
        "30%",
        "40%",
        "50%",
        "60%",
        "70%",
        "80%",
        "90%",
        "100%",
    ]
    # eos/pad/unk plus every additional special token except the yield tokens.
    strip_tokens = [
        token_map["eos_token"],
        token_map["pad_token"],
        token_map["unk_token"],
    ] + list(set(token_map["additional_special_tokens"]) - set(yield_tokens))

    def _strip_specials(texts):
        # Remove each unwanted special token from every decoded string.
        for token in strip_tokens:
            texts = [text.replace(token, "") for text in texts]
        return texts

    pred_texts = _strip_specials(
        cfg.tokenizer.batch_decode(preds, skip_special_tokens=False)
    )
    labels = np.where(labels != -100, labels, cfg.tokenizer.pad_token_id)
    label_texts = _strip_specials(
        cfg.tokenizer.batch_decode(labels, skip_special_tokens=False)
    )

    pred_smiles = [canonicalize_str(t.strip().replace(" ", "")) for t in pred_texts]
    label_smiles = [[canonicalize_str(t.strip().replace(" ", ""))] for t in label_texts]

    matches = sum(
        1 for pred, label in zip(pred_smiles, label_smiles) if pred == label[0]
    )
    return {"accuracy": matches / len(pred_smiles)}
|
236 |
+
|
237 |
+
|
238 |
+
def preprocess_dataset(examples, cfg):
    """
    Tokenize inputs and targets for seq2seq training; the target token ids
    are attached to the model inputs under the 'labels' key.
    """
    tokenized = cfg.tokenizer(
        examples["input"], max_length=cfg.input_max_length, truncation=True
    )
    targets = cfg.tokenizer(
        examples[cfg.target_column],
        max_length=cfg.target_max_length,
        truncation=True,
    )
    tokenized["labels"] = targets["input_ids"]
    return tokenized
|
247 |
+
|
248 |
+
|
249 |
+
def filter_out(df, col_names):
    """Drop rows with NaN in any of *col_names*, resetting the index each pass."""
    for column in col_names:
        df = df.loc[df[column].notna()].reset_index(drop=True)
    return df
|
253 |
+
|
254 |
+
|
255 |
+
def save_pickle(path: str, contents):
    """Saves contents to a pickle file."""
    with open(path, "wb") as handle:
        pickle.dump(contents, handle)


def load_pickle(path: str):
    """Loads contents from a pickle file."""
    with open(path, "rb") as handle:
        return pickle.load(handle)
|
265 |
+
|
266 |
+
|
267 |
+
def add_new_tokens(tokenizer, file_path):
    """
    Adds new tokens to the tokenizer from a file.
    The file should contain one token per line; blank lines are skipped.
    """
    tokens = []
    with open(file_path, "r") as f:
        for line in f:
            token = line.strip()
            if token:
                tokens.append(token)

    tokenizer.add_tokens(tokens)

    return tokenizer
|