Upload processor
Browse files- added_tokens.json +1027 -0
- merges.txt +0 -0
- preprocessor_config.json +28 -0
- processing_travisionlm.py +276 -0
- processor_config.json +6 -0
- special_tokens_map.json +39 -0
- tokenizer.json +0 -0
- tokenizer_config.json +0 -0
- vocab.json +0 -0
added_tokens.json
ADDED
@@ -0,0 +1,1027 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"<image>": 50257,
|
3 |
+
"<loc0000>": 50258,
|
4 |
+
"<loc0001>": 50259,
|
5 |
+
"<loc0002>": 50260,
|
6 |
+
"<loc0003>": 50261,
|
7 |
+
"<loc0004>": 50262,
|
8 |
+
"<loc0005>": 50263,
|
9 |
+
"<loc0006>": 50264,
|
10 |
+
"<loc0007>": 50265,
|
11 |
+
"<loc0008>": 50266,
|
12 |
+
"<loc0009>": 50267,
|
13 |
+
"<loc0010>": 50268,
|
14 |
+
"<loc0011>": 50269,
|
15 |
+
"<loc0012>": 50270,
|
16 |
+
"<loc0013>": 50271,
|
17 |
+
"<loc0014>": 50272,
|
18 |
+
"<loc0015>": 50273,
|
19 |
+
"<loc0016>": 50274,
|
20 |
+
"<loc0017>": 50275,
|
21 |
+
"<loc0018>": 50276,
|
22 |
+
"<loc0019>": 50277,
|
23 |
+
"<loc0020>": 50278,
|
24 |
+
"<loc0021>": 50279,
|
25 |
+
"<loc0022>": 50280,
|
26 |
+
"<loc0023>": 50281,
|
27 |
+
"<loc0024>": 50282,
|
28 |
+
"<loc0025>": 50283,
|
29 |
+
"<loc0026>": 50284,
|
30 |
+
"<loc0027>": 50285,
|
31 |
+
"<loc0028>": 50286,
|
32 |
+
"<loc0029>": 50287,
|
33 |
+
"<loc0030>": 50288,
|
34 |
+
"<loc0031>": 50289,
|
35 |
+
"<loc0032>": 50290,
|
36 |
+
"<loc0033>": 50291,
|
37 |
+
"<loc0034>": 50292,
|
38 |
+
"<loc0035>": 50293,
|
39 |
+
"<loc0036>": 50294,
|
40 |
+
"<loc0037>": 50295,
|
41 |
+
"<loc0038>": 50296,
|
42 |
+
"<loc0039>": 50297,
|
43 |
+
"<loc0040>": 50298,
|
44 |
+
"<loc0041>": 50299,
|
45 |
+
"<loc0042>": 50300,
|
46 |
+
"<loc0043>": 50301,
|
47 |
+
"<loc0044>": 50302,
|
48 |
+
"<loc0045>": 50303,
|
49 |
+
"<loc0046>": 50304,
|
50 |
+
"<loc0047>": 50305,
|
51 |
+
"<loc0048>": 50306,
|
52 |
+
"<loc0049>": 50307,
|
53 |
+
"<loc0050>": 50308,
|
54 |
+
"<loc0051>": 50309,
|
55 |
+
"<loc0052>": 50310,
|
56 |
+
"<loc0053>": 50311,
|
57 |
+
"<loc0054>": 50312,
|
58 |
+
"<loc0055>": 50313,
|
59 |
+
"<loc0056>": 50314,
|
60 |
+
"<loc0057>": 50315,
|
61 |
+
"<loc0058>": 50316,
|
62 |
+
"<loc0059>": 50317,
|
63 |
+
"<loc0060>": 50318,
|
64 |
+
"<loc0061>": 50319,
|
65 |
+
"<loc0062>": 50320,
|
66 |
+
"<loc0063>": 50321,
|
67 |
+
"<loc0064>": 50322,
|
68 |
+
"<loc0065>": 50323,
|
69 |
+
"<loc0066>": 50324,
|
70 |
+
"<loc0067>": 50325,
|
71 |
+
"<loc0068>": 50326,
|
72 |
+
"<loc0069>": 50327,
|
73 |
+
"<loc0070>": 50328,
|
74 |
+
"<loc0071>": 50329,
|
75 |
+
"<loc0072>": 50330,
|
76 |
+
"<loc0073>": 50331,
|
77 |
+
"<loc0074>": 50332,
|
78 |
+
"<loc0075>": 50333,
|
79 |
+
"<loc0076>": 50334,
|
80 |
+
"<loc0077>": 50335,
|
81 |
+
"<loc0078>": 50336,
|
82 |
+
"<loc0079>": 50337,
|
83 |
+
"<loc0080>": 50338,
|
84 |
+
"<loc0081>": 50339,
|
85 |
+
"<loc0082>": 50340,
|
86 |
+
"<loc0083>": 50341,
|
87 |
+
"<loc0084>": 50342,
|
88 |
+
"<loc0085>": 50343,
|
89 |
+
"<loc0086>": 50344,
|
90 |
+
"<loc0087>": 50345,
|
91 |
+
"<loc0088>": 50346,
|
92 |
+
"<loc0089>": 50347,
|
93 |
+
"<loc0090>": 50348,
|
94 |
+
"<loc0091>": 50349,
|
95 |
+
"<loc0092>": 50350,
|
96 |
+
"<loc0093>": 50351,
|
97 |
+
"<loc0094>": 50352,
|
98 |
+
"<loc0095>": 50353,
|
99 |
+
"<loc0096>": 50354,
|
100 |
+
"<loc0097>": 50355,
|
101 |
+
"<loc0098>": 50356,
|
102 |
+
"<loc0099>": 50357,
|
103 |
+
"<loc0100>": 50358,
|
104 |
+
"<loc0101>": 50359,
|
105 |
+
"<loc0102>": 50360,
|
106 |
+
"<loc0103>": 50361,
|
107 |
+
"<loc0104>": 50362,
|
108 |
+
"<loc0105>": 50363,
|
109 |
+
"<loc0106>": 50364,
|
110 |
+
"<loc0107>": 50365,
|
111 |
+
"<loc0108>": 50366,
|
112 |
+
"<loc0109>": 50367,
|
113 |
+
"<loc0110>": 50368,
|
114 |
+
"<loc0111>": 50369,
|
115 |
+
"<loc0112>": 50370,
|
116 |
+
"<loc0113>": 50371,
|
117 |
+
"<loc0114>": 50372,
|
118 |
+
"<loc0115>": 50373,
|
119 |
+
"<loc0116>": 50374,
|
120 |
+
"<loc0117>": 50375,
|
121 |
+
"<loc0118>": 50376,
|
122 |
+
"<loc0119>": 50377,
|
123 |
+
"<loc0120>": 50378,
|
124 |
+
"<loc0121>": 50379,
|
125 |
+
"<loc0122>": 50380,
|
126 |
+
"<loc0123>": 50381,
|
127 |
+
"<loc0124>": 50382,
|
128 |
+
"<loc0125>": 50383,
|
129 |
+
"<loc0126>": 50384,
|
130 |
+
"<loc0127>": 50385,
|
131 |
+
"<loc0128>": 50386,
|
132 |
+
"<loc0129>": 50387,
|
133 |
+
"<loc0130>": 50388,
|
134 |
+
"<loc0131>": 50389,
|
135 |
+
"<loc0132>": 50390,
|
136 |
+
"<loc0133>": 50391,
|
137 |
+
"<loc0134>": 50392,
|
138 |
+
"<loc0135>": 50393,
|
139 |
+
"<loc0136>": 50394,
|
140 |
+
"<loc0137>": 50395,
|
141 |
+
"<loc0138>": 50396,
|
142 |
+
"<loc0139>": 50397,
|
143 |
+
"<loc0140>": 50398,
|
144 |
+
"<loc0141>": 50399,
|
145 |
+
"<loc0142>": 50400,
|
146 |
+
"<loc0143>": 50401,
|
147 |
+
"<loc0144>": 50402,
|
148 |
+
"<loc0145>": 50403,
|
149 |
+
"<loc0146>": 50404,
|
150 |
+
"<loc0147>": 50405,
|
151 |
+
"<loc0148>": 50406,
|
152 |
+
"<loc0149>": 50407,
|
153 |
+
"<loc0150>": 50408,
|
154 |
+
"<loc0151>": 50409,
|
155 |
+
"<loc0152>": 50410,
|
156 |
+
"<loc0153>": 50411,
|
157 |
+
"<loc0154>": 50412,
|
158 |
+
"<loc0155>": 50413,
|
159 |
+
"<loc0156>": 50414,
|
160 |
+
"<loc0157>": 50415,
|
161 |
+
"<loc0158>": 50416,
|
162 |
+
"<loc0159>": 50417,
|
163 |
+
"<loc0160>": 50418,
|
164 |
+
"<loc0161>": 50419,
|
165 |
+
"<loc0162>": 50420,
|
166 |
+
"<loc0163>": 50421,
|
167 |
+
"<loc0164>": 50422,
|
168 |
+
"<loc0165>": 50423,
|
169 |
+
"<loc0166>": 50424,
|
170 |
+
"<loc0167>": 50425,
|
171 |
+
"<loc0168>": 50426,
|
172 |
+
"<loc0169>": 50427,
|
173 |
+
"<loc0170>": 50428,
|
174 |
+
"<loc0171>": 50429,
|
175 |
+
"<loc0172>": 50430,
|
176 |
+
"<loc0173>": 50431,
|
177 |
+
"<loc0174>": 50432,
|
178 |
+
"<loc0175>": 50433,
|
179 |
+
"<loc0176>": 50434,
|
180 |
+
"<loc0177>": 50435,
|
181 |
+
"<loc0178>": 50436,
|
182 |
+
"<loc0179>": 50437,
|
183 |
+
"<loc0180>": 50438,
|
184 |
+
"<loc0181>": 50439,
|
185 |
+
"<loc0182>": 50440,
|
186 |
+
"<loc0183>": 50441,
|
187 |
+
"<loc0184>": 50442,
|
188 |
+
"<loc0185>": 50443,
|
189 |
+
"<loc0186>": 50444,
|
190 |
+
"<loc0187>": 50445,
|
191 |
+
"<loc0188>": 50446,
|
192 |
+
"<loc0189>": 50447,
|
193 |
+
"<loc0190>": 50448,
|
194 |
+
"<loc0191>": 50449,
|
195 |
+
"<loc0192>": 50450,
|
196 |
+
"<loc0193>": 50451,
|
197 |
+
"<loc0194>": 50452,
|
198 |
+
"<loc0195>": 50453,
|
199 |
+
"<loc0196>": 50454,
|
200 |
+
"<loc0197>": 50455,
|
201 |
+
"<loc0198>": 50456,
|
202 |
+
"<loc0199>": 50457,
|
203 |
+
"<loc0200>": 50458,
|
204 |
+
"<loc0201>": 50459,
|
205 |
+
"<loc0202>": 50460,
|
206 |
+
"<loc0203>": 50461,
|
207 |
+
"<loc0204>": 50462,
|
208 |
+
"<loc0205>": 50463,
|
209 |
+
"<loc0206>": 50464,
|
210 |
+
"<loc0207>": 50465,
|
211 |
+
"<loc0208>": 50466,
|
212 |
+
"<loc0209>": 50467,
|
213 |
+
"<loc0210>": 50468,
|
214 |
+
"<loc0211>": 50469,
|
215 |
+
"<loc0212>": 50470,
|
216 |
+
"<loc0213>": 50471,
|
217 |
+
"<loc0214>": 50472,
|
218 |
+
"<loc0215>": 50473,
|
219 |
+
"<loc0216>": 50474,
|
220 |
+
"<loc0217>": 50475,
|
221 |
+
"<loc0218>": 50476,
|
222 |
+
"<loc0219>": 50477,
|
223 |
+
"<loc0220>": 50478,
|
224 |
+
"<loc0221>": 50479,
|
225 |
+
"<loc0222>": 50480,
|
226 |
+
"<loc0223>": 50481,
|
227 |
+
"<loc0224>": 50482,
|
228 |
+
"<loc0225>": 50483,
|
229 |
+
"<loc0226>": 50484,
|
230 |
+
"<loc0227>": 50485,
|
231 |
+
"<loc0228>": 50486,
|
232 |
+
"<loc0229>": 50487,
|
233 |
+
"<loc0230>": 50488,
|
234 |
+
"<loc0231>": 50489,
|
235 |
+
"<loc0232>": 50490,
|
236 |
+
"<loc0233>": 50491,
|
237 |
+
"<loc0234>": 50492,
|
238 |
+
"<loc0235>": 50493,
|
239 |
+
"<loc0236>": 50494,
|
240 |
+
"<loc0237>": 50495,
|
241 |
+
"<loc0238>": 50496,
|
242 |
+
"<loc0239>": 50497,
|
243 |
+
"<loc0240>": 50498,
|
244 |
+
"<loc0241>": 50499,
|
245 |
+
"<loc0242>": 50500,
|
246 |
+
"<loc0243>": 50501,
|
247 |
+
"<loc0244>": 50502,
|
248 |
+
"<loc0245>": 50503,
|
249 |
+
"<loc0246>": 50504,
|
250 |
+
"<loc0247>": 50505,
|
251 |
+
"<loc0248>": 50506,
|
252 |
+
"<loc0249>": 50507,
|
253 |
+
"<loc0250>": 50508,
|
254 |
+
"<loc0251>": 50509,
|
255 |
+
"<loc0252>": 50510,
|
256 |
+
"<loc0253>": 50511,
|
257 |
+
"<loc0254>": 50512,
|
258 |
+
"<loc0255>": 50513,
|
259 |
+
"<loc0256>": 50514,
|
260 |
+
"<loc0257>": 50515,
|
261 |
+
"<loc0258>": 50516,
|
262 |
+
"<loc0259>": 50517,
|
263 |
+
"<loc0260>": 50518,
|
264 |
+
"<loc0261>": 50519,
|
265 |
+
"<loc0262>": 50520,
|
266 |
+
"<loc0263>": 50521,
|
267 |
+
"<loc0264>": 50522,
|
268 |
+
"<loc0265>": 50523,
|
269 |
+
"<loc0266>": 50524,
|
270 |
+
"<loc0267>": 50525,
|
271 |
+
"<loc0268>": 50526,
|
272 |
+
"<loc0269>": 50527,
|
273 |
+
"<loc0270>": 50528,
|
274 |
+
"<loc0271>": 50529,
|
275 |
+
"<loc0272>": 50530,
|
276 |
+
"<loc0273>": 50531,
|
277 |
+
"<loc0274>": 50532,
|
278 |
+
"<loc0275>": 50533,
|
279 |
+
"<loc0276>": 50534,
|
280 |
+
"<loc0277>": 50535,
|
281 |
+
"<loc0278>": 50536,
|
282 |
+
"<loc0279>": 50537,
|
283 |
+
"<loc0280>": 50538,
|
284 |
+
"<loc0281>": 50539,
|
285 |
+
"<loc0282>": 50540,
|
286 |
+
"<loc0283>": 50541,
|
287 |
+
"<loc0284>": 50542,
|
288 |
+
"<loc0285>": 50543,
|
289 |
+
"<loc0286>": 50544,
|
290 |
+
"<loc0287>": 50545,
|
291 |
+
"<loc0288>": 50546,
|
292 |
+
"<loc0289>": 50547,
|
293 |
+
"<loc0290>": 50548,
|
294 |
+
"<loc0291>": 50549,
|
295 |
+
"<loc0292>": 50550,
|
296 |
+
"<loc0293>": 50551,
|
297 |
+
"<loc0294>": 50552,
|
298 |
+
"<loc0295>": 50553,
|
299 |
+
"<loc0296>": 50554,
|
300 |
+
"<loc0297>": 50555,
|
301 |
+
"<loc0298>": 50556,
|
302 |
+
"<loc0299>": 50557,
|
303 |
+
"<loc0300>": 50558,
|
304 |
+
"<loc0301>": 50559,
|
305 |
+
"<loc0302>": 50560,
|
306 |
+
"<loc0303>": 50561,
|
307 |
+
"<loc0304>": 50562,
|
308 |
+
"<loc0305>": 50563,
|
309 |
+
"<loc0306>": 50564,
|
310 |
+
"<loc0307>": 50565,
|
311 |
+
"<loc0308>": 50566,
|
312 |
+
"<loc0309>": 50567,
|
313 |
+
"<loc0310>": 50568,
|
314 |
+
"<loc0311>": 50569,
|
315 |
+
"<loc0312>": 50570,
|
316 |
+
"<loc0313>": 50571,
|
317 |
+
"<loc0314>": 50572,
|
318 |
+
"<loc0315>": 50573,
|
319 |
+
"<loc0316>": 50574,
|
320 |
+
"<loc0317>": 50575,
|
321 |
+
"<loc0318>": 50576,
|
322 |
+
"<loc0319>": 50577,
|
323 |
+
"<loc0320>": 50578,
|
324 |
+
"<loc0321>": 50579,
|
325 |
+
"<loc0322>": 50580,
|
326 |
+
"<loc0323>": 50581,
|
327 |
+
"<loc0324>": 50582,
|
328 |
+
"<loc0325>": 50583,
|
329 |
+
"<loc0326>": 50584,
|
330 |
+
"<loc0327>": 50585,
|
331 |
+
"<loc0328>": 50586,
|
332 |
+
"<loc0329>": 50587,
|
333 |
+
"<loc0330>": 50588,
|
334 |
+
"<loc0331>": 50589,
|
335 |
+
"<loc0332>": 50590,
|
336 |
+
"<loc0333>": 50591,
|
337 |
+
"<loc0334>": 50592,
|
338 |
+
"<loc0335>": 50593,
|
339 |
+
"<loc0336>": 50594,
|
340 |
+
"<loc0337>": 50595,
|
341 |
+
"<loc0338>": 50596,
|
342 |
+
"<loc0339>": 50597,
|
343 |
+
"<loc0340>": 50598,
|
344 |
+
"<loc0341>": 50599,
|
345 |
+
"<loc0342>": 50600,
|
346 |
+
"<loc0343>": 50601,
|
347 |
+
"<loc0344>": 50602,
|
348 |
+
"<loc0345>": 50603,
|
349 |
+
"<loc0346>": 50604,
|
350 |
+
"<loc0347>": 50605,
|
351 |
+
"<loc0348>": 50606,
|
352 |
+
"<loc0349>": 50607,
|
353 |
+
"<loc0350>": 50608,
|
354 |
+
"<loc0351>": 50609,
|
355 |
+
"<loc0352>": 50610,
|
356 |
+
"<loc0353>": 50611,
|
357 |
+
"<loc0354>": 50612,
|
358 |
+
"<loc0355>": 50613,
|
359 |
+
"<loc0356>": 50614,
|
360 |
+
"<loc0357>": 50615,
|
361 |
+
"<loc0358>": 50616,
|
362 |
+
"<loc0359>": 50617,
|
363 |
+
"<loc0360>": 50618,
|
364 |
+
"<loc0361>": 50619,
|
365 |
+
"<loc0362>": 50620,
|
366 |
+
"<loc0363>": 50621,
|
367 |
+
"<loc0364>": 50622,
|
368 |
+
"<loc0365>": 50623,
|
369 |
+
"<loc0366>": 50624,
|
370 |
+
"<loc0367>": 50625,
|
371 |
+
"<loc0368>": 50626,
|
372 |
+
"<loc0369>": 50627,
|
373 |
+
"<loc0370>": 50628,
|
374 |
+
"<loc0371>": 50629,
|
375 |
+
"<loc0372>": 50630,
|
376 |
+
"<loc0373>": 50631,
|
377 |
+
"<loc0374>": 50632,
|
378 |
+
"<loc0375>": 50633,
|
379 |
+
"<loc0376>": 50634,
|
380 |
+
"<loc0377>": 50635,
|
381 |
+
"<loc0378>": 50636,
|
382 |
+
"<loc0379>": 50637,
|
383 |
+
"<loc0380>": 50638,
|
384 |
+
"<loc0381>": 50639,
|
385 |
+
"<loc0382>": 50640,
|
386 |
+
"<loc0383>": 50641,
|
387 |
+
"<loc0384>": 50642,
|
388 |
+
"<loc0385>": 50643,
|
389 |
+
"<loc0386>": 50644,
|
390 |
+
"<loc0387>": 50645,
|
391 |
+
"<loc0388>": 50646,
|
392 |
+
"<loc0389>": 50647,
|
393 |
+
"<loc0390>": 50648,
|
394 |
+
"<loc0391>": 50649,
|
395 |
+
"<loc0392>": 50650,
|
396 |
+
"<loc0393>": 50651,
|
397 |
+
"<loc0394>": 50652,
|
398 |
+
"<loc0395>": 50653,
|
399 |
+
"<loc0396>": 50654,
|
400 |
+
"<loc0397>": 50655,
|
401 |
+
"<loc0398>": 50656,
|
402 |
+
"<loc0399>": 50657,
|
403 |
+
"<loc0400>": 50658,
|
404 |
+
"<loc0401>": 50659,
|
405 |
+
"<loc0402>": 50660,
|
406 |
+
"<loc0403>": 50661,
|
407 |
+
"<loc0404>": 50662,
|
408 |
+
"<loc0405>": 50663,
|
409 |
+
"<loc0406>": 50664,
|
410 |
+
"<loc0407>": 50665,
|
411 |
+
"<loc0408>": 50666,
|
412 |
+
"<loc0409>": 50667,
|
413 |
+
"<loc0410>": 50668,
|
414 |
+
"<loc0411>": 50669,
|
415 |
+
"<loc0412>": 50670,
|
416 |
+
"<loc0413>": 50671,
|
417 |
+
"<loc0414>": 50672,
|
418 |
+
"<loc0415>": 50673,
|
419 |
+
"<loc0416>": 50674,
|
420 |
+
"<loc0417>": 50675,
|
421 |
+
"<loc0418>": 50676,
|
422 |
+
"<loc0419>": 50677,
|
423 |
+
"<loc0420>": 50678,
|
424 |
+
"<loc0421>": 50679,
|
425 |
+
"<loc0422>": 50680,
|
426 |
+
"<loc0423>": 50681,
|
427 |
+
"<loc0424>": 50682,
|
428 |
+
"<loc0425>": 50683,
|
429 |
+
"<loc0426>": 50684,
|
430 |
+
"<loc0427>": 50685,
|
431 |
+
"<loc0428>": 50686,
|
432 |
+
"<loc0429>": 50687,
|
433 |
+
"<loc0430>": 50688,
|
434 |
+
"<loc0431>": 50689,
|
435 |
+
"<loc0432>": 50690,
|
436 |
+
"<loc0433>": 50691,
|
437 |
+
"<loc0434>": 50692,
|
438 |
+
"<loc0435>": 50693,
|
439 |
+
"<loc0436>": 50694,
|
440 |
+
"<loc0437>": 50695,
|
441 |
+
"<loc0438>": 50696,
|
442 |
+
"<loc0439>": 50697,
|
443 |
+
"<loc0440>": 50698,
|
444 |
+
"<loc0441>": 50699,
|
445 |
+
"<loc0442>": 50700,
|
446 |
+
"<loc0443>": 50701,
|
447 |
+
"<loc0444>": 50702,
|
448 |
+
"<loc0445>": 50703,
|
449 |
+
"<loc0446>": 50704,
|
450 |
+
"<loc0447>": 50705,
|
451 |
+
"<loc0448>": 50706,
|
452 |
+
"<loc0449>": 50707,
|
453 |
+
"<loc0450>": 50708,
|
454 |
+
"<loc0451>": 50709,
|
455 |
+
"<loc0452>": 50710,
|
456 |
+
"<loc0453>": 50711,
|
457 |
+
"<loc0454>": 50712,
|
458 |
+
"<loc0455>": 50713,
|
459 |
+
"<loc0456>": 50714,
|
460 |
+
"<loc0457>": 50715,
|
461 |
+
"<loc0458>": 50716,
|
462 |
+
"<loc0459>": 50717,
|
463 |
+
"<loc0460>": 50718,
|
464 |
+
"<loc0461>": 50719,
|
465 |
+
"<loc0462>": 50720,
|
466 |
+
"<loc0463>": 50721,
|
467 |
+
"<loc0464>": 50722,
|
468 |
+
"<loc0465>": 50723,
|
469 |
+
"<loc0466>": 50724,
|
470 |
+
"<loc0467>": 50725,
|
471 |
+
"<loc0468>": 50726,
|
472 |
+
"<loc0469>": 50727,
|
473 |
+
"<loc0470>": 50728,
|
474 |
+
"<loc0471>": 50729,
|
475 |
+
"<loc0472>": 50730,
|
476 |
+
"<loc0473>": 50731,
|
477 |
+
"<loc0474>": 50732,
|
478 |
+
"<loc0475>": 50733,
|
479 |
+
"<loc0476>": 50734,
|
480 |
+
"<loc0477>": 50735,
|
481 |
+
"<loc0478>": 50736,
|
482 |
+
"<loc0479>": 50737,
|
483 |
+
"<loc0480>": 50738,
|
484 |
+
"<loc0481>": 50739,
|
485 |
+
"<loc0482>": 50740,
|
486 |
+
"<loc0483>": 50741,
|
487 |
+
"<loc0484>": 50742,
|
488 |
+
"<loc0485>": 50743,
|
489 |
+
"<loc0486>": 50744,
|
490 |
+
"<loc0487>": 50745,
|
491 |
+
"<loc0488>": 50746,
|
492 |
+
"<loc0489>": 50747,
|
493 |
+
"<loc0490>": 50748,
|
494 |
+
"<loc0491>": 50749,
|
495 |
+
"<loc0492>": 50750,
|
496 |
+
"<loc0493>": 50751,
|
497 |
+
"<loc0494>": 50752,
|
498 |
+
"<loc0495>": 50753,
|
499 |
+
"<loc0496>": 50754,
|
500 |
+
"<loc0497>": 50755,
|
501 |
+
"<loc0498>": 50756,
|
502 |
+
"<loc0499>": 50757,
|
503 |
+
"<loc0500>": 50758,
|
504 |
+
"<loc0501>": 50759,
|
505 |
+
"<loc0502>": 50760,
|
506 |
+
"<loc0503>": 50761,
|
507 |
+
"<loc0504>": 50762,
|
508 |
+
"<loc0505>": 50763,
|
509 |
+
"<loc0506>": 50764,
|
510 |
+
"<loc0507>": 50765,
|
511 |
+
"<loc0508>": 50766,
|
512 |
+
"<loc0509>": 50767,
|
513 |
+
"<loc0510>": 50768,
|
514 |
+
"<loc0511>": 50769,
|
515 |
+
"<loc0512>": 50770,
|
516 |
+
"<loc0513>": 50771,
|
517 |
+
"<loc0514>": 50772,
|
518 |
+
"<loc0515>": 50773,
|
519 |
+
"<loc0516>": 50774,
|
520 |
+
"<loc0517>": 50775,
|
521 |
+
"<loc0518>": 50776,
|
522 |
+
"<loc0519>": 50777,
|
523 |
+
"<loc0520>": 50778,
|
524 |
+
"<loc0521>": 50779,
|
525 |
+
"<loc0522>": 50780,
|
526 |
+
"<loc0523>": 50781,
|
527 |
+
"<loc0524>": 50782,
|
528 |
+
"<loc0525>": 50783,
|
529 |
+
"<loc0526>": 50784,
|
530 |
+
"<loc0527>": 50785,
|
531 |
+
"<loc0528>": 50786,
|
532 |
+
"<loc0529>": 50787,
|
533 |
+
"<loc0530>": 50788,
|
534 |
+
"<loc0531>": 50789,
|
535 |
+
"<loc0532>": 50790,
|
536 |
+
"<loc0533>": 50791,
|
537 |
+
"<loc0534>": 50792,
|
538 |
+
"<loc0535>": 50793,
|
539 |
+
"<loc0536>": 50794,
|
540 |
+
"<loc0537>": 50795,
|
541 |
+
"<loc0538>": 50796,
|
542 |
+
"<loc0539>": 50797,
|
543 |
+
"<loc0540>": 50798,
|
544 |
+
"<loc0541>": 50799,
|
545 |
+
"<loc0542>": 50800,
|
546 |
+
"<loc0543>": 50801,
|
547 |
+
"<loc0544>": 50802,
|
548 |
+
"<loc0545>": 50803,
|
549 |
+
"<loc0546>": 50804,
|
550 |
+
"<loc0547>": 50805,
|
551 |
+
"<loc0548>": 50806,
|
552 |
+
"<loc0549>": 50807,
|
553 |
+
"<loc0550>": 50808,
|
554 |
+
"<loc0551>": 50809,
|
555 |
+
"<loc0552>": 50810,
|
556 |
+
"<loc0553>": 50811,
|
557 |
+
"<loc0554>": 50812,
|
558 |
+
"<loc0555>": 50813,
|
559 |
+
"<loc0556>": 50814,
|
560 |
+
"<loc0557>": 50815,
|
561 |
+
"<loc0558>": 50816,
|
562 |
+
"<loc0559>": 50817,
|
563 |
+
"<loc0560>": 50818,
|
564 |
+
"<loc0561>": 50819,
|
565 |
+
"<loc0562>": 50820,
|
566 |
+
"<loc0563>": 50821,
|
567 |
+
"<loc0564>": 50822,
|
568 |
+
"<loc0565>": 50823,
|
569 |
+
"<loc0566>": 50824,
|
570 |
+
"<loc0567>": 50825,
|
571 |
+
"<loc0568>": 50826,
|
572 |
+
"<loc0569>": 50827,
|
573 |
+
"<loc0570>": 50828,
|
574 |
+
"<loc0571>": 50829,
|
575 |
+
"<loc0572>": 50830,
|
576 |
+
"<loc0573>": 50831,
|
577 |
+
"<loc0574>": 50832,
|
578 |
+
"<loc0575>": 50833,
|
579 |
+
"<loc0576>": 50834,
|
580 |
+
"<loc0577>": 50835,
|
581 |
+
"<loc0578>": 50836,
|
582 |
+
"<loc0579>": 50837,
|
583 |
+
"<loc0580>": 50838,
|
584 |
+
"<loc0581>": 50839,
|
585 |
+
"<loc0582>": 50840,
|
586 |
+
"<loc0583>": 50841,
|
587 |
+
"<loc0584>": 50842,
|
588 |
+
"<loc0585>": 50843,
|
589 |
+
"<loc0586>": 50844,
|
590 |
+
"<loc0587>": 50845,
|
591 |
+
"<loc0588>": 50846,
|
592 |
+
"<loc0589>": 50847,
|
593 |
+
"<loc0590>": 50848,
|
594 |
+
"<loc0591>": 50849,
|
595 |
+
"<loc0592>": 50850,
|
596 |
+
"<loc0593>": 50851,
|
597 |
+
"<loc0594>": 50852,
|
598 |
+
"<loc0595>": 50853,
|
599 |
+
"<loc0596>": 50854,
|
600 |
+
"<loc0597>": 50855,
|
601 |
+
"<loc0598>": 50856,
|
602 |
+
"<loc0599>": 50857,
|
603 |
+
"<loc0600>": 50858,
|
604 |
+
"<loc0601>": 50859,
|
605 |
+
"<loc0602>": 50860,
|
606 |
+
"<loc0603>": 50861,
|
607 |
+
"<loc0604>": 50862,
|
608 |
+
"<loc0605>": 50863,
|
609 |
+
"<loc0606>": 50864,
|
610 |
+
"<loc0607>": 50865,
|
611 |
+
"<loc0608>": 50866,
|
612 |
+
"<loc0609>": 50867,
|
613 |
+
"<loc0610>": 50868,
|
614 |
+
"<loc0611>": 50869,
|
615 |
+
"<loc0612>": 50870,
|
616 |
+
"<loc0613>": 50871,
|
617 |
+
"<loc0614>": 50872,
|
618 |
+
"<loc0615>": 50873,
|
619 |
+
"<loc0616>": 50874,
|
620 |
+
"<loc0617>": 50875,
|
621 |
+
"<loc0618>": 50876,
|
622 |
+
"<loc0619>": 50877,
|
623 |
+
"<loc0620>": 50878,
|
624 |
+
"<loc0621>": 50879,
|
625 |
+
"<loc0622>": 50880,
|
626 |
+
"<loc0623>": 50881,
|
627 |
+
"<loc0624>": 50882,
|
628 |
+
"<loc0625>": 50883,
|
629 |
+
"<loc0626>": 50884,
|
630 |
+
"<loc0627>": 50885,
|
631 |
+
"<loc0628>": 50886,
|
632 |
+
"<loc0629>": 50887,
|
633 |
+
"<loc0630>": 50888,
|
634 |
+
"<loc0631>": 50889,
|
635 |
+
"<loc0632>": 50890,
|
636 |
+
"<loc0633>": 50891,
|
637 |
+
"<loc0634>": 50892,
|
638 |
+
"<loc0635>": 50893,
|
639 |
+
"<loc0636>": 50894,
|
640 |
+
"<loc0637>": 50895,
|
641 |
+
"<loc0638>": 50896,
|
642 |
+
"<loc0639>": 50897,
|
643 |
+
"<loc0640>": 50898,
|
644 |
+
"<loc0641>": 50899,
|
645 |
+
"<loc0642>": 50900,
|
646 |
+
"<loc0643>": 50901,
|
647 |
+
"<loc0644>": 50902,
|
648 |
+
"<loc0645>": 50903,
|
649 |
+
"<loc0646>": 50904,
|
650 |
+
"<loc0647>": 50905,
|
651 |
+
"<loc0648>": 50906,
|
652 |
+
"<loc0649>": 50907,
|
653 |
+
"<loc0650>": 50908,
|
654 |
+
"<loc0651>": 50909,
|
655 |
+
"<loc0652>": 50910,
|
656 |
+
"<loc0653>": 50911,
|
657 |
+
"<loc0654>": 50912,
|
658 |
+
"<loc0655>": 50913,
|
659 |
+
"<loc0656>": 50914,
|
660 |
+
"<loc0657>": 50915,
|
661 |
+
"<loc0658>": 50916,
|
662 |
+
"<loc0659>": 50917,
|
663 |
+
"<loc0660>": 50918,
|
664 |
+
"<loc0661>": 50919,
|
665 |
+
"<loc0662>": 50920,
|
666 |
+
"<loc0663>": 50921,
|
667 |
+
"<loc0664>": 50922,
|
668 |
+
"<loc0665>": 50923,
|
669 |
+
"<loc0666>": 50924,
|
670 |
+
"<loc0667>": 50925,
|
671 |
+
"<loc0668>": 50926,
|
672 |
+
"<loc0669>": 50927,
|
673 |
+
"<loc0670>": 50928,
|
674 |
+
"<loc0671>": 50929,
|
675 |
+
"<loc0672>": 50930,
|
676 |
+
"<loc0673>": 50931,
|
677 |
+
"<loc0674>": 50932,
|
678 |
+
"<loc0675>": 50933,
|
679 |
+
"<loc0676>": 50934,
|
680 |
+
"<loc0677>": 50935,
|
681 |
+
"<loc0678>": 50936,
|
682 |
+
"<loc0679>": 50937,
|
683 |
+
"<loc0680>": 50938,
|
684 |
+
"<loc0681>": 50939,
|
685 |
+
"<loc0682>": 50940,
|
686 |
+
"<loc0683>": 50941,
|
687 |
+
"<loc0684>": 50942,
|
688 |
+
"<loc0685>": 50943,
|
689 |
+
"<loc0686>": 50944,
|
690 |
+
"<loc0687>": 50945,
|
691 |
+
"<loc0688>": 50946,
|
692 |
+
"<loc0689>": 50947,
|
693 |
+
"<loc0690>": 50948,
|
694 |
+
"<loc0691>": 50949,
|
695 |
+
"<loc0692>": 50950,
|
696 |
+
"<loc0693>": 50951,
|
697 |
+
"<loc0694>": 50952,
|
698 |
+
"<loc0695>": 50953,
|
699 |
+
"<loc0696>": 50954,
|
700 |
+
"<loc0697>": 50955,
|
701 |
+
"<loc0698>": 50956,
|
702 |
+
"<loc0699>": 50957,
|
703 |
+
"<loc0700>": 50958,
|
704 |
+
"<loc0701>": 50959,
|
705 |
+
"<loc0702>": 50960,
|
706 |
+
"<loc0703>": 50961,
|
707 |
+
"<loc0704>": 50962,
|
708 |
+
"<loc0705>": 50963,
|
709 |
+
"<loc0706>": 50964,
|
710 |
+
"<loc0707>": 50965,
|
711 |
+
"<loc0708>": 50966,
|
712 |
+
"<loc0709>": 50967,
|
713 |
+
"<loc0710>": 50968,
|
714 |
+
"<loc0711>": 50969,
|
715 |
+
"<loc0712>": 50970,
|
716 |
+
"<loc0713>": 50971,
|
717 |
+
"<loc0714>": 50972,
|
718 |
+
"<loc0715>": 50973,
|
719 |
+
"<loc0716>": 50974,
|
720 |
+
"<loc0717>": 50975,
|
721 |
+
"<loc0718>": 50976,
|
722 |
+
"<loc0719>": 50977,
|
723 |
+
"<loc0720>": 50978,
|
724 |
+
"<loc0721>": 50979,
|
725 |
+
"<loc0722>": 50980,
|
726 |
+
"<loc0723>": 50981,
|
727 |
+
"<loc0724>": 50982,
|
728 |
+
"<loc0725>": 50983,
|
729 |
+
"<loc0726>": 50984,
|
730 |
+
"<loc0727>": 50985,
|
731 |
+
"<loc0728>": 50986,
|
732 |
+
"<loc0729>": 50987,
|
733 |
+
"<loc0730>": 50988,
|
734 |
+
"<loc0731>": 50989,
|
735 |
+
"<loc0732>": 50990,
|
736 |
+
"<loc0733>": 50991,
|
737 |
+
"<loc0734>": 50992,
|
738 |
+
"<loc0735>": 50993,
|
739 |
+
"<loc0736>": 50994,
|
740 |
+
"<loc0737>": 50995,
|
741 |
+
"<loc0738>": 50996,
|
742 |
+
"<loc0739>": 50997,
|
743 |
+
"<loc0740>": 50998,
|
744 |
+
"<loc0741>": 50999,
|
745 |
+
"<loc0742>": 51000,
|
746 |
+
"<loc0743>": 51001,
|
747 |
+
"<loc0744>": 51002,
|
748 |
+
"<loc0745>": 51003,
|
749 |
+
"<loc0746>": 51004,
|
750 |
+
"<loc0747>": 51005,
|
751 |
+
"<loc0748>": 51006,
|
752 |
+
"<loc0749>": 51007,
|
753 |
+
"<loc0750>": 51008,
|
754 |
+
"<loc0751>": 51009,
|
755 |
+
"<loc0752>": 51010,
|
756 |
+
"<loc0753>": 51011,
|
757 |
+
"<loc0754>": 51012,
|
758 |
+
"<loc0755>": 51013,
|
759 |
+
"<loc0756>": 51014,
|
760 |
+
"<loc0757>": 51015,
|
761 |
+
"<loc0758>": 51016,
|
762 |
+
"<loc0759>": 51017,
|
763 |
+
"<loc0760>": 51018,
|
764 |
+
"<loc0761>": 51019,
|
765 |
+
"<loc0762>": 51020,
|
766 |
+
"<loc0763>": 51021,
|
767 |
+
"<loc0764>": 51022,
|
768 |
+
"<loc0765>": 51023,
|
769 |
+
"<loc0766>": 51024,
|
770 |
+
"<loc0767>": 51025,
|
771 |
+
"<loc0768>": 51026,
|
772 |
+
"<loc0769>": 51027,
|
773 |
+
"<loc0770>": 51028,
|
774 |
+
"<loc0771>": 51029,
|
775 |
+
"<loc0772>": 51030,
|
776 |
+
"<loc0773>": 51031,
|
777 |
+
"<loc0774>": 51032,
|
778 |
+
"<loc0775>": 51033,
|
779 |
+
"<loc0776>": 51034,
|
780 |
+
"<loc0777>": 51035,
|
781 |
+
"<loc0778>": 51036,
|
782 |
+
"<loc0779>": 51037,
|
783 |
+
"<loc0780>": 51038,
|
784 |
+
"<loc0781>": 51039,
|
785 |
+
"<loc0782>": 51040,
|
786 |
+
"<loc0783>": 51041,
|
787 |
+
"<loc0784>": 51042,
|
788 |
+
"<loc0785>": 51043,
|
789 |
+
"<loc0786>": 51044,
|
790 |
+
"<loc0787>": 51045,
|
791 |
+
"<loc0788>": 51046,
|
792 |
+
"<loc0789>": 51047,
|
793 |
+
"<loc0790>": 51048,
|
794 |
+
"<loc0791>": 51049,
|
795 |
+
"<loc0792>": 51050,
|
796 |
+
"<loc0793>": 51051,
|
797 |
+
"<loc0794>": 51052,
|
798 |
+
"<loc0795>": 51053,
|
799 |
+
"<loc0796>": 51054,
|
800 |
+
"<loc0797>": 51055,
|
801 |
+
"<loc0798>": 51056,
|
802 |
+
"<loc0799>": 51057,
|
803 |
+
"<loc0800>": 51058,
|
804 |
+
"<loc0801>": 51059,
|
805 |
+
"<loc0802>": 51060,
|
806 |
+
"<loc0803>": 51061,
|
807 |
+
"<loc0804>": 51062,
|
808 |
+
"<loc0805>": 51063,
|
809 |
+
"<loc0806>": 51064,
|
810 |
+
"<loc0807>": 51065,
|
811 |
+
"<loc0808>": 51066,
|
812 |
+
"<loc0809>": 51067,
|
813 |
+
"<loc0810>": 51068,
|
814 |
+
"<loc0811>": 51069,
|
815 |
+
"<loc0812>": 51070,
|
816 |
+
"<loc0813>": 51071,
|
817 |
+
"<loc0814>": 51072,
|
818 |
+
"<loc0815>": 51073,
|
819 |
+
"<loc0816>": 51074,
|
820 |
+
"<loc0817>": 51075,
|
821 |
+
"<loc0818>": 51076,
|
822 |
+
"<loc0819>": 51077,
|
823 |
+
"<loc0820>": 51078,
|
824 |
+
"<loc0821>": 51079,
|
825 |
+
"<loc0822>": 51080,
|
826 |
+
"<loc0823>": 51081,
|
827 |
+
"<loc0824>": 51082,
|
828 |
+
"<loc0825>": 51083,
|
829 |
+
"<loc0826>": 51084,
|
830 |
+
"<loc0827>": 51085,
|
831 |
+
"<loc0828>": 51086,
|
832 |
+
"<loc0829>": 51087,
|
833 |
+
"<loc0830>": 51088,
|
834 |
+
"<loc0831>": 51089,
|
835 |
+
"<loc0832>": 51090,
|
836 |
+
"<loc0833>": 51091,
|
837 |
+
"<loc0834>": 51092,
|
838 |
+
"<loc0835>": 51093,
|
839 |
+
"<loc0836>": 51094,
|
840 |
+
"<loc0837>": 51095,
|
841 |
+
"<loc0838>": 51096,
|
842 |
+
"<loc0839>": 51097,
|
843 |
+
"<loc0840>": 51098,
|
844 |
+
"<loc0841>": 51099,
|
845 |
+
"<loc0842>": 51100,
|
846 |
+
"<loc0843>": 51101,
|
847 |
+
"<loc0844>": 51102,
|
848 |
+
"<loc0845>": 51103,
|
849 |
+
"<loc0846>": 51104,
|
850 |
+
"<loc0847>": 51105,
|
851 |
+
"<loc0848>": 51106,
|
852 |
+
"<loc0849>": 51107,
|
853 |
+
"<loc0850>": 51108,
|
854 |
+
"<loc0851>": 51109,
|
855 |
+
"<loc0852>": 51110,
|
856 |
+
"<loc0853>": 51111,
|
857 |
+
"<loc0854>": 51112,
|
858 |
+
"<loc0855>": 51113,
|
859 |
+
"<loc0856>": 51114,
|
860 |
+
"<loc0857>": 51115,
|
861 |
+
"<loc0858>": 51116,
|
862 |
+
"<loc0859>": 51117,
|
863 |
+
"<loc0860>": 51118,
|
864 |
+
"<loc0861>": 51119,
|
865 |
+
"<loc0862>": 51120,
|
866 |
+
"<loc0863>": 51121,
|
867 |
+
"<loc0864>": 51122,
|
868 |
+
"<loc0865>": 51123,
|
869 |
+
"<loc0866>": 51124,
|
870 |
+
"<loc0867>": 51125,
|
871 |
+
"<loc0868>": 51126,
|
872 |
+
"<loc0869>": 51127,
|
873 |
+
"<loc0870>": 51128,
|
874 |
+
"<loc0871>": 51129,
|
875 |
+
"<loc0872>": 51130,
|
876 |
+
"<loc0873>": 51131,
|
877 |
+
"<loc0874>": 51132,
|
878 |
+
"<loc0875>": 51133,
|
879 |
+
"<loc0876>": 51134,
|
880 |
+
"<loc0877>": 51135,
|
881 |
+
"<loc0878>": 51136,
|
882 |
+
"<loc0879>": 51137,
|
883 |
+
"<loc0880>": 51138,
|
884 |
+
"<loc0881>": 51139,
|
885 |
+
"<loc0882>": 51140,
|
886 |
+
"<loc0883>": 51141,
|
887 |
+
"<loc0884>": 51142,
|
888 |
+
"<loc0885>": 51143,
|
889 |
+
"<loc0886>": 51144,
|
890 |
+
"<loc0887>": 51145,
|
891 |
+
"<loc0888>": 51146,
|
892 |
+
"<loc0889>": 51147,
|
893 |
+
"<loc0890>": 51148,
|
894 |
+
"<loc0891>": 51149,
|
895 |
+
"<loc0892>": 51150,
|
896 |
+
"<loc0893>": 51151,
|
897 |
+
"<loc0894>": 51152,
|
898 |
+
"<loc0895>": 51153,
|
899 |
+
"<loc0896>": 51154,
|
900 |
+
"<loc0897>": 51155,
|
901 |
+
"<loc0898>": 51156,
|
902 |
+
"<loc0899>": 51157,
|
903 |
+
"<loc0900>": 51158,
|
904 |
+
"<loc0901>": 51159,
|
905 |
+
"<loc0902>": 51160,
|
906 |
+
"<loc0903>": 51161,
|
907 |
+
"<loc0904>": 51162,
|
908 |
+
"<loc0905>": 51163,
|
909 |
+
"<loc0906>": 51164,
|
910 |
+
"<loc0907>": 51165,
|
911 |
+
"<loc0908>": 51166,
|
912 |
+
"<loc0909>": 51167,
|
913 |
+
"<loc0910>": 51168,
|
914 |
+
"<loc0911>": 51169,
|
915 |
+
"<loc0912>": 51170,
|
916 |
+
"<loc0913>": 51171,
|
917 |
+
"<loc0914>": 51172,
|
918 |
+
"<loc0915>": 51173,
|
919 |
+
"<loc0916>": 51174,
|
920 |
+
"<loc0917>": 51175,
|
921 |
+
"<loc0918>": 51176,
|
922 |
+
"<loc0919>": 51177,
|
923 |
+
"<loc0920>": 51178,
|
924 |
+
"<loc0921>": 51179,
|
925 |
+
"<loc0922>": 51180,
|
926 |
+
"<loc0923>": 51181,
|
927 |
+
"<loc0924>": 51182,
|
928 |
+
"<loc0925>": 51183,
|
929 |
+
"<loc0926>": 51184,
|
930 |
+
"<loc0927>": 51185,
|
931 |
+
"<loc0928>": 51186,
|
932 |
+
"<loc0929>": 51187,
|
933 |
+
"<loc0930>": 51188,
|
934 |
+
"<loc0931>": 51189,
|
935 |
+
"<loc0932>": 51190,
|
936 |
+
"<loc0933>": 51191,
|
937 |
+
"<loc0934>": 51192,
|
938 |
+
"<loc0935>": 51193,
|
939 |
+
"<loc0936>": 51194,
|
940 |
+
"<loc0937>": 51195,
|
941 |
+
"<loc0938>": 51196,
|
942 |
+
"<loc0939>": 51197,
|
943 |
+
"<loc0940>": 51198,
|
944 |
+
"<loc0941>": 51199,
|
945 |
+
"<loc0942>": 51200,
|
946 |
+
"<loc0943>": 51201,
|
947 |
+
"<loc0944>": 51202,
|
948 |
+
"<loc0945>": 51203,
|
949 |
+
"<loc0946>": 51204,
|
950 |
+
"<loc0947>": 51205,
|
951 |
+
"<loc0948>": 51206,
|
952 |
+
"<loc0949>": 51207,
|
953 |
+
"<loc0950>": 51208,
|
954 |
+
"<loc0951>": 51209,
|
955 |
+
"<loc0952>": 51210,
|
956 |
+
"<loc0953>": 51211,
|
957 |
+
"<loc0954>": 51212,
|
958 |
+
"<loc0955>": 51213,
|
959 |
+
"<loc0956>": 51214,
|
960 |
+
"<loc0957>": 51215,
|
961 |
+
"<loc0958>": 51216,
|
962 |
+
"<loc0959>": 51217,
|
963 |
+
"<loc0960>": 51218,
|
964 |
+
"<loc0961>": 51219,
|
965 |
+
"<loc0962>": 51220,
|
966 |
+
"<loc0963>": 51221,
|
967 |
+
"<loc0964>": 51222,
|
968 |
+
"<loc0965>": 51223,
|
969 |
+
"<loc0966>": 51224,
|
970 |
+
"<loc0967>": 51225,
|
971 |
+
"<loc0968>": 51226,
|
972 |
+
"<loc0969>": 51227,
|
973 |
+
"<loc0970>": 51228,
|
974 |
+
"<loc0971>": 51229,
|
975 |
+
"<loc0972>": 51230,
|
976 |
+
"<loc0973>": 51231,
|
977 |
+
"<loc0974>": 51232,
|
978 |
+
"<loc0975>": 51233,
|
979 |
+
"<loc0976>": 51234,
|
980 |
+
"<loc0977>": 51235,
|
981 |
+
"<loc0978>": 51236,
|
982 |
+
"<loc0979>": 51237,
|
983 |
+
"<loc0980>": 51238,
|
984 |
+
"<loc0981>": 51239,
|
985 |
+
"<loc0982>": 51240,
|
986 |
+
"<loc0983>": 51241,
|
987 |
+
"<loc0984>": 51242,
|
988 |
+
"<loc0985>": 51243,
|
989 |
+
"<loc0986>": 51244,
|
990 |
+
"<loc0987>": 51245,
|
991 |
+
"<loc0988>": 51246,
|
992 |
+
"<loc0989>": 51247,
|
993 |
+
"<loc0990>": 51248,
|
994 |
+
"<loc0991>": 51249,
|
995 |
+
"<loc0992>": 51250,
|
996 |
+
"<loc0993>": 51251,
|
997 |
+
"<loc0994>": 51252,
|
998 |
+
"<loc0995>": 51253,
|
999 |
+
"<loc0996>": 51254,
|
1000 |
+
"<loc0997>": 51255,
|
1001 |
+
"<loc0998>": 51256,
|
1002 |
+
"<loc0999>": 51257,
|
1003 |
+
"<loc1000>": 51258,
|
1004 |
+
"<loc1001>": 51259,
|
1005 |
+
"<loc1002>": 51260,
|
1006 |
+
"<loc1003>": 51261,
|
1007 |
+
"<loc1004>": 51262,
|
1008 |
+
"<loc1005>": 51263,
|
1009 |
+
"<loc1006>": 51264,
|
1010 |
+
"<loc1007>": 51265,
|
1011 |
+
"<loc1008>": 51266,
|
1012 |
+
"<loc1009>": 51267,
|
1013 |
+
"<loc1010>": 51268,
|
1014 |
+
"<loc1011>": 51269,
|
1015 |
+
"<loc1012>": 51270,
|
1016 |
+
"<loc1013>": 51271,
|
1017 |
+
"<loc1014>": 51272,
|
1018 |
+
"<loc1015>": 51273,
|
1019 |
+
"<loc1016>": 51274,
|
1020 |
+
"<loc1017>": 51275,
|
1021 |
+
"<loc1018>": 51276,
|
1022 |
+
"<loc1019>": 51277,
|
1023 |
+
"<loc1020>": 51278,
|
1024 |
+
"<loc1021>": 51279,
|
1025 |
+
"<loc1022>": 51280,
|
1026 |
+
"<loc1023>": 51281
|
1027 |
+
}
|
merges.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
preprocessor_config.json
ADDED
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"auto_map": {
|
3 |
+
"AutoProcessor": "processing_travisionlm.TraVisionProcessor"
|
4 |
+
},
|
5 |
+
"do_convert_rgb": null,
|
6 |
+
"do_normalize": true,
|
7 |
+
"do_rescale": true,
|
8 |
+
"do_resize": true,
|
9 |
+
"image_mean": [
|
10 |
+
0.5,
|
11 |
+
0.5,
|
12 |
+
0.5
|
13 |
+
],
|
14 |
+
"image_processor_type": "SiglipImageProcessor",
|
15 |
+
"image_seq_length": 256,
|
16 |
+
"image_std": [
|
17 |
+
0.5,
|
18 |
+
0.5,
|
19 |
+
0.5
|
20 |
+
],
|
21 |
+
"processor_class": "TraVisionProcessor",
|
22 |
+
"resample": 3,
|
23 |
+
"rescale_factor": 0.00392156862745098,
|
24 |
+
"size": {
|
25 |
+
"height": 256,
|
26 |
+
"width": 256
|
27 |
+
}
|
28 |
+
}
|
processing_travisionlm.py
ADDED
@@ -0,0 +1,276 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
Processor class for TraVisionLM.
|
3 |
+
"""
|
4 |
+
|
5 |
+
import logging
|
6 |
+
from typing import List, Optional, Union
|
7 |
+
|
8 |
+
from transformers.feature_extraction_utils import BatchFeature
|
9 |
+
from transformers.image_utils import ImageInput, is_valid_image
|
10 |
+
from transformers.processing_utils import ProcessorMixin
|
11 |
+
from transformers.tokenization_utils import (
|
12 |
+
AddedToken,
|
13 |
+
PaddingStrategy,
|
14 |
+
PreTokenizedInput,
|
15 |
+
TextInput,
|
16 |
+
TruncationStrategy,
|
17 |
+
)
|
18 |
+
from transformers.utils import TensorType
|
19 |
+
|
20 |
+
logger = logging.getLogger(__name__)
|
21 |
+
|
22 |
+
IMAGE_TOKEN = "<image>"
|
23 |
+
EXTRA_TOKENS = [f"<loc{i:0>4}>" for i in range(1024)] # for object detection task
|
24 |
+
|
25 |
+
# Copied from transformers.models.idefics2.processing_idefics2.is_url
|
26 |
+
def is_url(val) -> bool:
    """Return ``True`` when *val* is a string that begins with ``"http"``."""
    if not isinstance(val, str):
        return False
    return val.startswith("http")
|
28 |
+
|
29 |
+
|
30 |
+
# Copied from transformers.models.idefics2.processing_idefics2.is_image_or_image_url
|
31 |
+
def is_image_or_image_url(elem):
    """Return a truthy value when *elem* is a URL string or a valid image object."""
    if is_url(elem):
        return True
    return is_valid_image(elem)
|
33 |
+
|
34 |
+
# Copied from transformers.models.paligemma.processing_paligemma._is_str_or_image
|
35 |
+
def _is_str_or_image(elem):
    """Return a truthy value for plain strings and for image-or-URL inputs."""
    if isinstance(elem, str):
        return True
    return is_image_or_image_url(elem)
|
37 |
+
|
38 |
+
|
39 |
+
def build_string_from_input(image_seq_len, image_token):
    """
    Build the image-placeholder prefix by repeating the image token.

    For example::

        build_string_from_input(image_seq_len=3, image_token="<im>")

    returns ``"<im><im><im>"``.

    Args:
        image_seq_len (`int`): The length of the image sequence.
        image_token (`str`): The image token.
    """
    return image_token * image_seq_len
|
54 |
+
|
55 |
+
|
56 |
+
class TraVisionProcessor(ProcessorMixin):
    r"""
    Constructs a TraVision processor which wraps a SigLIP image processor and a GPT2 tokenizer into a single processor.

    [`TraVisionProcessor`] offers all the functionalities of [`SiglipImageProcessor`] and [`GPT2TokenizerFast`]. See the
    [`~TraVisionProcessor.__call__`] and [`~TraVisionProcessor.decode`] for more information.

    Args:
        image_processor ([`SiglipImageProcessor`], *optional*):
            The image processor is a required input.
        tokenizer ([`GPT2TokenizerFast`], *optional*):
            The tokenizer is a required input.
        chat_template (`str`, *optional*): A Jinja template which will be used to convert lists of messages
            in a chat into a tokenizable string.
    """

    attributes = ["image_processor", "tokenizer"]
    valid_kwargs = ["chat_template"]
    image_processor_class = "SiglipImageProcessor"
    tokenizer_class = ("GPT2Tokenizer", "GPT2TokenizerFast")

    def __init__(
        self,
        image_processor=None,
        tokenizer=None,
        chat_template=None,
        **kwargs,
    ):
        if image_processor is None:
            raise ValueError("You need to specify an `image_processor`.")
        if tokenizer is None:
            raise ValueError("You need to specify a `tokenizer`.")
        if not hasattr(image_processor, "image_seq_length"):
            raise ValueError("Image processor is missing an `image_seq_length` attribute.")

        self.image_seq_length = image_processor.image_seq_length

        # Register the <image> placeholder plus the 1024 <locXXXX> tokens (object
        # detection task) so they are tokenized as single units.
        image_token = AddedToken(IMAGE_TOKEN, normalized=False, special=True)
        tokens_to_add = {"additional_special_tokens": [image_token]}
        tokenizer.add_special_tokens(tokens_to_add)
        tokenizer.add_tokens(EXTRA_TOKENS)
        self.image_token_id = tokenizer.convert_tokens_to_ids(IMAGE_TOKEN)
        # BOS/EOS handling is done explicitly in __call__ (EOS appended to labels),
        # so the tokenizer must not add them on its own.
        tokenizer.add_bos_token = False
        tokenizer.add_eos_token = False

        super().__init__(image_processor, tokenizer, chat_template=chat_template)

    def __call__(
        self,
        text: Union[TextInput, PreTokenizedInput, List[TextInput], List[PreTokenizedInput]] = None,
        images: ImageInput = None,
        tokenize_newline_separately: bool = True,
        padding: Union[bool, str, PaddingStrategy] = False,
        truncation: Union[bool, str, TruncationStrategy] = None,
        max_length=None,
        return_tensors: Optional[Union[str, TensorType]] = TensorType.PYTORCH,
        do_resize: bool = None,
        do_normalize: bool = None,
        image_mean: Optional[Union[float, List[float]]] = None,
        image_std: Optional[Union[float, List[float]]] = None,
        data_format: Optional["ChannelDimension"] = "channels_first",  # noqa: F821
        input_data_format: Optional[
            Union[str, "ChannelDimension"]  # noqa: F821
        ] = None,
        resample: "PILImageResampling" = None,  # noqa: F821
        do_convert_rgb: bool = None,
        do_thumbnail: bool = None,
        do_align_long_axis: bool = None,
        do_rescale: bool = None,
        labels: Optional[Union[TextInput, PreTokenizedInput, List[TextInput], List[PreTokenizedInput]]] = None,
    ) -> BatchFeature:
        """
        Main method to prepare for the model one or several sequences(s) and image(s). This method forwards the `text`
        and `kwargs` arguments to GPT2TokenizerFast's [`~GPT2TokenizerFast.__call__`] if `text` is not `None` to encode
        the text. To prepare the image(s), this method forwards the `images` and `kwargs` arguments to
        SiglipImageProcessor's [`~SiglipImageProcessor.__call__`] if `images` is not `None`. Please refer to the docstring
        of the above two methods for more information.

        The usage for TraVisionLM fine-tuning preparation follows a standard 4D causal mask where only the prompt and label tokens
        are attended in an auto-regressive manner. The labels in `text` are to be passed separately to the __call__ function and
        will be placed after the prompt, which is the instruction to steer the model generation.

        Args:
            text (`str`, `List[str]`, `List[List[str]]`):
                The sequence or batch of sequences to be encoded. Each sequence can be a string or a list of strings
                (pretokenized string). If the sequences are provided as list of strings (pretokenized), you must set
                `is_split_into_words=True` (to lift the ambiguity with a batch of sequences).
            images (`PIL.Image.Image`, `np.ndarray`, `torch.Tensor`, `List[PIL.Image.Image]`, `List[np.ndarray]`, `List[torch.Tensor]`):
                The image or batch of images to be prepared. Each image can be a PIL image, NumPy array or PyTorch
                tensor. In case of a NumPy array/PyTorch tensor, each image should be of shape (C, H, W), where C is a
                number of channels, H and W are image height and width.
            tokenize_newline_separately (`bool`, defaults to `True`):
                Adds a separately tokenized '\n' at the end of the prompt.
            padding (`bool`, `str` or [`~utils.PaddingStrategy`], *optional*, defaults to `False`):
                Select a strategy to pad the returned sequences (according to the model's padding side and padding
                index) among:
                - `True` or `'longest'`: Pad to the longest sequence in the batch (or no padding if only a single
                  sequence if provided).
                - `'max_length'`: Pad to a maximum length specified with the argument `max_length` or to the maximum
                  acceptable input length for the model if that argument is not provided.
                - `False` or `'do_not_pad'` (default): No padding (i.e., can output a batch with sequences of different
                  lengths).
            max_length (`int`, *optional*):
                Maximum length of the returned list and optionally padding length (see above).
            truncation (`bool`, *optional*):
                Activates truncation to cut input sequences longer than `max_length` to `max_length`.
            return_tensors (`str` or [`~utils.TensorType`], *optional*):
                If set, will return tensors of a particular framework. Acceptable values are:

                - `'tf'`: Return TensorFlow `tf.constant` objects.
                - `'pt'`: Return PyTorch `torch.Tensor` objects.
                - `'np'`: Return NumPy `np.ndarray` objects.
                - `'jax'`: Return JAX `jnp.ndarray` objects.
            labels (`str`, `List[str]`, `List[List[str]]`):
                The label or batch of labels to be encoded. Only necessary for training.
                If your prompt is "<image> Resimde ne var", the label corresponds to the expected prediction "çimlerde uzanan bir köpek".

        Returns:
            [`BatchFeature`]: A [`BatchFeature`] with the following fields:

            - **input_ids** -- List of token ids to be fed to a model. Returned when `text` is not `None`. If `labels`
              is provided, the `input_ids` will also contain the label input ids.
            - **attention_mask** -- List of indices specifying which tokens should be attended to by the model (when
              `return_attention_mask=True` or if *"attention_mask"* is in `self.model_input_names` and if `text` is not
              `None`).
            - **pixel_values** -- Pixel values to be fed to a model. Returned when `images` is not `None`.
            - **labels** -- Labels compatible with training if `labels` is not None
        """
        # token_type_ids mark which tokens belong to the second segment (`text_pair`,
        # i.e. prompt + label); they are required below to build the training labels.
        return_token_type_ids = True

        if images is None:
            raise ValueError("`images` are expected as arguments to a `TraVisionProcessor` instance.")
        if text is None:
            # NOTE(review): `warning_once` is not a stdlib Logger method; it relies on
            # `transformers` patching `logging.Logger` at import time — confirm.
            logger.warning_once(
                "You are using TraVisionLM without a text prefix. It will perform as a picture-captioning model."
            )
            text = "Açıkla"  # default prompt if it is not provided as an argument

        # `isinstance` must use the builtin `list`, not `typing.List`.
        if isinstance(text, list) and isinstance(images, list):
            if len(images) < len(text):
                raise ValueError(
                    f"Received {len(images)} images for {len(text)} prompts. Each prompt should be associated with an image."
                )
        if _is_str_or_image(text):
            text = [text]
        # A trailing newline separates each prompt from the (optional) label.
        text = [f"{prompt}\n" for prompt in text]

        if labels is not None and _is_str_or_image(labels):
            labels = [labels]
        if labels is not None:
            # Fail loudly instead of letting `zip` silently drop unmatched prompts/labels.
            if len(labels) != len(text):
                raise ValueError(
                    f"Received {len(labels)} labels for {len(text)} prompts. Each prompt should be associated with a label."
                )
            # Close every label with EOS so the model learns where generation stops.
            labels = [label + self.tokenizer.eos_token for label in labels]

            text = [f"{prompt}{label}" for prompt, label in zip(text, labels)]

        # One run of <image> placeholders per sample; the vision-tower output is
        # merged into these positions by the model.
        input_strings = [
            build_string_from_input(
                image_seq_len=self.image_seq_length,
                image_token=IMAGE_TOKEN,
            )
            for _ in text
        ]

        pixel_values = self.image_processor(
            images,
            do_resize=do_resize,
            do_normalize=do_normalize,
            return_tensors=return_tensors,
            image_mean=image_mean,
            image_std=image_std,
            input_data_format=input_data_format,
            data_format=data_format,
            resample=resample,
            do_convert_rgb=do_convert_rgb,
        )["pixel_values"]

        if max_length is not None:
            max_length += self.image_seq_length  # max_length has to account for the image tokens

        inputs = self.tokenizer(
            input_strings,
            text_pair=text,
            return_tensors=return_tensors,
            padding=padding,
            max_length=max_length,
            truncation=truncation,
            return_token_type_ids=return_token_type_ids,
        )

        return_data = {**inputs, "pixel_values": pixel_values}

        if labels is not None:
            # Mask out everything that is not label text (token_type_id == 0: the
            # image placeholders and the prompt) so the loss covers labels only.
            # NOTE(review): `masked_fill` assumes PyTorch tensors, i.e. the default
            # `return_tensors=TensorType.PYTORCH` — confirm for other frameworks.
            labels = inputs["input_ids"].masked_fill(inputs["token_type_ids"] == 0, -100)
            return_data.update({"labels": labels})
        return BatchFeature(data=return_data)

    # Copied from transformers.models.clip.processing_clip.CLIPProcessor.batch_decode with CLIP->GPT2
    def batch_decode(self, *args, **kwargs):
        """
        This method forwards all its arguments to GPT2TokenizerFast's [`~PreTrainedTokenizer.batch_decode`]. Please
        refer to the docstring of this method for more information.
        """
        return self.tokenizer.batch_decode(*args, **kwargs)

    # Copied from transformers.models.clip.processing_clip.CLIPProcessor.decode with CLIP->GPT2
    def decode(self, *args, **kwargs):
        """
        This method forwards all its arguments to GPT2TokenizerFast's [`~PreTrainedTokenizer.decode`]. Please refer to
        the docstring of this method for more information.
        """
        return self.tokenizer.decode(*args, **kwargs)

    @property
    # Copied from transformers.models.clip.processing_clip.CLIPProcessor.model_input_names with CLIP->TraVision
    def model_input_names(self):
        # Deduplicate while preserving order (dict preserves insertion order).
        tokenizer_input_names = self.tokenizer.model_input_names
        image_processor_input_names = self.image_processor.model_input_names
        return list(dict.fromkeys(tokenizer_input_names + image_processor_input_names))
|
processor_config.json
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"auto_map": {
|
3 |
+
"AutoProcessor": "processing_travisionlm.TraVisionProcessor"
|
4 |
+
},
|
5 |
+
"processor_class": "TraVisionProcessor"
|
6 |
+
}
|
special_tokens_map.json
ADDED
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"additional_special_tokens": [
|
3 |
+
{
|
4 |
+
"content": "<image>",
|
5 |
+
"lstrip": false,
|
6 |
+
"normalized": false,
|
7 |
+
"rstrip": false,
|
8 |
+
"single_word": false
|
9 |
+
}
|
10 |
+
],
|
11 |
+
"bos_token": {
|
12 |
+
"content": "<|endoftext|>",
|
13 |
+
"lstrip": false,
|
14 |
+
"normalized": true,
|
15 |
+
"rstrip": false,
|
16 |
+
"single_word": false
|
17 |
+
},
|
18 |
+
"eos_token": {
|
19 |
+
"content": "<|endoftext|>",
|
20 |
+
"lstrip": false,
|
21 |
+
"normalized": true,
|
22 |
+
"rstrip": false,
|
23 |
+
"single_word": false
|
24 |
+
},
|
25 |
+
"pad_token": {
|
26 |
+
"content": "<|endoftext|>",
|
27 |
+
"lstrip": false,
|
28 |
+
"normalized": true,
|
29 |
+
"rstrip": false,
|
30 |
+
"single_word": false
|
31 |
+
},
|
32 |
+
"unk_token": {
|
33 |
+
"content": "<|endoftext|>",
|
34 |
+
"lstrip": false,
|
35 |
+
"normalized": true,
|
36 |
+
"rstrip": false,
|
37 |
+
"single_word": false
|
38 |
+
}
|
39 |
+
}
|
tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
tokenizer_config.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
vocab.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|