File size: 28,949 Bytes
d2bf885
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
interactions:
- request:
    body: null
    headers:
      Accept:
      - '*/*'
      Accept-Encoding:
      - gzip, deflate
      Connection:
      - keep-alive
      User-Agent:
      - python-requests/2.32.3
    method: GET
    uri: https://raw.githubusercontent.com/avidale/encodechka/master/README.md
  response:
    body:
      string: "# encodechka\n## encodechka-eval\n\n\u042D\u0442\u043E\u0442 \u0440\u0435\u043F\u043E\u0437\u0438\u0442\u043E\u0440\u0438\u0439
        - \u0440\u0430\u0437\u0432\u0438\u0442\u0438\u0435 \u043F\u043E\u0434\u0445\u043E\u0434\u0430
        \u043A \u043E\u0446\u0435\u043D\u043A\u0435 \u043C\u043E\u0434\u0435\u043B\u0435\u0439
        \u0438\u0437 \u043F\u043E\u0441\u0442\u0430\n[\u041C\u0430\u043B\u0435\u043D\u044C\u043A\u0438\u0439
        \u0438 \u0431\u044B\u0441\u0442\u0440\u044B\u0439 BERT \u0434\u043B\u044F
        \u0440\u0443\u0441\u0441\u043A\u043E\u0433\u043E \u044F\u0437\u044B\u043A\u0430](https://habr.com/ru/post/562064),
        \n\u044D\u0432\u043E\u043B\u044E\u0446\u0438\u043E\u043D\u0438\u0440\u043E\u0432\u0430\u0432\u0448\u0435\u0433\u043E
        \u0432 [\u0420\u0435\u0439\u0442\u0438\u043D\u0433 \u0440\u0443\u0441\u0441\u043A\u043E\u044F\u0437\u044B\u0447\u043D\u044B\u0445
        \u044D\u043D\u043A\u043E\u0434\u0435\u0440\u043E\u0432 \u043F\u0440\u0435\u0434\u043B\u043E\u0436\u0435\u043D\u0438\u0439](https://habr.com/ru/post/669674/).\n\u0418\u0434\u0435\u044F
        \u0432 \u0442\u043E\u043C, \u0447\u0442\u043E\u0431\u044B \u043F\u043E\u043D\u044F\u0442\u044C,
        \u043A\u0430\u043A \u0445\u043E\u0440\u043E\u0448\u043E \u0440\u0430\u0437\u043D\u044B\u0435
        \u043C\u043E\u0434\u0435\u043B\u0438 \u043F\u0440\u0435\u0432\u0440\u0430\u0449\u0430\u044E\u0442
        \u043A\u043E\u0440\u043E\u0442\u043A\u0438\u0435 \u0442\u0435\u043A\u0441\u0442\u044B\n\u0432
        \u043E\u0441\u043C\u044B\u0441\u043B\u0435\u043D\u043D\u044B\u0435 \u0432\u0435\u043A\u0442\u043E\u0440\u044B.\n\n\u041F\u043E\u0445\u043E\u0436\u0438\u0435
        \u043F\u0440\u043E\u0435\u043A\u0442\u044B:\n* [RussianSuperGLUE](https://russiansuperglue.com/):
        \u0444\u043E\u043A\u0443\u0441 \u043D\u0430 \u0434\u043E\u043E\u0431\u0443\u0447\u0430\u0435\u043C\u044B\u0445
        \u043C\u043E\u0434\u0435\u043B\u044F\u0445\n* [MOROCCO](https://github.com/RussianNLP/MOROCCO/):
        RussianSuperGLUE + \u043E\u0446\u0435\u043D\u043A\u0430 \u043F\u0440\u043E\u0438\u0437\u0432\u043E\u0434\u0438\u0442\u0435\u043B\u044C\u043D\u043E\u0441\u0442\u0438,
        \u0442\u0440\u0443\u0434\u043D\u043E\u0432\u043E\u0441\u043F\u0440\u043E\u0438\u0437\u0432\u043E\u0434\u0438\u043C\n*
        [RuSentEval](https://github.com/RussianNLP/RuSentEval): \u0431\u043E\u043B\u0435\u0435
        \u0430\u043A\u0430\u0434\u0435\u043C\u0438\u0447\u0435\u0441\u043A\u0438\u0435/\u043B\u0438\u043D\u0433\u0432\u0438\u0441\u0442\u0438\u0447\u0435\u0441\u043A\u0438\u0435
        \u0437\u0430\u0434\u0430\u0447\u0438\n* \u0421\u0442\u0430\u0442\u044C\u044F
        \u043E\u0442 \u0412\u044B\u0448\u043A\u0438 [Popov et al, 2019](https://arxiv.org/abs/1910.13291):
        \u043F\u0435\u0440\u0432\u0430\u044F \u043D\u0430\u0443\u0447\u043D\u0430\u044F
        \u0441\u0442\u0430\u0442\u044C\u044F \u043D\u0430 \u044D\u0442\u0443 \u0442\u0435\u043C\u0443,
        \u043D\u043E \u043C\u0430\u043B\u043E\u0432\u0430\u0442\u043E \u043C\u043E\u0434\u0435\u043B\u0435\u0439
        \u0438 \u0437\u0430\u0434\u0430\u0447\n* [SentEvalRu](https://github.com/comptechml/SentEvalRu)
        \u0438 [deepPavlovEval](https://github.com/deepmipt/deepPavlovEval): \u0434\u0432\u0430
        \u0445\u043E\u0440\u043E\u0448\u0438\u0445, \u043D\u043E \u0434\u0430\u0432\u043D\u043E
        \u043D\u0435 \u043E\u0431\u043D\u043E\u0432\u043B\u044F\u0432\u0448\u0438\u0445\u0441\u044F
        \u0431\u0435\u043D\u0447\u043C\u0430\u0440\u043A\u0430. \n\n\u041F\u0440\u0438\u043C\u0435\u0440
        \u0437\u0430\u043F\u0443\u0441\u043A\u0430 \u043C\u0435\u0442\u0440\u0438\u043A
        \u2013 \u0432 \u0431\u043B\u043E\u043A\u043D\u043E\u0442\u0435 [evaluation
        example](https://github.com/avidale/encodechka/blob/master/evaluation%20example.ipynb).
        \n\n\u0411\u043B\u043E\u043A\u043D\u043E\u0442 \u0434\u043B\u044F \u0432\u043E\u0441\u043F\u0440\u043E\u0438\u0437\u0432\u0435\u0434\u0435\u043D\u0438\u044F
        \u043B\u0438\u0434\u0435\u0440\u0431\u043E\u0440\u0434\u0430: [v2021](https://colab.research.google.com/drive/1fu2i7A-Yr-85Ex_NvIyeCIO7lN2R7P-k?usp=sharing),
        \n[v2023](https://colab.research.google.com/drive/1t956aJsp5qPnst3379vI8NNRqiqJUFMn?usp=sharing).\n\n###
        \u041B\u0438\u0434\u0435\u0440\u0431\u043E\u0440\u0434\n\n\u0420\u0430\u043D\u0436\u0438\u0440\u043E\u0432\u0430\u043D\u0438\u0435
        \u043C\u043E\u0434\u0435\u043B\u0435\u0439 \u0432 \u043F\u043E \u0441\u0440\u0435\u0434\u043D\u0435\u043C\u0443
        \u043A\u0430\u0447\u0435\u0441\u0442\u0432\u0443 \u0438 \u043F\u0440\u043E\u0438\u0437\u0432\u043E\u0434\u0438\u0442\u0435\u043B\u044C\u043D\u043E\u0441\u0442\u0438.
        \n\u041F\u043E\u0434\u0441\u0432\u0435\u0447\u0435\u043D\u044B \u041F\u0430\u0440\u0435\u0442\u043E-\u043E\u043F\u0442\u0438\u043C\u0430\u043B\u044C\u043D\u044B\u0435
        \u043C\u043E\u0434\u0435\u043B\u0438 \u043F\u043E \u043A\u0430\u0436\u0434\u043E\u043C\u0443
        \u0438\u0437 \u043A\u0440\u0438\u0442\u0435\u0440\u0438\u0435\u0432. \n\n|
        model                                                       | CPU       |
        GPU      | size          |   Mean S | Mean S+W   |   dim |\n|:------------------------------------------------------------|:----------|:---------|:--------------|---------:|:-----------|------:|\n|
        BAAI/bge-m3                                                 | 523.4     |
        22.5     | **2166.0**    |    0.787 | 0.696      |  1024 |\n| intfloat/multilingual-e5-large-instruct
        \                    | 501.5     | 25.71    | **2136.0**    |    0.784 | 0.684
        \     |  1024 |\n| intfloat/multilingual-e5-large                              |
        **506.8** | **30.8** | **2135.9389** |    0.78  | 0.686      |  1024 |\n|
        sentence-transformers/paraphrase-multilingual-mpnet-base-v2 | **20.5**  |
        **19.9** | **1081.8485** |    0.762 |            |   768 |\n| intfloat/multilingual-e5-base
        \                              | 130.61    | 14.39    | **1061.0**    |    0.761
        | 0.669      |   768 |\n| intfloat/multilingual-e5-small                              |
        40.86     | 12.09    | **449.0**     |    0.742 | 0.645      |   384 |\n|
        symanto/sn-xlm-roberta-base-snli-mnli-anli-xnli             | **20.2**  |
        **16.5** | **1081.8474** |    0.739 |            |   768 |\n| cointegrated/LaBSE-en-ru
        \                                   | 133.4     | **15.3** | **489.6621**
        \ |    0.739 | 0.668      |   768 |\n| sentence-transformers/LaBSE                                 |
        135.1     | **13.3** | 1796.5078     |    0.739 | 0.667      |   768 |\n|
        MUSE-3                                                      | 200.1     |
        30.7     | **303.0**     |    0.736 |            |   512 |\n| text-embedding-ada-002
        \                                     | ?         |          | ?             |
        \   0.734 |            |  1536 |\n| sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2
        | **18.2**  | 14.9     | 479.2547      |    0.734 |            |   384 |\n|
        sentence-transformers/distiluse-base-multilingual-cased-v1  | **11.8**  |
        **8.0**  | 517.7452      |    0.722 |            |   512 |\n| SONAR                                                       |
        ?         | ?        | 3060.0        |    0.721 |            |  1024 |\n|
        facebook/nllb-200-distilled-600M                            | 252.3     |
        15.9     | 1577.4828     |    0.709 | 0.64       |  1024 |\n| sentence-transformers/distiluse-base-multilingual-cased-v2
        \ | **11.2**  | 9.2      | 517.7453      |    0.708 |            |   512 |\n|
        cointegrated/rubert-tiny2                                   | **6.2**   |
        **4.6**  | **111.3823**  |    0.704 | 0.638      |   312 |\n| ai-forever/sbert_large_mt_nlu_ru
        \                           | 504.5     | 29.7     | 1628.6539     |    0.703
        | 0.626      |  1024 |\n| laser                                                       |
        192.5     | 13.5     | 200.0         |    0.699 |            |  1024 |\n|
        laser2                                                      | 163.4     |
        8.6      | 175.0         |    0.694 |            |  1024 |\n| ai-forever/sbert_large_nlu_ru
        \                              | 497.7     | 29.9     | 1628.6539     |    0.688
        | 0.626      |  1024 |\n| clips/mfaq                                                  |
        18.1      | 18.2     | 1081.8576     |    0.687 |            |   768 |\n|
        cointegrated/rut5-base-paraphraser                          | 137.0     |
        15.6     | 412.0015      |    0.685 | 0.634      |   768 |\n| DeepPavlov/rubert-base-cased-sentence
        \                      | 128.4     | 13.2     | 678.5215      |    0.678 |
        0.612      |   768 |\n| DeepPavlov/distilrubert-base-cased-conversational
        \          | 64.2      | 10.4     | 514.002       |    0.676 | 0.624      |
        \  768 |\n| DeepPavlov/distilrubert-tiny-cased-conversational           |
        21.2      | **3.3**  | 405.8292      |    0.67  | 0.616      |   768 |\n|
        cointegrated/rut5-base-multitask                            | 136.9     |
        12.7     | 412.0015      |    0.668 | 0.623      |   768 |\n| ai-forever/ruRoberta-large
        \                                 | 512.3     | 25.5     | 1355.7162     |
        \   0.666 | 0.609      |  1024 |\n| DeepPavlov/rubert-base-cased-conversational
        \                | 127.5     | 16.3     | 678.5215      |    0.653 | 0.606
        \     |   768 |\n| deepvk/deberta-v1-base                                      |
        128.6     | 19.0     | 473.2402      |    0.653 | 0.591      |   768 |\n|
        cointegrated/rubert-tiny                                    | 7.5       |
        5.9      | **44.97**     |    0.645 | 0.575      |   312 |\n| ai-forever/FRED-T5-large
        \                                   | 479.4     | 23.3     | 1372.9988     |
        \   0.639 | 0.551      |  1024 |\n| inkoziev/sbert_synonymy                                     |
        6.9       | 4.2      | 111.3823      |    0.637 | 0.566      |   312 |\n|
        numind/NuNER-multilingual-v0.1                              | 186.9     |
        10       | 678.0         |    0.633 | 0.572      |   768 |\n| cointegrated/rubert-tiny-toxicity
        \                          | 10        | 5.5      | 47.2          |    0.621
        | 0.553      |   312 |\n| ft_geowac_full                                              |
        **0.3**   |          | 1910.0        |    0.617 | 0.55       |   300 |\n|
        bert-base-multilingual-cased                                | 141.4     |
        13.7     | 678.5215      |    0.614 | 0.565      |   768 |\n| ai-forever/ruT5-large
        \                                      | 489.6     | 20.2     | 1277.7571
        \    |    0.61  | 0.578      |  1024 |\n| cointegrated/rut5-small                                     |
        37.6      | 8.6      | 111.3162      |    0.602 | 0.564      |   512 |\n|
        ft_geowac_21mb                                              | 1.2       |
        \         | **21.0**      |    0.597 | 0.531      |   300 |\n| inkoziev/sbert_pq
        \                                          | 7.4       | 4.2      | 111.3823
        \     |    0.596 | 0.526      |   312 |\n| ai-forever/ruT5-base                                        |
        126.3     | 12.8     | 418.2325      |    0.571 | 0.544      |   768 |\n|
        hashing_1000_char                                           | 0.5       |
        \         | **1.0**       |    0.557 | 0.464      |  1000 |\n| cointegrated/rut5-base
        \                                     | 127.8     | 15.5     | 412.0014      |
        \   0.554 | 0.53       |   768 |\n| hashing_300_char                                            |
        0.8       |          | 1.0           |    0.529 | 0.433      |   300 |\n|
        hashing_1000                                                | **0.2**   |
        \         | 1.0           |    0.513 | 0.416      |  1000 |\n| hashing_300
        \                                                | 0.3       |          |
        1.0           |    0.491 | 0.397      |   300 |\n\n\u0420\u0430\u043D\u0436\u0438\u0440\u043E\u0432\u0430\u043D\u0438\u0435
        \u043C\u043E\u0434\u0435\u043B\u0435\u0439 \u043F\u043E \u0437\u0430\u0434\u0430\u0447\u0430\u043C.\n\u041F\u043E\u0434\u0441\u0432\u0435\u0447\u0435\u043D\u044B
        \u043D\u0430\u0438\u043B\u0443\u0447\u0448\u0438\u0435 \u043C\u043E\u0434\u0435\u043B\u0438
        \u043F\u043E \u043A\u0430\u0436\u0434\u043E\u0439 \u0438\u0437 \u0437\u0430\u0434\u0430\u0447.
        \n\n| model                                                       | STS      |
        PI       | NLI      | SA       | TI       | IA       | IC       | ICX      |
        NE1      | NE2      |\n|:------------------------------------------------------------|:---------|:---------|:---------|:---------|:---------|:---------|:---------|:---------|:---------|:---------|\n|
        BAAI/bge-m3                                                 | **0.86** | **0.75**
        | 0.51     | **0.82** | 0.97    | 0.79     | 0.81     | **0.78** | 0.24     |
        0.42     |\n| intfloat/multilingual-e5-large-instruct                     |
        0.86     | 0.74     | 0.47     | 0.81     | 0.98    | 0.8      | **0.82**
        | 0.77     | 0.21     | 0.35     |\n| intfloat/multilingual-e5-large                              |
        0.86     | 0.73     | 0.47     | 0.81     | 0.98    | 0.8      | 0.82     |
        0.77     | 0.24     | 0.37     |\n| sentence-transformers/paraphrase-multilingual-mpnet-base-v2
        | 0.85     | 0.66     | 0.54     | 0.79     | 0.95     | 0.78     | 0.79     |
        0.74     |          |          |\n| intfloat/multilingual-e5-base                               |
        0.83     | 0.7      | 0.46     | 0.8      | 0.96    | 0.78     | 0.8      |
        0.74     | 0.23     | 0.38     |\n| intfloat/multilingual-e5-small                              |
        0.82     | 0.71     | 0.46     | 0.76     | 0.96    | 0.76     | 0.78     |
        0.69     | 0.23     | 0.27     |\n| symanto/sn-xlm-roberta-base-snli-mnli-anli-xnli
        \            | 0.76     | 0.6      | **0.86** | 0.76     | 0.91     | 0.72
        \    | 0.71     | 0.6      |          |          |\n| cointegrated/LaBSE-en-ru
        \                                   | 0.79     | 0.66     | 0.43     | 0.76
        \    | 0.95     | 0.77     | 0.79     | 0.77     | 0.35     | 0.42     |\n|
        sentence-transformers/LaBSE                                 | 0.79     | 0.66
        \    | 0.43     | 0.76     | 0.95     | 0.77     | 0.79     | 0.76     | 0.35
        \    | 0.41     |\n| MUSE-3                                                      |
        0.81     | 0.61     | 0.42     | 0.77     | 0.96     | 0.79     | 0.77     |
        0.75     |          |          |\n| text-embedding-ada-002                                      |
        0.78     | 0.66     | 0.44     | 0.77     | 0.96     | 0.77     | 0.75     |
        0.73     |          |          |\n| sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2
        | 0.84     | 0.62     | 0.5      | 0.76     | 0.92     | 0.74     | 0.77     |
        0.72     |          |          |\n| sentence-transformers/distiluse-base-multilingual-cased-v1
        \ | 0.8      | 0.6      | 0.43     | 0.75     | 0.94     | 0.76     | 0.76
        \    | 0.74     |          |          |\n| SONAR                                                       |
        0.71     | 0.58     | 0.41     | 0.77     | 0.98     | 0.79     | 0.78     |
        0.74     |          |          |\n| facebook/nllb-200-distilled-600M                            |
        0.71     | 0.54     | 0.41     | 0.76     | 0.95     | 0.76     | 0.8      |
        0.75     | 0.31     | 0.42     |\n| sentence-transformers/distiluse-base-multilingual-cased-v2
        \ | 0.79     | 0.55     | 0.42     | 0.75     | 0.91     | 0.75     | 0.76
        \    | 0.73     |          |          |\n| cointegrated/rubert-tiny2                                   |
        0.75     | 0.65     | 0.42     | 0.74     | 0.94     | 0.75     | 0.76     |
        0.64     | 0.36     | 0.39     |\n| ai-forever/sbert_large_mt_nlu_ru                           |
        0.78     | 0.65     | 0.4      | 0.8      | 0.98     | 0.8      | 0.76     |
        0.45     | 0.3      | 0.34     |\n| laser                                                       |
        0.75     | 0.6      | 0.41     | 0.73     | 0.96     | 0.72     | 0.72     |
        0.7      |          |          |\n| laser2                                                      |
        0.74     | 0.6      | 0.41     | 0.73     | 0.95     | 0.72     | 0.72     |
        0.69     |          |          |\n| ai-forever/sbert_large_nlu_ru                              |
        0.68     | 0.62     | 0.39     | 0.78     | 0.98     | 0.8      | 0.78     |
        0.48     | 0.36     | 0.4      |\n| clips/mfaq                                                  |
        0.63     | 0.59     | 0.35     | 0.79     | 0.95     | 0.74     | 0.76     |
        0.69     |          |          |\n| cointegrated/rut5-base-paraphraser                          |
        0.65     | 0.53     | 0.4      | 0.78     | 0.95     | 0.75     | 0.75     |
        0.67     | 0.45     | 0.41     |\n| DeepPavlov/rubert-base-cased-sentence
        \                      | 0.74     | 0.66     | 0.49     | 0.75     | 0.92
        \    | 0.75     | 0.72     | 0.39     | 0.36     | 0.34     |\n| DeepPavlov/distilrubert-base-cased-conversational
        \          | 0.7      | 0.56     | 0.39     | 0.76     | 0.98     | 0.78     |
        0.76     | 0.48     | 0.4      | 0.43     |\n| DeepPavlov/distilrubert-tiny-cased-conversational
        \          | 0.7      | 0.55     | 0.4      | 0.74     | 0.98     | 0.78     |
        0.76     | 0.45     | 0.35     | 0.44     |\n| cointegrated/rut5-base-multitask
        \                           | 0.65     | 0.54     | 0.38     | 0.76     |
        0.95     | 0.75     | 0.72     | 0.59     | 0.47     | 0.41     |\n| ai-forever/ruRoberta-large
        \                                | 0.7      | 0.6      | 0.35     | 0.78     |
        0.98     | 0.8      | 0.78     | 0.32     | 0.3      | **0.46** |\n| DeepPavlov/rubert-base-cased-conversational
        \                | 0.68     | 0.52     | 0.38     | 0.73     | 0.98     |
        0.78     | 0.75     | 0.42     | 0.41     | 0.43     |\n| deepvk/deberta-v1-base
        \                                     | 0.68     | 0.54     | 0.38     | 0.76
        \    | 0.98     | 0.8      | 0.78     | 0.29     | 0.29     | 0.4      |\n|
        cointegrated/rubert-tiny                                    | 0.66     | 0.53
        \    | 0.4      | 0.71     | 0.89     | 0.68     | 0.7      | 0.58     | 0.24
        \    | 0.34     |\n| ai-forever/FRED-T5-large                                    |
        0.62     | 0.44     | 0.37     | 0.78     | 0.98     | **0.81** | 0.67     |
        0.45     | 0.25     | 0.15     |\n| inkoziev/sbert_synonymy                                     |
        0.69     | 0.49     | 0.41     | 0.71     | 0.91     | 0.72     | 0.69     |
        0.47     | 0.32     | 0.24     |\n| numind/NuNER-multilingual-v0.1                              |
        0.67     | 0.53     | 0.4      | 0.71     | 0.89    | 0.72     | 0.7      |
        0.46     | 0.32     | 0.34     |\n| cointegrated/rubert-tiny-toxicity                           |
        0.57     | 0.44     | 0.37     | 0.68     | **1.0** | 0.78     | 0.7      |
        0.43     | 0.24     | 0.32     |\n| ft_geowac_full                                              |
        0.69     | 0.53     | 0.37     | 0.72     | 0.97     | 0.76     | 0.66     |
        0.26     | 0.22     | 0.34     |\n| bert-base-multilingual-cased                                |
        0.66     | 0.53     | 0.37     | 0.7      | 0.89     | 0.7      | 0.69     |
        0.38     | 0.36     | 0.38     |\n| ai-forever/ruT5-large                                      |
        0.51     | 0.39     | 0.35     | 0.77     | 0.97     | 0.79     | 0.72     |
        0.38     | 0.46     | 0.44     |\n| cointegrated/rut5-small                                     |
        0.61     | 0.53     | 0.34     | 0.73     | 0.92     | 0.71     | 0.7      |
        0.27     | 0.44     | 0.38     |\n| ft_geowac_21mb                                              |
        0.68     | 0.52     | 0.36     | 0.72     | 0.96     | 0.74     | 0.65     |
        0.15     | 0.21     | 0.32     |\n| inkoziev/sbert_pq                                           |
        0.57     | 0.41     | 0.38     | 0.7      | 0.92     | 0.69     | 0.68     |
        0.43     | 0.26     | 0.24     |\n| ai-forever/ruT5-base                                       |
        0.5      | 0.28     | 0.34     | 0.73     | 0.97     | 0.76     | 0.7      |
        0.29     | 0.45     | 0.41     |\n| hashing_1000_char                                           |
        0.7      | 0.53     | 0.4      | 0.7      | 0.84     | 0.59     | 0.63     |
        0.05     | 0.05     | 0.14     |\n| cointegrated/rut5-base                                      |
        0.44     | 0.28     | 0.33     | 0.74     | 0.92     | 0.75     | 0.58     |
        0.39     | **0.48** | 0.39     |\n| hashing_300_char                                            |
        0.69     | 0.51     | 0.39     | 0.67     | 0.75     | 0.57     | 0.61     |
        0.04     | 0.03     | 0.08     |\n| hashing_1000                                                |
        0.63     | 0.49     | 0.39     | 0.66     | 0.77     | 0.55     | 0.57     |
        0.05     | 0.02     | 0.04     |\n| hashing_300                                                 |
        0.61     | 0.48     | 0.4      | 0.64     | 0.71     | 0.54     | 0.5      |
        0.05     | 0.02     | 0.02     |\n\n#### \u0417\u0430\u0434\u0430\u0447\u0438\n-
        Semantic text similarity (**STS**) \u043D\u0430 \u043E\u0441\u043D\u043E\u0432\u0435
        \u043F\u0435\u0440\u0435\u0432\u0435\u0434\u0451\u043D\u043D\u043E\u0433\u043E
        \u0434\u0430\u0442\u0430\u0441\u0435\u0442\u0430 [STS-B](https://huggingface.co/datasets/stsb_multi_mt);\n-
        Paraphrase identification (**PI**) \u043D\u0430 \u043E\u0441\u043D\u043E\u0432\u0435
        \u0434\u0430\u0442\u0430\u0441\u0435\u0442\u0430 paraphraser.ru;\n- Natural
        language inference (**NLI**) \u043D\u0430 \u0434\u0430\u0442\u0430\u0441\u0435\u0442\u0435
        [XNLI](https://github.com/facebookresearch/XNLI);\n- Sentiment analysis (**SA**)
        \u043D\u0430 \u0434\u0430\u043D\u043D\u044B\u0445 [SentiRuEval2016](http://www.dialog-21.ru/evaluation/2016/sentiment/).\n-
        Toxicity identification (**TI**) \u043D\u0430 \u0434\u0430\u0442\u0430\u0441\u0435\u0442\u0435
        \u0442\u043E\u043A\u0441\u0438\u0447\u043D\u044B\u0445 \u043A\u043E\u043C\u043C\u0435\u043D\u0442\u0430\u0440\u0438\u0435\u0432
        \u0438\u0437 [OKMLCup](https://cups.mail.ru/ru/contests/okmlcup2020);\n- Inappropriateness
        identification (**II**) \u043D\u0430 [\u0434\u0430\u0442\u0430\u0441\u0435\u0442\u0435
        \u0421\u043A\u043E\u043B\u0442\u0435\u0445\u0430](https://github.com/skoltech-nlp/inappropriate-sensitive-topics);\n-
        Intent classification (**IC**) \u0438 \u0435\u0451 \u043A\u0440\u043E\u0441\u0441-\u044F\u0437\u044B\u0447\u043D\u0430\u044F
        \u0432\u0435\u0440\u0441\u0438\u044F **ICX** \u043D\u0430 \u0434\u0430\u0442\u0430\u0441\u0435\u0442\u0435
        [NLU-evaluation-data](https://github.com/xliuhw/NLU-Evaluation-Data), \u043A\u043E\u0442\u043E\u0440\u044B\u0439
        \u044F \u0430\u0432\u0442\u043E\u043C\u0430\u0442\u0438\u0447\u0435\u0441\u043A\u0438
        \u043F\u0435\u0440\u0435\u0432\u0451\u043B \u043D\u0430 \u0440\u0443\u0441\u0441\u043A\u0438\u0439.
        \u0412 IC \u043A\u043B\u0430\u0441\u0441\u0438\u0444\u0438\u043A\u0430\u0442\u043E\u0440
        \u043E\u0431\u0443\u0447\u0430\u0435\u0442\u0441\u044F \u043D\u0430 \u0440\u0443\u0441\u0441\u043A\u0438\u0445
        \u0434\u0430\u043D\u043D\u044B\u0445, \u0430 \u0432 ICX \u2013 \u043D\u0430
        \u0430\u043D\u0433\u043B\u0438\u0439\u0441\u043A\u0438\u0445, \u0430 \u0442\u0435\u0441\u0442\u0438\u0440\u0443\u0435\u0442\u0441\u044F
        \u0432 \u043E\u0431\u043E\u0438\u0445 \u0441\u043B\u0443\u0447\u0430\u044F\u0445
        \u043D\u0430 \u0440\u0443\u0441\u0441\u043A\u0438\u0445.\n- \u0420\u0430\u0441\u043F\u043E\u0437\u043D\u0430\u0432\u0430\u043D\u0438\u0435
        \u0438\u043C\u0435\u043D\u043E\u0432\u0430\u043D\u043D\u044B\u0445 \u0441\u0443\u0449\u043D\u043E\u0441\u0442\u0435\u0439
        \u043D\u0430 \u0434\u0430\u0442\u0430\u0441\u0435\u0442\u0430\u0445 [factRuEval-2016](https://github.com/dialogue-evaluation/factRuEval-2016)
        (**NE1**) \u0438 [RuDReC](https://github.com/cimm-kzn/RuDReC) (**NE2**). \u042D\u0442\u0438
        \u0434\u0432\u0435 \u0437\u0430\u0434\u0430\u0447\u0438 \u0442\u0440\u0435\u0431\u0443\u044E\u0442
        \u043F\u043E\u043B\u0443\u0447\u0430\u0442\u044C \u044D\u043C\u0431\u0435\u0434\u0434\u0438\u043D\u0433\u0438
        \u043E\u0442\u0434\u0435\u043B\u044C\u043D\u044B\u0445 \u0442\u043E\u043A\u0435\u043D\u043E\u0432,
        \u0430 \u043D\u0435 \u0446\u0435\u043B\u044B\u0445 \u043F\u0440\u0435\u0434\u043B\u043E\u0436\u0435\u043D\u0438\u0439;
        \u043F\u043E\u044D\u0442\u043E\u043C\u0443 \u0442\u0430\u043C \u0443\u0447\u0430\u0441\u0442\u0432\u0443\u044E\u0442
        \u043D\u0435 \u0432\u0441\u0435 \u043C\u043E\u0434\u0435\u043B\u0438.\n\n###
        Changelog\n* \u0410\u0432\u0433\u0443\u0441\u0442 2023 - \u043E\u0431\u043D\u043E\u0432\u0438\u043B
        \u0440\u0435\u0439\u0442\u0438\u043D\u0433:\n   * \u043F\u043E\u043F\u0440\u0430\u0432\u0438\u0432
        \u043E\u0448\u0438\u0431\u043A\u0443 \u0432 \u0432\u044B\u0447\u0438\u0441\u043B\u0435\u043D\u0438\u0438
        mean token embeddings\n   * \u0434\u043E\u0431\u0430\u0432\u0438\u043B \u043D\u0435\u0441\u043A\u043E\u043B\u044C\u043A\u043E
        \u043C\u043E\u0434\u0435\u043B\u0435\u0439, \u0432\u043A\u043B\u044E\u0447\u0430\u044F
        \u043D\u043E\u0432\u043E\u0433\u043E \u043B\u0438\u0434\u0435\u0440\u0430
        - `intfloat/multilingual-e5-large`\n   * \u043F\u043E \u043F\u0440\u043E\u0441\u044C\u0431\u0430\u043C
        \u0442\u0440\u0443\u0434\u044F\u0449\u0438\u0445\u0441\u044F, \u0434\u043E\u0431\u0430\u0432\u0438\u043B
        `text-embedding-ada-002` (\u0440\u0430\u0437\u043C\u0435\u0440 \u0438 \u043F\u0440\u043E\u0438\u0437\u0432\u043E\u0434\u0438\u0442\u0435\u043B\u044C\u043D\u043E\u0441\u0442\u044C
        \u0443\u043A\u0430\u0437\u0430\u043D\u044B \u043E\u0442 \u0431\u0430\u043B\u0434\u044B)\n*
        \u041B\u0435\u0442\u043E 2022 - \u043E\u043F\u0443\u0431\u043B\u0438\u043A\u043E\u0432\u0430\u043B
        \u043F\u0435\u0440\u0432\u044B\u0439 \u0440\u0435\u0439\u0442\u0438\u043D\u0433\n"
    headers:
      Accept-Ranges:
      - bytes
      Access-Control-Allow-Origin:
      - '*'
      Cache-Control:
      - max-age=300
      Connection:
      - keep-alive
      Content-Encoding:
      - gzip
      Content-Length:
      - '4972'
      Content-Security-Policy:
      - default-src 'none'; style-src 'unsafe-inline'; sandbox
      Content-Type:
      - text/plain; charset=utf-8
      Cross-Origin-Resource-Policy:
      - cross-origin
      Date:
      - Thu, 13 Jun 2024 17:29:26 GMT
      ETag:
      - W/"6ef42cd6939559c9e297cd85ab8b8a44b6ce19809ce92e1efcf39d06809cd99a"
      Expires:
      - Thu, 13 Jun 2024 17:34:26 GMT
      Source-Age:
      - '245'
      Strict-Transport-Security:
      - max-age=31536000
      Vary:
      - Authorization,Accept-Encoding,Origin
      Via:
      - 1.1 varnish
      X-Cache:
      - HIT
      X-Cache-Hits:
      - '0'
      X-Content-Type-Options:
      - nosniff
      X-Fastly-Request-ID:
      - 0b5812cb6e8627abe030f2ff2764205ee7247b21
      X-Frame-Options:
      - deny
      X-GitHub-Request-Id:
      - 3467:253C76:A903D8:B1E9A7:666B25FA
      X-Served-By:
      - cache-ams21038-AMS
      X-Timer:
      - S1718299767.633243,VS0,VE2
      X-XSS-Protection:
      - 1; mode=block
    status:
      code: 200
      message: OK
version: 1