weizhou03 committed on
Commit
e848f79
·
verified ·
1 Parent(s): b01ad15

Push model using huggingface_hub.

Browse files
Files changed (4) hide show
  1. README.md +322 -2825
  2. config_setfit.json +63 -63
  3. model.safetensors +1 -1
  4. model_head.pkl +1 -1
README.md CHANGED
@@ -1,2851 +1,348 @@
1
  ---
2
- library_name: sentence-transformers
3
- pipeline_tag: sentence-similarity
4
  tags:
5
- - feature-extraction
6
- - sentence-similarity
7
- - mteb
8
- - transformers
9
- - transformers.js
 
 
 
 
 
 
 
 
 
 
 
10
  model-index:
11
- - name: epoch_0_model
12
  results:
13
  - task:
14
- type: Classification
 
15
  dataset:
16
- type: mteb/amazon_counterfactual
17
- name: MTEB AmazonCounterfactualClassification (en)
18
- config: en
19
  split: test
20
- revision: e8379541af4e31359cca9fbcf4b00f2671dba205
21
  metrics:
22
  - type: accuracy
23
- value: 75.20895522388058
24
- - type: ap
25
- value: 38.57605549557802
26
- - type: f1
27
- value: 69.35586565857854
28
- - task:
29
- type: Classification
30
- dataset:
31
- type: mteb/amazon_polarity
32
- name: MTEB AmazonPolarityClassification
33
- config: default
34
- split: test
35
- revision: e2d317d38cd51312af73b3d32a06d1a08b442046
36
- metrics:
37
- - type: accuracy
38
- value: 91.8144
39
- - type: ap
40
- value: 88.65222882032363
41
- - type: f1
42
- value: 91.80426301643274
43
- - task:
44
- type: Classification
45
- dataset:
46
- type: mteb/amazon_reviews_multi
47
- name: MTEB AmazonReviewsClassification (en)
48
- config: en
49
- split: test
50
- revision: 1399c76144fd37290681b995c656ef9b2e06e26d
51
- metrics:
52
- - type: accuracy
53
- value: 47.162000000000006
54
- - type: f1
55
- value: 46.59329642263158
56
- - task:
57
- type: Retrieval
58
- dataset:
59
- type: arguana
60
- name: MTEB ArguAna
61
- config: default
62
- split: test
63
- revision: None
64
- metrics:
65
- - type: map_at_1
66
- value: 24.253
67
- - type: map_at_10
68
- value: 38.962
69
- - type: map_at_100
70
- value: 40.081
71
- - type: map_at_1000
72
- value: 40.089000000000006
73
- - type: map_at_3
74
- value: 33.499
75
- - type: map_at_5
76
- value: 36.351
77
- - type: mrr_at_1
78
- value: 24.609
79
- - type: mrr_at_10
80
- value: 39.099000000000004
81
- - type: mrr_at_100
82
- value: 40.211000000000006
83
- - type: mrr_at_1000
84
- value: 40.219
85
- - type: mrr_at_3
86
- value: 33.677
87
- - type: mrr_at_5
88
- value: 36.469
89
- - type: ndcg_at_1
90
- value: 24.253
91
- - type: ndcg_at_10
92
- value: 48.010999999999996
93
- - type: ndcg_at_100
94
- value: 52.756
95
- - type: ndcg_at_1000
96
- value: 52.964999999999996
97
- - type: ndcg_at_3
98
- value: 36.564
99
- - type: ndcg_at_5
100
- value: 41.711999999999996
101
- - type: precision_at_1
102
- value: 24.253
103
- - type: precision_at_10
104
- value: 7.738
105
- - type: precision_at_100
106
- value: 0.98
107
- - type: precision_at_1000
108
- value: 0.1
109
- - type: precision_at_3
110
- value: 15.149000000000001
111
- - type: precision_at_5
112
- value: 11.593
113
- - type: recall_at_1
114
- value: 24.253
115
- - type: recall_at_10
116
- value: 77.383
117
- - type: recall_at_100
118
- value: 98.009
119
- - type: recall_at_1000
120
- value: 99.644
121
- - type: recall_at_3
122
- value: 45.448
123
- - type: recall_at_5
124
- value: 57.965999999999994
125
- - task:
126
- type: Clustering
127
- dataset:
128
- type: mteb/arxiv-clustering-p2p
129
- name: MTEB ArxivClusteringP2P
130
- config: default
131
- split: test
132
- revision: a122ad7f3f0291bf49cc6f4d32aa80929df69d5d
133
- metrics:
134
- - type: v_measure
135
- value: 45.69069567851087
136
- - task:
137
- type: Clustering
138
- dataset:
139
- type: mteb/arxiv-clustering-s2s
140
- name: MTEB ArxivClusteringS2S
141
- config: default
142
- split: test
143
- revision: f910caf1a6075f7329cdf8c1a6135696f37dbd53
144
- metrics:
145
- - type: v_measure
146
- value: 36.35185490976283
147
- - task:
148
- type: Reranking
149
- dataset:
150
- type: mteb/askubuntudupquestions-reranking
151
- name: MTEB AskUbuntuDupQuestions
152
- config: default
153
- split: test
154
- revision: 2000358ca161889fa9c082cb41daa8dcfb161a54
155
- metrics:
156
- - type: map
157
- value: 61.71274951450321
158
- - type: mrr
159
- value: 76.06032625423207
160
- - task:
161
- type: STS
162
- dataset:
163
- type: mteb/biosses-sts
164
- name: MTEB BIOSSES
165
- config: default
166
- split: test
167
- revision: d3fb88f8f02e40887cd149695127462bbcf29b4a
168
- metrics:
169
- - type: cos_sim_pearson
170
- value: 86.73980520022269
171
- - type: cos_sim_spearman
172
- value: 84.24649792685918
173
- - type: euclidean_pearson
174
- value: 85.85197641158186
175
- - type: euclidean_spearman
176
- value: 84.24649792685918
177
- - type: manhattan_pearson
178
- value: 86.26809552711346
179
- - type: manhattan_spearman
180
- value: 84.56397504030865
181
- - task:
182
- type: Classification
183
- dataset:
184
- type: mteb/banking77
185
- name: MTEB Banking77Classification
186
- config: default
187
- split: test
188
- revision: 0fd18e25b25c072e09e0d92ab615fda904d66300
189
- metrics:
190
- - type: accuracy
191
- value: 84.25324675324674
192
- - type: f1
193
- value: 84.17872280892557
194
- - task:
195
- type: Clustering
196
- dataset:
197
- type: mteb/biorxiv-clustering-p2p
198
- name: MTEB BiorxivClusteringP2P
199
- config: default
200
- split: test
201
- revision: 65b79d1d13f80053f67aca9498d9402c2d9f1f40
202
- metrics:
203
- - type: v_measure
204
- value: 38.770253446400886
205
- - task:
206
- type: Clustering
207
- dataset:
208
- type: mteb/biorxiv-clustering-s2s
209
- name: MTEB BiorxivClusteringS2S
210
- config: default
211
- split: test
212
- revision: 258694dd0231531bc1fd9de6ceb52a0853c6d908
213
- metrics:
214
- - type: v_measure
215
- value: 32.94307095497281
216
- - task:
217
- type: Retrieval
218
- dataset:
219
- type: BeIR/cqadupstack
220
- name: MTEB CQADupstackAndroidRetrieval
221
- config: default
222
- split: test
223
- revision: None
224
- metrics:
225
- - type: map_at_1
226
- value: 32.164
227
- - type: map_at_10
228
- value: 42.641
229
- - type: map_at_100
230
- value: 43.947
231
- - type: map_at_1000
232
- value: 44.074999999999996
233
- - type: map_at_3
234
- value: 39.592
235
- - type: map_at_5
236
- value: 41.204
237
- - type: mrr_at_1
238
- value: 39.628
239
- - type: mrr_at_10
240
- value: 48.625
241
- - type: mrr_at_100
242
- value: 49.368
243
- - type: mrr_at_1000
244
- value: 49.413000000000004
245
- - type: mrr_at_3
246
- value: 46.400000000000006
247
- - type: mrr_at_5
248
- value: 47.68
249
- - type: ndcg_at_1
250
- value: 39.628
251
- - type: ndcg_at_10
252
- value: 48.564
253
- - type: ndcg_at_100
254
- value: 53.507000000000005
255
- - type: ndcg_at_1000
256
- value: 55.635999999999996
257
- - type: ndcg_at_3
258
- value: 44.471
259
- - type: ndcg_at_5
260
- value: 46.137
261
- - type: precision_at_1
262
- value: 39.628
263
- - type: precision_at_10
264
- value: 8.856
265
- - type: precision_at_100
266
- value: 1.429
267
- - type: precision_at_1000
268
- value: 0.191
269
- - type: precision_at_3
270
- value: 21.268
271
- - type: precision_at_5
272
- value: 14.649000000000001
273
- - type: recall_at_1
274
- value: 32.164
275
- - type: recall_at_10
276
- value: 59.609
277
- - type: recall_at_100
278
- value: 80.521
279
- - type: recall_at_1000
280
- value: 94.245
281
- - type: recall_at_3
282
- value: 46.521
283
- - type: recall_at_5
284
- value: 52.083999999999996
285
- - task:
286
- type: Retrieval
287
- dataset:
288
- type: BeIR/cqadupstack
289
- name: MTEB CQADupstackEnglishRetrieval
290
- config: default
291
- split: test
292
- revision: None
293
- metrics:
294
- - type: map_at_1
295
- value: 31.526
296
- - type: map_at_10
297
- value: 41.581
298
- - type: map_at_100
299
- value: 42.815999999999995
300
- - type: map_at_1000
301
- value: 42.936
302
- - type: map_at_3
303
- value: 38.605000000000004
304
- - type: map_at_5
305
- value: 40.351
306
- - type: mrr_at_1
307
- value: 39.489999999999995
308
- - type: mrr_at_10
309
- value: 47.829
310
- - type: mrr_at_100
311
- value: 48.512
312
- - type: mrr_at_1000
313
- value: 48.552
314
- - type: mrr_at_3
315
- value: 45.754
316
- - type: mrr_at_5
317
- value: 46.986
318
- - type: ndcg_at_1
319
- value: 39.489999999999995
320
- - type: ndcg_at_10
321
- value: 47.269
322
- - type: ndcg_at_100
323
- value: 51.564
324
- - type: ndcg_at_1000
325
- value: 53.53099999999999
326
- - type: ndcg_at_3
327
- value: 43.301
328
- - type: ndcg_at_5
329
- value: 45.239000000000004
330
- - type: precision_at_1
331
- value: 39.489999999999995
332
- - type: precision_at_10
333
- value: 8.93
334
- - type: precision_at_100
335
- value: 1.415
336
- - type: precision_at_1000
337
- value: 0.188
338
- - type: precision_at_3
339
- value: 20.892
340
- - type: precision_at_5
341
- value: 14.865999999999998
342
- - type: recall_at_1
343
- value: 31.526
344
- - type: recall_at_10
345
- value: 56.76
346
- - type: recall_at_100
347
- value: 75.029
348
- - type: recall_at_1000
349
- value: 87.491
350
- - type: recall_at_3
351
- value: 44.786
352
- - type: recall_at_5
353
- value: 50.254
354
- - task:
355
- type: Retrieval
356
- dataset:
357
- type: BeIR/cqadupstack
358
- name: MTEB CQADupstackGamingRetrieval
359
- config: default
360
- split: test
361
- revision: None
362
- metrics:
363
- - type: map_at_1
364
- value: 40.987
365
- - type: map_at_10
366
- value: 52.827
367
- - type: map_at_100
368
- value: 53.751000000000005
369
- - type: map_at_1000
370
- value: 53.81
371
- - type: map_at_3
372
- value: 49.844
373
- - type: map_at_5
374
- value: 51.473
375
- - type: mrr_at_1
376
- value: 46.833999999999996
377
- - type: mrr_at_10
378
- value: 56.389
379
- - type: mrr_at_100
380
- value: 57.003
381
- - type: mrr_at_1000
382
- value: 57.034
383
- - type: mrr_at_3
384
- value: 54.17999999999999
385
- - type: mrr_at_5
386
- value: 55.486999999999995
387
- - type: ndcg_at_1
388
- value: 46.833999999999996
389
- - type: ndcg_at_10
390
- value: 58.372
391
- - type: ndcg_at_100
392
- value: 62.068
393
- - type: ndcg_at_1000
394
- value: 63.288
395
- - type: ndcg_at_3
396
- value: 53.400000000000006
397
- - type: ndcg_at_5
398
- value: 55.766000000000005
399
- - type: precision_at_1
400
- value: 46.833999999999996
401
- - type: precision_at_10
402
- value: 9.191
403
- - type: precision_at_100
404
- value: 1.192
405
- - type: precision_at_1000
406
- value: 0.134
407
- - type: precision_at_3
408
- value: 23.448
409
- - type: precision_at_5
410
- value: 15.862000000000002
411
- - type: recall_at_1
412
- value: 40.987
413
- - type: recall_at_10
414
- value: 71.146
415
- - type: recall_at_100
416
- value: 87.035
417
- - type: recall_at_1000
418
- value: 95.633
419
- - type: recall_at_3
420
- value: 58.025999999999996
421
- - type: recall_at_5
422
- value: 63.815999999999995
423
- - task:
424
- type: Retrieval
425
- dataset:
426
- type: BeIR/cqadupstack
427
- name: MTEB CQADupstackGisRetrieval
428
- config: default
429
- split: test
430
- revision: None
431
- metrics:
432
- - type: map_at_1
433
- value: 24.587
434
- - type: map_at_10
435
- value: 33.114
436
- - type: map_at_100
437
- value: 34.043
438
- - type: map_at_1000
439
- value: 34.123999999999995
440
- - type: map_at_3
441
- value: 30.45
442
- - type: map_at_5
443
- value: 31.813999999999997
444
- - type: mrr_at_1
445
- value: 26.554
446
- - type: mrr_at_10
447
- value: 35.148
448
- - type: mrr_at_100
449
- value: 35.926
450
- - type: mrr_at_1000
451
- value: 35.991
452
- - type: mrr_at_3
453
- value: 32.599000000000004
454
- - type: mrr_at_5
455
- value: 33.893
456
- - type: ndcg_at_1
457
- value: 26.554
458
- - type: ndcg_at_10
459
- value: 38.132
460
- - type: ndcg_at_100
461
- value: 42.78
462
- - type: ndcg_at_1000
463
- value: 44.919
464
- - type: ndcg_at_3
465
- value: 32.833
466
- - type: ndcg_at_5
467
- value: 35.168
468
- - type: precision_at_1
469
- value: 26.554
470
- - type: precision_at_10
471
- value: 5.921
472
- - type: precision_at_100
473
- value: 0.8659999999999999
474
- - type: precision_at_1000
475
- value: 0.109
476
- - type: precision_at_3
477
- value: 13.861
478
- - type: precision_at_5
479
- value: 9.605
480
- - type: recall_at_1
481
- value: 24.587
482
- - type: recall_at_10
483
- value: 51.690000000000005
484
- - type: recall_at_100
485
- value: 73.428
486
- - type: recall_at_1000
487
- value: 89.551
488
- - type: recall_at_3
489
- value: 37.336999999999996
490
- - type: recall_at_5
491
- value: 43.047000000000004
492
- - task:
493
- type: Retrieval
494
- dataset:
495
- type: BeIR/cqadupstack
496
- name: MTEB CQADupstackMathematicaRetrieval
497
- config: default
498
- split: test
499
- revision: None
500
- metrics:
501
- - type: map_at_1
502
- value: 16.715
503
- - type: map_at_10
504
- value: 24.251
505
- - type: map_at_100
506
- value: 25.326999999999998
507
- - type: map_at_1000
508
- value: 25.455
509
- - type: map_at_3
510
- value: 21.912000000000003
511
- - type: map_at_5
512
- value: 23.257
513
- - type: mrr_at_1
514
- value: 20.274
515
- - type: mrr_at_10
516
- value: 28.552
517
- - type: mrr_at_100
518
- value: 29.42
519
- - type: mrr_at_1000
520
- value: 29.497
521
- - type: mrr_at_3
522
- value: 26.14
523
- - type: mrr_at_5
524
- value: 27.502
525
- - type: ndcg_at_1
526
- value: 20.274
527
- - type: ndcg_at_10
528
- value: 29.088
529
- - type: ndcg_at_100
530
- value: 34.293
531
- - type: ndcg_at_1000
532
- value: 37.271
533
- - type: ndcg_at_3
534
- value: 24.708
535
- - type: ndcg_at_5
536
- value: 26.809
537
- - type: precision_at_1
538
- value: 20.274
539
- - type: precision_at_10
540
- value: 5.361
541
- - type: precision_at_100
542
- value: 0.915
543
- - type: precision_at_1000
544
- value: 0.13
545
- - type: precision_at_3
546
- value: 11.733
547
- - type: precision_at_5
548
- value: 8.556999999999999
549
- - type: recall_at_1
550
- value: 16.715
551
- - type: recall_at_10
552
- value: 39.587
553
- - type: recall_at_100
554
- value: 62.336000000000006
555
- - type: recall_at_1000
556
- value: 83.453
557
- - type: recall_at_3
558
- value: 27.839999999999996
559
- - type: recall_at_5
560
- value: 32.952999999999996
561
- - task:
562
- type: Retrieval
563
- dataset:
564
- type: BeIR/cqadupstack
565
- name: MTEB CQADupstackPhysicsRetrieval
566
- config: default
567
- split: test
568
- revision: None
569
- metrics:
570
- - type: map_at_1
571
- value: 28.793000000000003
572
- - type: map_at_10
573
- value: 38.582
574
- - type: map_at_100
575
- value: 39.881
576
- - type: map_at_1000
577
- value: 39.987
578
- - type: map_at_3
579
- value: 35.851
580
- - type: map_at_5
581
- value: 37.289
582
- - type: mrr_at_1
583
- value: 34.455999999999996
584
- - type: mrr_at_10
585
- value: 43.909
586
- - type: mrr_at_100
587
- value: 44.74
588
- - type: mrr_at_1000
589
- value: 44.786
590
- - type: mrr_at_3
591
- value: 41.659
592
- - type: mrr_at_5
593
- value: 43.010999999999996
594
- - type: ndcg_at_1
595
- value: 34.455999999999996
596
- - type: ndcg_at_10
597
- value: 44.266
598
- - type: ndcg_at_100
599
- value: 49.639
600
- - type: ndcg_at_1000
601
- value: 51.644
602
- - type: ndcg_at_3
603
- value: 39.865
604
- - type: ndcg_at_5
605
- value: 41.887
606
- - type: precision_at_1
607
- value: 34.455999999999996
608
- - type: precision_at_10
609
- value: 7.843999999999999
610
- - type: precision_at_100
611
- value: 1.243
612
- - type: precision_at_1000
613
- value: 0.158
614
- - type: precision_at_3
615
- value: 18.831999999999997
616
- - type: precision_at_5
617
- value: 13.147
618
- - type: recall_at_1
619
- value: 28.793000000000003
620
- - type: recall_at_10
621
- value: 55.68300000000001
622
- - type: recall_at_100
623
- value: 77.99000000000001
624
- - type: recall_at_1000
625
- value: 91.183
626
- - type: recall_at_3
627
- value: 43.293
628
- - type: recall_at_5
629
- value: 48.618
630
- - task:
631
- type: Retrieval
632
- dataset:
633
- type: BeIR/cqadupstack
634
- name: MTEB CQADupstackProgrammersRetrieval
635
- config: default
636
- split: test
637
- revision: None
638
- metrics:
639
- - type: map_at_1
640
- value: 25.907000000000004
641
- - type: map_at_10
642
- value: 35.519
643
- - type: map_at_100
644
- value: 36.806
645
- - type: map_at_1000
646
- value: 36.912
647
- - type: map_at_3
648
- value: 32.748
649
- - type: map_at_5
650
- value: 34.232
651
- - type: mrr_at_1
652
- value: 31.621
653
- - type: mrr_at_10
654
- value: 40.687
655
- - type: mrr_at_100
656
- value: 41.583
657
- - type: mrr_at_1000
658
- value: 41.638999999999996
659
- - type: mrr_at_3
660
- value: 38.527
661
- - type: mrr_at_5
662
- value: 39.612
663
- - type: ndcg_at_1
664
- value: 31.621
665
- - type: ndcg_at_10
666
- value: 41.003
667
- - type: ndcg_at_100
668
- value: 46.617999999999995
669
- - type: ndcg_at_1000
670
- value: 48.82
671
- - type: ndcg_at_3
672
- value: 36.542
673
- - type: ndcg_at_5
674
- value: 38.368
675
- - type: precision_at_1
676
- value: 31.621
677
- - type: precision_at_10
678
- value: 7.396999999999999
679
- - type: precision_at_100
680
- value: 1.191
681
- - type: precision_at_1000
682
- value: 0.153
683
- - type: precision_at_3
684
- value: 17.39
685
- - type: precision_at_5
686
- value: 12.1
687
- - type: recall_at_1
688
- value: 25.907000000000004
689
- - type: recall_at_10
690
- value: 52.115
691
- - type: recall_at_100
692
- value: 76.238
693
- - type: recall_at_1000
694
- value: 91.218
695
- - type: recall_at_3
696
- value: 39.417
697
- - type: recall_at_5
698
- value: 44.435
699
- - task:
700
- type: Retrieval
701
- dataset:
702
- type: BeIR/cqadupstack
703
- name: MTEB CQADupstackRetrieval
704
- config: default
705
- split: test
706
- revision: None
707
- metrics:
708
- - type: map_at_1
709
- value: 25.732166666666668
710
- - type: map_at_10
711
- value: 34.51616666666667
712
- - type: map_at_100
713
- value: 35.67241666666666
714
- - type: map_at_1000
715
- value: 35.78675
716
- - type: map_at_3
717
- value: 31.953416666666662
718
- - type: map_at_5
719
- value: 33.333
720
- - type: mrr_at_1
721
- value: 30.300166666666673
722
- - type: mrr_at_10
723
- value: 38.6255
724
- - type: mrr_at_100
725
- value: 39.46183333333334
726
- - type: mrr_at_1000
727
- value: 39.519999999999996
728
- - type: mrr_at_3
729
- value: 36.41299999999999
730
- - type: mrr_at_5
731
- value: 37.6365
732
- - type: ndcg_at_1
733
- value: 30.300166666666673
734
- - type: ndcg_at_10
735
- value: 39.61466666666667
736
- - type: ndcg_at_100
737
- value: 44.60808333333334
738
- - type: ndcg_at_1000
739
- value: 46.91708333333334
740
- - type: ndcg_at_3
741
- value: 35.26558333333333
742
- - type: ndcg_at_5
743
- value: 37.220000000000006
744
- - type: precision_at_1
745
- value: 30.300166666666673
746
- - type: precision_at_10
747
- value: 6.837416666666667
748
- - type: precision_at_100
749
- value: 1.10425
750
- - type: precision_at_1000
751
- value: 0.14875
752
- - type: precision_at_3
753
- value: 16.13716666666667
754
- - type: precision_at_5
755
- value: 11.2815
756
- - type: recall_at_1
757
- value: 25.732166666666668
758
- - type: recall_at_10
759
- value: 50.578916666666665
760
- - type: recall_at_100
761
- value: 72.42183333333334
762
- - type: recall_at_1000
763
- value: 88.48766666666667
764
- - type: recall_at_3
765
- value: 38.41325
766
- - type: recall_at_5
767
- value: 43.515750000000004
768
- - task:
769
- type: Retrieval
770
- dataset:
771
- type: BeIR/cqadupstack
772
- name: MTEB CQADupstackStatsRetrieval
773
- config: default
774
- split: test
775
- revision: None
776
- metrics:
777
- - type: map_at_1
778
- value: 23.951
779
- - type: map_at_10
780
- value: 30.974
781
- - type: map_at_100
782
- value: 31.804
783
- - type: map_at_1000
784
- value: 31.900000000000002
785
- - type: map_at_3
786
- value: 28.762
787
- - type: map_at_5
788
- value: 29.94
789
- - type: mrr_at_1
790
- value: 26.534000000000002
791
- - type: mrr_at_10
792
- value: 33.553
793
- - type: mrr_at_100
794
- value: 34.297
795
- - type: mrr_at_1000
796
- value: 34.36
797
- - type: mrr_at_3
798
- value: 31.391000000000002
799
- - type: mrr_at_5
800
- value: 32.525999999999996
801
- - type: ndcg_at_1
802
- value: 26.534000000000002
803
- - type: ndcg_at_10
804
- value: 35.112
805
- - type: ndcg_at_100
806
- value: 39.28
807
- - type: ndcg_at_1000
808
- value: 41.723
809
- - type: ndcg_at_3
810
- value: 30.902
811
- - type: ndcg_at_5
812
- value: 32.759
813
- - type: precision_at_1
814
- value: 26.534000000000002
815
- - type: precision_at_10
816
- value: 5.445
817
- - type: precision_at_100
818
- value: 0.819
819
- - type: precision_at_1000
820
- value: 0.11
821
- - type: precision_at_3
822
- value: 12.986
823
- - type: precision_at_5
824
- value: 9.049
825
- - type: recall_at_1
826
- value: 23.951
827
- - type: recall_at_10
828
- value: 45.24
829
- - type: recall_at_100
830
- value: 64.12299999999999
831
- - type: recall_at_1000
832
- value: 82.28999999999999
833
- - type: recall_at_3
834
- value: 33.806000000000004
835
- - type: recall_at_5
836
- value: 38.277
837
- - task:
838
- type: Retrieval
839
- dataset:
840
- type: BeIR/cqadupstack
841
- name: MTEB CQADupstackTexRetrieval
842
- config: default
843
- split: test
844
- revision: None
845
- metrics:
846
- - type: map_at_1
847
- value: 16.829
848
- - type: map_at_10
849
- value: 23.684
850
- - type: map_at_100
851
- value: 24.683
852
- - type: map_at_1000
853
- value: 24.81
854
- - type: map_at_3
855
- value: 21.554000000000002
856
- - type: map_at_5
857
- value: 22.768
858
- - type: mrr_at_1
859
- value: 20.096
860
- - type: mrr_at_10
861
- value: 27.230999999999998
862
- - type: mrr_at_100
863
- value: 28.083999999999996
864
- - type: mrr_at_1000
865
- value: 28.166000000000004
866
- - type: mrr_at_3
867
- value: 25.212
868
- - type: mrr_at_5
869
- value: 26.32
870
- - type: ndcg_at_1
871
- value: 20.096
872
- - type: ndcg_at_10
873
- value: 27.989000000000004
874
- - type: ndcg_at_100
875
- value: 32.847
876
- - type: ndcg_at_1000
877
- value: 35.896
878
- - type: ndcg_at_3
879
- value: 24.116
880
- - type: ndcg_at_5
881
- value: 25.964
882
- - type: precision_at_1
883
- value: 20.096
884
- - type: precision_at_10
885
- value: 5
886
- - type: precision_at_100
887
- value: 0.8750000000000001
888
- - type: precision_at_1000
889
- value: 0.131
890
- - type: precision_at_3
891
- value: 11.207
892
- - type: precision_at_5
893
- value: 8.08
894
- - type: recall_at_1
895
- value: 16.829
896
- - type: recall_at_10
897
- value: 37.407000000000004
898
- - type: recall_at_100
899
- value: 59.101000000000006
900
- - type: recall_at_1000
901
- value: 81.024
902
- - type: recall_at_3
903
- value: 26.739
904
- - type: recall_at_5
905
- value: 31.524
906
- - task:
907
- type: Retrieval
908
- dataset:
909
- type: BeIR/cqadupstack
910
- name: MTEB CQADupstackUnixRetrieval
911
- config: default
912
- split: test
913
- revision: None
914
- metrics:
915
- - type: map_at_1
916
- value: 24.138
917
- - type: map_at_10
918
- value: 32.275999999999996
919
- - type: map_at_100
920
- value: 33.416000000000004
921
- - type: map_at_1000
922
- value: 33.527
923
- - type: map_at_3
924
- value: 29.854000000000003
925
- - type: map_at_5
926
- value: 31.096
927
- - type: mrr_at_1
928
- value: 28.450999999999997
929
- - type: mrr_at_10
930
- value: 36.214
931
- - type: mrr_at_100
932
- value: 37.134
933
- - type: mrr_at_1000
934
- value: 37.198
935
- - type: mrr_at_3
936
- value: 34.001999999999995
937
- - type: mrr_at_5
938
- value: 35.187000000000005
939
- - type: ndcg_at_1
940
- value: 28.450999999999997
941
- - type: ndcg_at_10
942
- value: 37.166
943
- - type: ndcg_at_100
944
- value: 42.454
945
- - type: ndcg_at_1000
946
- value: 44.976
947
- - type: ndcg_at_3
948
- value: 32.796
949
- - type: ndcg_at_5
950
- value: 34.631
951
- - type: precision_at_1
952
- value: 28.450999999999997
953
- - type: precision_at_10
954
- value: 6.241
955
- - type: precision_at_100
956
- value: 0.9950000000000001
957
- - type: precision_at_1000
958
- value: 0.133
959
- - type: precision_at_3
960
- value: 14.801
961
- - type: precision_at_5
962
- value: 10.280000000000001
963
- - type: recall_at_1
964
- value: 24.138
965
- - type: recall_at_10
966
- value: 48.111
967
- - type: recall_at_100
968
- value: 71.245
969
- - type: recall_at_1000
970
- value: 88.986
971
- - type: recall_at_3
972
- value: 36.119
973
- - type: recall_at_5
974
- value: 40.846
975
- - task:
976
- type: Retrieval
977
- dataset:
978
- type: BeIR/cqadupstack
979
- name: MTEB CQADupstackWebmastersRetrieval
980
- config: default
981
- split: test
982
- revision: None
983
- metrics:
984
- - type: map_at_1
985
- value: 23.244
986
- - type: map_at_10
987
- value: 31.227
988
- - type: map_at_100
989
- value: 33.007
990
- - type: map_at_1000
991
- value: 33.223
992
- - type: map_at_3
993
- value: 28.924
994
- - type: map_at_5
995
- value: 30.017
996
- - type: mrr_at_1
997
- value: 27.668
998
- - type: mrr_at_10
999
- value: 35.524
1000
- - type: mrr_at_100
1001
- value: 36.699
1002
- - type: mrr_at_1000
1003
- value: 36.759
1004
- - type: mrr_at_3
1005
- value: 33.366
1006
- - type: mrr_at_5
1007
- value: 34.552
1008
- - type: ndcg_at_1
1009
- value: 27.668
1010
- - type: ndcg_at_10
1011
- value: 36.381
1012
- - type: ndcg_at_100
1013
- value: 43.062
1014
- - type: ndcg_at_1000
1015
- value: 45.656
1016
- - type: ndcg_at_3
1017
- value: 32.501999999999995
1018
- - type: ndcg_at_5
1019
- value: 34.105999999999995
1020
- - type: precision_at_1
1021
- value: 27.668
1022
- - type: precision_at_10
1023
- value: 6.798
1024
- - type: precision_at_100
1025
- value: 1.492
1026
- - type: precision_at_1000
1027
- value: 0.234
1028
- - type: precision_at_3
1029
- value: 15.152
1030
- - type: precision_at_5
1031
- value: 10.791
1032
- - type: recall_at_1
1033
- value: 23.244
1034
- - type: recall_at_10
1035
- value: 45.979
1036
- - type: recall_at_100
1037
- value: 74.822
1038
- - type: recall_at_1000
1039
- value: 91.078
1040
- - type: recall_at_3
1041
- value: 34.925
1042
- - type: recall_at_5
1043
- value: 39.126
1044
- - task:
1045
- type: Retrieval
1046
- dataset:
1047
- type: BeIR/cqadupstack
1048
- name: MTEB CQADupstackWordpressRetrieval
1049
- config: default
1050
- split: test
1051
- revision: None
1052
- metrics:
1053
- - type: map_at_1
1054
- value: 19.945
1055
- - type: map_at_10
1056
- value: 27.517999999999997
1057
- - type: map_at_100
1058
- value: 28.588
1059
- - type: map_at_1000
1060
- value: 28.682000000000002
1061
- - type: map_at_3
1062
- value: 25.345000000000002
1063
- - type: map_at_5
1064
- value: 26.555
1065
- - type: mrr_at_1
1066
- value: 21.996
1067
- - type: mrr_at_10
1068
- value: 29.845
1069
- - type: mrr_at_100
1070
- value: 30.775999999999996
1071
- - type: mrr_at_1000
1072
- value: 30.845
1073
- - type: mrr_at_3
1074
- value: 27.726
1075
- - type: mrr_at_5
1076
- value: 28.882
1077
- - type: ndcg_at_1
1078
- value: 21.996
1079
- - type: ndcg_at_10
1080
- value: 32.034
1081
- - type: ndcg_at_100
1082
- value: 37.185
1083
- - type: ndcg_at_1000
1084
- value: 39.645
1085
- - type: ndcg_at_3
1086
- value: 27.750999999999998
1087
- - type: ndcg_at_5
1088
- value: 29.805999999999997
1089
- - type: precision_at_1
1090
- value: 21.996
1091
- - type: precision_at_10
1092
- value: 5.065
1093
- - type: precision_at_100
1094
- value: 0.819
1095
- - type: precision_at_1000
1096
- value: 0.11399999999999999
1097
- - type: precision_at_3
1098
- value: 12.076
1099
- - type: precision_at_5
1100
- value: 8.392
1101
- - type: recall_at_1
1102
- value: 19.945
1103
- - type: recall_at_10
1104
- value: 43.62
1105
- - type: recall_at_100
1106
- value: 67.194
1107
- - type: recall_at_1000
1108
- value: 85.7
1109
- - type: recall_at_3
1110
- value: 32.15
1111
- - type: recall_at_5
1112
- value: 37.208999999999996
1113
- - task:
1114
- type: Retrieval
1115
- dataset:
1116
- type: climate-fever
1117
- name: MTEB ClimateFEVER
1118
- config: default
1119
- split: test
1120
- revision: None
1121
- metrics:
1122
- - type: map_at_1
1123
- value: 18.279
1124
- - type: map_at_10
1125
- value: 31.052999999999997
1126
- - type: map_at_100
1127
- value: 33.125
1128
- - type: map_at_1000
1129
- value: 33.306000000000004
1130
- - type: map_at_3
1131
- value: 26.208
1132
- - type: map_at_5
1133
- value: 28.857
1134
- - type: mrr_at_1
1135
- value: 42.671
1136
- - type: mrr_at_10
1137
- value: 54.557
1138
- - type: mrr_at_100
1139
- value: 55.142
1140
- - type: mrr_at_1000
1141
- value: 55.169000000000004
1142
- - type: mrr_at_3
1143
- value: 51.488
1144
- - type: mrr_at_5
1145
- value: 53.439
1146
- - type: ndcg_at_1
1147
- value: 42.671
1148
- - type: ndcg_at_10
1149
- value: 41.276
1150
- - type: ndcg_at_100
1151
- value: 48.376000000000005
1152
- - type: ndcg_at_1000
1153
- value: 51.318
1154
- - type: ndcg_at_3
1155
- value: 35.068
1156
- - type: ndcg_at_5
1157
- value: 37.242
1158
- - type: precision_at_1
1159
- value: 42.671
1160
- - type: precision_at_10
1161
- value: 12.638
1162
- - type: precision_at_100
1163
- value: 2.045
1164
- - type: precision_at_1000
1165
- value: 0.26
1166
- - type: precision_at_3
1167
- value: 26.08
1168
- - type: precision_at_5
1169
- value: 19.805
1170
- - type: recall_at_1
1171
- value: 18.279
1172
- - type: recall_at_10
1173
- value: 46.946
1174
- - type: recall_at_100
1175
- value: 70.97200000000001
1176
- - type: recall_at_1000
1177
- value: 87.107
1178
- - type: recall_at_3
1179
- value: 31.147999999999996
1180
- - type: recall_at_5
1181
- value: 38.099
1182
- - task:
1183
- type: Retrieval
1184
- dataset:
1185
- type: dbpedia-entity
1186
- name: MTEB DBPedia
1187
- config: default
1188
- split: test
1189
- revision: None
1190
- metrics:
1191
- - type: map_at_1
1192
- value: 8.573
1193
- - type: map_at_10
1194
- value: 19.747
1195
- - type: map_at_100
1196
- value: 28.205000000000002
1197
- - type: map_at_1000
1198
- value: 29.831000000000003
1199
- - type: map_at_3
1200
- value: 14.109
1201
- - type: map_at_5
1202
- value: 16.448999999999998
1203
- - type: mrr_at_1
1204
- value: 71
1205
- - type: mrr_at_10
1206
- value: 77.68599999999999
1207
- - type: mrr_at_100
1208
- value: 77.995
1209
- - type: mrr_at_1000
1210
- value: 78.00200000000001
1211
- - type: mrr_at_3
1212
- value: 76.292
1213
- - type: mrr_at_5
1214
- value: 77.029
1215
- - type: ndcg_at_1
1216
- value: 59.12500000000001
1217
- - type: ndcg_at_10
1218
- value: 43.9
1219
- - type: ndcg_at_100
1220
- value: 47.863
1221
- - type: ndcg_at_1000
1222
- value: 54.848
1223
- - type: ndcg_at_3
1224
- value: 49.803999999999995
1225
- - type: ndcg_at_5
1226
- value: 46.317
1227
- - type: precision_at_1
1228
- value: 71
1229
- - type: precision_at_10
1230
- value: 34.4
1231
- - type: precision_at_100
1232
- value: 11.063
1233
- - type: precision_at_1000
1234
- value: 1.989
1235
- - type: precision_at_3
1236
- value: 52.333
1237
- - type: precision_at_5
1238
- value: 43.7
1239
- - type: recall_at_1
1240
- value: 8.573
1241
- - type: recall_at_10
1242
- value: 25.615
1243
- - type: recall_at_100
1244
- value: 53.385000000000005
1245
- - type: recall_at_1000
1246
- value: 75.46000000000001
1247
- - type: recall_at_3
1248
- value: 15.429
1249
- - type: recall_at_5
1250
- value: 19.357
1251
- - task:
1252
- type: Classification
1253
- dataset:
1254
- type: mteb/emotion
1255
- name: MTEB EmotionClassification
1256
- config: default
1257
- split: test
1258
- revision: 4f58c6b202a23cf9a4da393831edf4f9183cad37
1259
- metrics:
1260
- - type: accuracy
1261
- value: 47.989999999999995
1262
- - type: f1
1263
- value: 42.776314451497555
1264
- - task:
1265
- type: Retrieval
1266
- dataset:
1267
- type: fever
1268
- name: MTEB FEVER
1269
- config: default
1270
- split: test
1271
- revision: None
1272
- metrics:
1273
- - type: map_at_1
1274
- value: 74.13499999999999
1275
- - type: map_at_10
1276
- value: 82.825
1277
- - type: map_at_100
1278
- value: 83.096
1279
- - type: map_at_1000
1280
- value: 83.111
1281
- - type: map_at_3
1282
- value: 81.748
1283
- - type: map_at_5
1284
- value: 82.446
1285
- - type: mrr_at_1
1286
- value: 79.553
1287
- - type: mrr_at_10
1288
- value: 86.654
1289
- - type: mrr_at_100
1290
- value: 86.774
1291
- - type: mrr_at_1000
1292
- value: 86.778
1293
- - type: mrr_at_3
1294
- value: 85.981
1295
- - type: mrr_at_5
1296
- value: 86.462
1297
- - type: ndcg_at_1
1298
- value: 79.553
1299
- - type: ndcg_at_10
1300
- value: 86.345
1301
- - type: ndcg_at_100
1302
- value: 87.32
1303
- - type: ndcg_at_1000
1304
- value: 87.58200000000001
1305
- - type: ndcg_at_3
1306
- value: 84.719
1307
- - type: ndcg_at_5
1308
- value: 85.677
1309
- - type: precision_at_1
1310
- value: 79.553
1311
- - type: precision_at_10
1312
- value: 10.402000000000001
1313
- - type: precision_at_100
1314
- value: 1.1119999999999999
1315
- - type: precision_at_1000
1316
- value: 0.11499999999999999
1317
- - type: precision_at_3
1318
- value: 32.413
1319
- - type: precision_at_5
1320
- value: 20.138
1321
- - type: recall_at_1
1322
- value: 74.13499999999999
1323
- - type: recall_at_10
1324
- value: 93.215
1325
- - type: recall_at_100
1326
- value: 97.083
1327
- - type: recall_at_1000
1328
- value: 98.732
1329
- - type: recall_at_3
1330
- value: 88.79
1331
- - type: recall_at_5
1332
- value: 91.259
1333
- - task:
1334
- type: Retrieval
1335
- dataset:
1336
- type: fiqa
1337
- name: MTEB FiQA2018
1338
- config: default
1339
- split: test
1340
- revision: None
1341
- metrics:
1342
- - type: map_at_1
1343
- value: 18.298000000000002
1344
- - type: map_at_10
1345
- value: 29.901
1346
- - type: map_at_100
1347
- value: 31.528
1348
- - type: map_at_1000
1349
- value: 31.713
1350
- - type: map_at_3
1351
- value: 25.740000000000002
1352
- - type: map_at_5
1353
- value: 28.227999999999998
1354
- - type: mrr_at_1
1355
- value: 36.728
1356
- - type: mrr_at_10
1357
- value: 45.401
1358
- - type: mrr_at_100
1359
- value: 46.27
1360
- - type: mrr_at_1000
1361
- value: 46.315
1362
- - type: mrr_at_3
1363
- value: 42.978
1364
- - type: mrr_at_5
1365
- value: 44.29
1366
- - type: ndcg_at_1
1367
- value: 36.728
1368
- - type: ndcg_at_10
1369
- value: 37.456
1370
- - type: ndcg_at_100
1371
- value: 43.832
1372
- - type: ndcg_at_1000
1373
- value: 47
1374
- - type: ndcg_at_3
1375
- value: 33.694
1376
- - type: ndcg_at_5
1377
- value: 35.085
1378
- - type: precision_at_1
1379
- value: 36.728
1380
- - type: precision_at_10
1381
- value: 10.386
1382
- - type: precision_at_100
1383
- value: 1.701
1384
- - type: precision_at_1000
1385
- value: 0.22599999999999998
1386
- - type: precision_at_3
1387
- value: 22.479
1388
- - type: precision_at_5
1389
- value: 16.605
1390
- - type: recall_at_1
1391
- value: 18.298000000000002
1392
- - type: recall_at_10
1393
- value: 44.369
1394
- - type: recall_at_100
1395
- value: 68.098
1396
- - type: recall_at_1000
1397
- value: 87.21900000000001
1398
- - type: recall_at_3
1399
- value: 30.215999999999998
1400
- - type: recall_at_5
1401
- value: 36.861
1402
- - task:
1403
- type: Retrieval
1404
- dataset:
1405
- type: hotpotqa
1406
- name: MTEB HotpotQA
1407
- config: default
1408
- split: test
1409
- revision: None
1410
- metrics:
1411
- - type: map_at_1
1412
- value: 39.568
1413
- - type: map_at_10
1414
- value: 65.061
1415
- - type: map_at_100
1416
- value: 65.896
1417
- - type: map_at_1000
1418
- value: 65.95100000000001
1419
- - type: map_at_3
1420
- value: 61.831
1421
- - type: map_at_5
1422
- value: 63.849000000000004
1423
- - type: mrr_at_1
1424
- value: 79.136
1425
- - type: mrr_at_10
1426
- value: 84.58200000000001
1427
- - type: mrr_at_100
1428
- value: 84.765
1429
- - type: mrr_at_1000
1430
- value: 84.772
1431
- - type: mrr_at_3
1432
- value: 83.684
1433
- - type: mrr_at_5
1434
- value: 84.223
1435
- - type: ndcg_at_1
1436
- value: 79.136
1437
- - type: ndcg_at_10
1438
- value: 72.622
1439
- - type: ndcg_at_100
1440
- value: 75.539
1441
- - type: ndcg_at_1000
1442
- value: 76.613
1443
- - type: ndcg_at_3
1444
- value: 68.065
1445
- - type: ndcg_at_5
1446
- value: 70.58
1447
- - type: precision_at_1
1448
- value: 79.136
1449
- - type: precision_at_10
1450
- value: 15.215
1451
- - type: precision_at_100
1452
- value: 1.7500000000000002
1453
- - type: precision_at_1000
1454
- value: 0.189
1455
- - type: precision_at_3
1456
- value: 44.011
1457
- - type: precision_at_5
1458
- value: 28.388999999999996
1459
- - type: recall_at_1
1460
- value: 39.568
1461
- - type: recall_at_10
1462
- value: 76.077
1463
- - type: recall_at_100
1464
- value: 87.481
1465
- - type: recall_at_1000
1466
- value: 94.56400000000001
1467
- - type: recall_at_3
1468
- value: 66.01599999999999
1469
- - type: recall_at_5
1470
- value: 70.97200000000001
1471
- - task:
1472
- type: Classification
1473
- dataset:
1474
- type: mteb/imdb
1475
- name: MTEB ImdbClassification
1476
- config: default
1477
- split: test
1478
- revision: 3d86128a09e091d6018b6d26cad27f2739fc2db7
1479
- metrics:
1480
- - type: accuracy
1481
- value: 85.312
1482
- - type: ap
1483
- value: 80.36296867333715
1484
- - type: f1
1485
- value: 85.26613311552218
1486
- - task:
1487
- type: Retrieval
1488
- dataset:
1489
- type: msmarco
1490
- name: MTEB MSMARCO
1491
- config: default
1492
- split: dev
1493
- revision: None
1494
- metrics:
1495
- - type: map_at_1
1496
- value: 23.363999999999997
1497
- - type: map_at_10
1498
- value: 35.711999999999996
1499
- - type: map_at_100
1500
- value: 36.876999999999995
1501
- - type: map_at_1000
1502
- value: 36.923
1503
- - type: map_at_3
1504
- value: 32.034
1505
- - type: map_at_5
1506
- value: 34.159
1507
- - type: mrr_at_1
1508
- value: 24.04
1509
- - type: mrr_at_10
1510
- value: 36.345
1511
- - type: mrr_at_100
1512
- value: 37.441
1513
- - type: mrr_at_1000
1514
- value: 37.480000000000004
1515
- - type: mrr_at_3
1516
- value: 32.713
1517
- - type: mrr_at_5
1518
- value: 34.824
1519
- - type: ndcg_at_1
1520
- value: 24.026
1521
- - type: ndcg_at_10
1522
- value: 42.531
1523
- - type: ndcg_at_100
1524
- value: 48.081
1525
- - type: ndcg_at_1000
1526
- value: 49.213
1527
- - type: ndcg_at_3
1528
- value: 35.044
1529
- - type: ndcg_at_5
1530
- value: 38.834
1531
- - type: precision_at_1
1532
- value: 24.026
1533
- - type: precision_at_10
1534
- value: 6.622999999999999
1535
- - type: precision_at_100
1536
- value: 0.941
1537
- - type: precision_at_1000
1538
- value: 0.104
1539
- - type: precision_at_3
1540
- value: 14.909
1541
- - type: precision_at_5
1542
- value: 10.871
1543
- - type: recall_at_1
1544
- value: 23.363999999999997
1545
- - type: recall_at_10
1546
- value: 63.426
1547
- - type: recall_at_100
1548
- value: 88.96300000000001
1549
- - type: recall_at_1000
1550
- value: 97.637
1551
- - type: recall_at_3
1552
- value: 43.095
1553
- - type: recall_at_5
1554
- value: 52.178000000000004
1555
- - task:
1556
- type: Classification
1557
- dataset:
1558
- type: mteb/mtop_domain
1559
- name: MTEB MTOPDomainClassification (en)
1560
- config: en
1561
- split: test
1562
- revision: d80d48c1eb48d3562165c59d59d0034df9fff0bf
1563
- metrics:
1564
- - type: accuracy
1565
- value: 93.0095759233926
1566
- - type: f1
1567
- value: 92.78387794667408
1568
- - task:
1569
- type: Classification
1570
- dataset:
1571
- type: mteb/mtop_intent
1572
- name: MTEB MTOPIntentClassification (en)
1573
- config: en
1574
- split: test
1575
- revision: ae001d0e6b1228650b7bd1c2c65fb50ad11a8aba
1576
- metrics:
1577
- - type: accuracy
1578
- value: 75.0296397628819
1579
- - type: f1
1580
- value: 58.45699589820874
1581
- - task:
1582
- type: Classification
1583
- dataset:
1584
- type: mteb/amazon_massive_intent
1585
- name: MTEB MassiveIntentClassification (en)
1586
- config: en
1587
- split: test
1588
- revision: 31efe3c427b0bae9c22cbb560b8f15491cc6bed7
1589
- metrics:
1590
- - type: accuracy
1591
- value: 73.45662407531944
1592
- - type: f1
1593
- value: 71.42364781421813
1594
- - task:
1595
- type: Classification
1596
- dataset:
1597
- type: mteb/amazon_massive_scenario
1598
- name: MTEB MassiveScenarioClassification (en)
1599
- config: en
1600
- split: test
1601
- revision: 7d571f92784cd94a019292a1f45445077d0ef634
1602
- metrics:
1603
- - type: accuracy
1604
- value: 77.07800941492937
1605
- - type: f1
1606
- value: 77.22799045640845
1607
- - task:
1608
- type: Clustering
1609
- dataset:
1610
- type: mteb/medrxiv-clustering-p2p
1611
- name: MTEB MedrxivClusteringP2P
1612
- config: default
1613
- split: test
1614
- revision: e7a26af6f3ae46b30dde8737f02c07b1505bcc73
1615
- metrics:
1616
- - type: v_measure
1617
- value: 34.531234379250606
1618
- - task:
1619
- type: Clustering
1620
- dataset:
1621
- type: mteb/medrxiv-clustering-s2s
1622
- name: MTEB MedrxivClusteringS2S
1623
- config: default
1624
- split: test
1625
- revision: 35191c8c0dca72d8ff3efcd72aa802307d469663
1626
- metrics:
1627
- - type: v_measure
1628
- value: 30.941490381193802
1629
- - task:
1630
- type: Reranking
1631
- dataset:
1632
- type: mteb/mind_small
1633
- name: MTEB MindSmallReranking
1634
- config: default
1635
- split: test
1636
- revision: 3bdac13927fdc888b903db93b2ffdbd90b295a69
1637
- metrics:
1638
- - type: map
1639
- value: 30.3115090856725
1640
- - type: mrr
1641
- value: 31.290667638675757
1642
- - task:
1643
- type: Retrieval
1644
- dataset:
1645
- type: nfcorpus
1646
- name: MTEB NFCorpus
1647
- config: default
1648
- split: test
1649
- revision: None
1650
- metrics:
1651
- - type: map_at_1
1652
- value: 5.465
1653
- - type: map_at_10
1654
- value: 13.03
1655
- - type: map_at_100
1656
- value: 16.057
1657
- - type: map_at_1000
1658
- value: 17.49
1659
- - type: map_at_3
1660
- value: 9.553
1661
- - type: map_at_5
1662
- value: 11.204
1663
- - type: mrr_at_1
1664
- value: 43.653
1665
- - type: mrr_at_10
1666
- value: 53.269
1667
- - type: mrr_at_100
1668
- value: 53.72
1669
- - type: mrr_at_1000
1670
- value: 53.761
1671
- - type: mrr_at_3
1672
- value: 50.929
1673
- - type: mrr_at_5
1674
- value: 52.461
1675
- - type: ndcg_at_1
1676
- value: 42.26
1677
- - type: ndcg_at_10
1678
- value: 34.673
1679
- - type: ndcg_at_100
1680
- value: 30.759999999999998
1681
- - type: ndcg_at_1000
1682
- value: 39.728
1683
- - type: ndcg_at_3
1684
- value: 40.349000000000004
1685
- - type: ndcg_at_5
1686
- value: 37.915
1687
- - type: precision_at_1
1688
- value: 43.653
1689
- - type: precision_at_10
1690
- value: 25.789
1691
- - type: precision_at_100
1692
- value: 7.754999999999999
1693
- - type: precision_at_1000
1694
- value: 2.07
1695
- - type: precision_at_3
1696
- value: 38.596000000000004
1697
- - type: precision_at_5
1698
- value: 33.251
1699
- - type: recall_at_1
1700
- value: 5.465
1701
- - type: recall_at_10
1702
- value: 17.148
1703
- - type: recall_at_100
1704
- value: 29.768
1705
- - type: recall_at_1000
1706
- value: 62.239
1707
- - type: recall_at_3
1708
- value: 10.577
1709
- - type: recall_at_5
1710
- value: 13.315
1711
- - task:
1712
- type: Retrieval
1713
- dataset:
1714
- type: nq
1715
- name: MTEB NQ
1716
- config: default
1717
- split: test
1718
- revision: None
1719
- metrics:
1720
- - type: map_at_1
1721
- value: 37.008
1722
- - type: map_at_10
1723
- value: 52.467
1724
- - type: map_at_100
1725
- value: 53.342999999999996
1726
- - type: map_at_1000
1727
- value: 53.366
1728
- - type: map_at_3
1729
- value: 48.412
1730
- - type: map_at_5
1731
- value: 50.875
1732
- - type: mrr_at_1
1733
- value: 41.541
1734
- - type: mrr_at_10
1735
- value: 54.967
1736
- - type: mrr_at_100
1737
- value: 55.611
1738
- - type: mrr_at_1000
1739
- value: 55.627
1740
- - type: mrr_at_3
1741
- value: 51.824999999999996
1742
- - type: mrr_at_5
1743
- value: 53.763000000000005
1744
- - type: ndcg_at_1
1745
- value: 41.541
1746
- - type: ndcg_at_10
1747
- value: 59.724999999999994
1748
- - type: ndcg_at_100
1749
- value: 63.38700000000001
1750
- - type: ndcg_at_1000
1751
- value: 63.883
1752
- - type: ndcg_at_3
1753
- value: 52.331
1754
- - type: ndcg_at_5
1755
- value: 56.327000000000005
1756
- - type: precision_at_1
1757
- value: 41.541
1758
- - type: precision_at_10
1759
- value: 9.447
1760
- - type: precision_at_100
1761
- value: 1.1520000000000001
1762
- - type: precision_at_1000
1763
- value: 0.12
1764
- - type: precision_at_3
1765
- value: 23.262
1766
- - type: precision_at_5
1767
- value: 16.314999999999998
1768
- - type: recall_at_1
1769
- value: 37.008
1770
- - type: recall_at_10
1771
- value: 79.145
1772
- - type: recall_at_100
1773
- value: 94.986
1774
- - type: recall_at_1000
1775
- value: 98.607
1776
- - type: recall_at_3
1777
- value: 60.277
1778
- - type: recall_at_5
1779
- value: 69.407
1780
- - task:
1781
- type: Retrieval
1782
- dataset:
1783
- type: quora
1784
- name: MTEB QuoraRetrieval
1785
- config: default
1786
- split: test
1787
- revision: None
1788
- metrics:
1789
- - type: map_at_1
1790
- value: 70.402
1791
- - type: map_at_10
1792
- value: 84.181
1793
- - type: map_at_100
1794
- value: 84.796
1795
- - type: map_at_1000
1796
- value: 84.81400000000001
1797
- - type: map_at_3
1798
- value: 81.209
1799
- - type: map_at_5
1800
- value: 83.085
1801
- - type: mrr_at_1
1802
- value: 81.02000000000001
1803
- - type: mrr_at_10
1804
- value: 87.263
1805
- - type: mrr_at_100
1806
- value: 87.36
1807
- - type: mrr_at_1000
1808
- value: 87.36
1809
- - type: mrr_at_3
1810
- value: 86.235
1811
- - type: mrr_at_5
1812
- value: 86.945
1813
- - type: ndcg_at_1
1814
- value: 81.01
1815
- - type: ndcg_at_10
1816
- value: 87.99900000000001
1817
- - type: ndcg_at_100
1818
- value: 89.217
1819
- - type: ndcg_at_1000
1820
- value: 89.33
1821
- - type: ndcg_at_3
1822
- value: 85.053
1823
- - type: ndcg_at_5
1824
- value: 86.703
1825
- - type: precision_at_1
1826
- value: 81.01
1827
- - type: precision_at_10
1828
- value: 13.336
1829
- - type: precision_at_100
1830
- value: 1.52
1831
- - type: precision_at_1000
1832
- value: 0.156
1833
- - type: precision_at_3
1834
- value: 37.14
1835
- - type: precision_at_5
1836
- value: 24.44
1837
- - type: recall_at_1
1838
- value: 70.402
1839
- - type: recall_at_10
1840
- value: 95.214
1841
- - type: recall_at_100
1842
- value: 99.438
1843
- - type: recall_at_1000
1844
- value: 99.928
1845
- - type: recall_at_3
1846
- value: 86.75699999999999
1847
- - type: recall_at_5
1848
- value: 91.44099999999999
1849
- - task:
1850
- type: Clustering
1851
- dataset:
1852
- type: mteb/reddit-clustering
1853
- name: MTEB RedditClustering
1854
- config: default
1855
- split: test
1856
- revision: 24640382cdbf8abc73003fb0fa6d111a705499eb
1857
- metrics:
1858
- - type: v_measure
1859
- value: 56.51721502758904
1860
- - task:
1861
- type: Clustering
1862
- dataset:
1863
- type: mteb/reddit-clustering-p2p
1864
- name: MTEB RedditClusteringP2P
1865
- config: default
1866
- split: test
1867
- revision: 282350215ef01743dc01b456c7f5241fa8937f16
1868
- metrics:
1869
- - type: v_measure
1870
- value: 61.054808572333016
1871
- - task:
1872
- type: Retrieval
1873
- dataset:
1874
- type: scidocs
1875
- name: MTEB SCIDOCS
1876
- config: default
1877
- split: test
1878
- revision: None
1879
- metrics:
1880
- - type: map_at_1
1881
- value: 4.578
1882
- - type: map_at_10
1883
- value: 11.036999999999999
1884
- - type: map_at_100
1885
- value: 12.879999999999999
1886
- - type: map_at_1000
1887
- value: 13.150999999999998
1888
- - type: map_at_3
1889
- value: 8.133
1890
- - type: map_at_5
1891
- value: 9.559
1892
- - type: mrr_at_1
1893
- value: 22.6
1894
- - type: mrr_at_10
1895
- value: 32.68
1896
- - type: mrr_at_100
1897
- value: 33.789
1898
- - type: mrr_at_1000
1899
- value: 33.854
1900
- - type: mrr_at_3
1901
- value: 29.7
1902
- - type: mrr_at_5
1903
- value: 31.480000000000004
1904
- - type: ndcg_at_1
1905
- value: 22.6
1906
- - type: ndcg_at_10
1907
- value: 18.616
1908
- - type: ndcg_at_100
1909
- value: 25.883
1910
- - type: ndcg_at_1000
1911
- value: 30.944
1912
- - type: ndcg_at_3
1913
- value: 18.136
1914
- - type: ndcg_at_5
1915
- value: 15.625
1916
- - type: precision_at_1
1917
- value: 22.6
1918
- - type: precision_at_10
1919
- value: 9.48
1920
- - type: precision_at_100
1921
- value: 1.991
1922
- - type: precision_at_1000
1923
- value: 0.321
1924
- - type: precision_at_3
1925
- value: 16.8
1926
- - type: precision_at_5
1927
- value: 13.54
1928
- - type: recall_at_1
1929
- value: 4.578
1930
- - type: recall_at_10
1931
- value: 19.213
1932
- - type: recall_at_100
1933
- value: 40.397
1934
- - type: recall_at_1000
1935
- value: 65.2
1936
- - type: recall_at_3
1937
- value: 10.208
1938
- - type: recall_at_5
1939
- value: 13.718
1940
- - task:
1941
- type: STS
1942
- dataset:
1943
- type: mteb/sickr-sts
1944
- name: MTEB SICK-R
1945
- config: default
1946
- split: test
1947
- revision: a6ea5a8cab320b040a23452cc28066d9beae2cee
1948
- metrics:
1949
- - type: cos_sim_pearson
1950
- value: 83.44288351714071
1951
- - type: cos_sim_spearman
1952
- value: 79.37995604564952
1953
- - type: euclidean_pearson
1954
- value: 81.1078874670718
1955
- - type: euclidean_spearman
1956
- value: 79.37995905980499
1957
- - type: manhattan_pearson
1958
- value: 81.03697527288986
1959
- - type: manhattan_spearman
1960
- value: 79.33490235296236
1961
- - task:
1962
- type: STS
1963
- dataset:
1964
- type: mteb/sts12-sts
1965
- name: MTEB STS12
1966
- config: default
1967
- split: test
1968
- revision: a0d554a64d88156834ff5ae9920b964011b16384
1969
- metrics:
1970
- - type: cos_sim_pearson
1971
- value: 84.95557650436523
1972
- - type: cos_sim_spearman
1973
- value: 78.5190672399868
1974
- - type: euclidean_pearson
1975
- value: 81.58064025904707
1976
- - type: euclidean_spearman
1977
- value: 78.5190672399868
1978
- - type: manhattan_pearson
1979
- value: 81.52857930619889
1980
- - type: manhattan_spearman
1981
- value: 78.50421361308034
1982
- - task:
1983
- type: STS
1984
- dataset:
1985
- type: mteb/sts13-sts
1986
- name: MTEB STS13
1987
- config: default
1988
- split: test
1989
- revision: 7e90230a92c190f1bf69ae9002b8cea547a64cca
1990
- metrics:
1991
- - type: cos_sim_pearson
1992
- value: 84.79128416228737
1993
- - type: cos_sim_spearman
1994
- value: 86.05402451477147
1995
- - type: euclidean_pearson
1996
- value: 85.46280267054289
1997
- - type: euclidean_spearman
1998
- value: 86.05402451477147
1999
- - type: manhattan_pearson
2000
- value: 85.46278563858236
2001
- - type: manhattan_spearman
2002
- value: 86.08079590861004
2003
- - task:
2004
- type: STS
2005
- dataset:
2006
- type: mteb/sts14-sts
2007
- name: MTEB STS14
2008
- config: default
2009
- split: test
2010
- revision: 6031580fec1f6af667f0bd2da0a551cf4f0b2375
2011
- metrics:
2012
- - type: cos_sim_pearson
2013
- value: 83.20623089568763
2014
- - type: cos_sim_spearman
2015
- value: 81.53786907061009
2016
- - type: euclidean_pearson
2017
- value: 82.82272250091494
2018
- - type: euclidean_spearman
2019
- value: 81.53786907061009
2020
- - type: manhattan_pearson
2021
- value: 82.78850494027013
2022
- - type: manhattan_spearman
2023
- value: 81.5135618083407
2024
- - task:
2025
- type: STS
2026
- dataset:
2027
- type: mteb/sts15-sts
2028
- name: MTEB STS15
2029
- config: default
2030
- split: test
2031
- revision: ae752c7c21bf194d8b67fd573edf7ae58183cbe3
2032
- metrics:
2033
- - type: cos_sim_pearson
2034
- value: 85.46366618397936
2035
- - type: cos_sim_spearman
2036
- value: 86.96566013336908
2037
- - type: euclidean_pearson
2038
- value: 86.62651697548931
2039
- - type: euclidean_spearman
2040
- value: 86.96565526364454
2041
- - type: manhattan_pearson
2042
- value: 86.58812160258009
2043
- - type: manhattan_spearman
2044
- value: 86.9336484321288
2045
- - task:
2046
- type: STS
2047
- dataset:
2048
- type: mteb/sts16-sts
2049
- name: MTEB STS16
2050
- config: default
2051
- split: test
2052
- revision: 4d8694f8f0e0100860b497b999b3dbed754a0513
2053
- metrics:
2054
- - type: cos_sim_pearson
2055
- value: 82.51858358641559
2056
- - type: cos_sim_spearman
2057
- value: 84.7652527954999
2058
- - type: euclidean_pearson
2059
- value: 84.23914783766861
2060
- - type: euclidean_spearman
2061
- value: 84.7652527954999
2062
- - type: manhattan_pearson
2063
- value: 84.22749648503171
2064
- - type: manhattan_spearman
2065
- value: 84.74527996746386
2066
- - task:
2067
- type: STS
2068
- dataset:
2069
- type: mteb/sts17-crosslingual-sts
2070
- name: MTEB STS17 (en-en)
2071
- config: en-en
2072
- split: test
2073
- revision: af5e6fb845001ecf41f4c1e033ce921939a2a68d
2074
- metrics:
2075
- - type: cos_sim_pearson
2076
- value: 87.28026563313065
2077
- - type: cos_sim_spearman
2078
- value: 87.46928143824915
2079
- - type: euclidean_pearson
2080
- value: 88.30558762000372
2081
- - type: euclidean_spearman
2082
- value: 87.46928143824915
2083
- - type: manhattan_pearson
2084
- value: 88.10513330809331
2085
- - type: manhattan_spearman
2086
- value: 87.21069787834173
2087
- - task:
2088
- type: STS
2089
- dataset:
2090
- type: mteb/sts22-crosslingual-sts
2091
- name: MTEB STS22 (en)
2092
- config: en
2093
- split: test
2094
- revision: 6d1ba47164174a496b7fa5d3569dae26a6813b80
2095
- metrics:
2096
- - type: cos_sim_pearson
2097
- value: 62.376497134587375
2098
- - type: cos_sim_spearman
2099
- value: 65.0159550112516
2100
- - type: euclidean_pearson
2101
- value: 65.64572120879598
2102
- - type: euclidean_spearman
2103
- value: 65.0159550112516
2104
- - type: manhattan_pearson
2105
- value: 65.88143604989976
2106
- - type: manhattan_spearman
2107
- value: 65.17547297222434
2108
- - task:
2109
- type: STS
2110
- dataset:
2111
- type: mteb/stsbenchmark-sts
2112
- name: MTEB STSBenchmark
2113
- config: default
2114
- split: test
2115
- revision: b0fddb56ed78048fa8b90373c8a3cfc37b684831
2116
- metrics:
2117
- - type: cos_sim_pearson
2118
- value: 84.22876368947644
2119
- - type: cos_sim_spearman
2120
- value: 85.46935577445318
2121
- - type: euclidean_pearson
2122
- value: 85.32830231392005
2123
- - type: euclidean_spearman
2124
- value: 85.46935577445318
2125
- - type: manhattan_pearson
2126
- value: 85.30353211758495
2127
- - type: manhattan_spearman
2128
- value: 85.42821085956945
2129
- - task:
2130
- type: Reranking
2131
- dataset:
2132
- type: mteb/scidocs-reranking
2133
- name: MTEB SciDocsRR
2134
- config: default
2135
- split: test
2136
- revision: d3c5e1fc0b855ab6097bf1cda04dd73947d7caab
2137
- metrics:
2138
- - type: map
2139
- value: 80.60986667767133
2140
- - type: mrr
2141
- value: 94.29432314236236
2142
- - task:
2143
- type: Retrieval
2144
- dataset:
2145
- type: scifact
2146
- name: MTEB SciFact
2147
- config: default
2148
- split: test
2149
- revision: None
2150
- metrics:
2151
- - type: map_at_1
2152
- value: 54.528
2153
- - type: map_at_10
2154
- value: 65.187
2155
- - type: map_at_100
2156
- value: 65.62599999999999
2157
- - type: map_at_1000
2158
- value: 65.657
2159
- - type: map_at_3
2160
- value: 62.352
2161
- - type: map_at_5
2162
- value: 64.025
2163
- - type: mrr_at_1
2164
- value: 57.333
2165
- - type: mrr_at_10
2166
- value: 66.577
2167
- - type: mrr_at_100
2168
- value: 66.88
2169
- - type: mrr_at_1000
2170
- value: 66.908
2171
- - type: mrr_at_3
2172
- value: 64.556
2173
- - type: mrr_at_5
2174
- value: 65.739
2175
- - type: ndcg_at_1
2176
- value: 57.333
2177
- - type: ndcg_at_10
2178
- value: 70.275
2179
- - type: ndcg_at_100
2180
- value: 72.136
2181
- - type: ndcg_at_1000
2182
- value: 72.963
2183
- - type: ndcg_at_3
2184
- value: 65.414
2185
- - type: ndcg_at_5
2186
- value: 67.831
2187
- - type: precision_at_1
2188
- value: 57.333
2189
- - type: precision_at_10
2190
- value: 9.5
2191
- - type: precision_at_100
2192
- value: 1.057
2193
- - type: precision_at_1000
2194
- value: 0.11199999999999999
2195
- - type: precision_at_3
2196
- value: 25.778000000000002
2197
- - type: precision_at_5
2198
- value: 17.2
2199
- - type: recall_at_1
2200
- value: 54.528
2201
- - type: recall_at_10
2202
- value: 84.356
2203
- - type: recall_at_100
2204
- value: 92.833
2205
- - type: recall_at_1000
2206
- value: 99.333
2207
- - type: recall_at_3
2208
- value: 71.283
2209
- - type: recall_at_5
2210
- value: 77.14999999999999
2211
- - task:
2212
- type: PairClassification
2213
- dataset:
2214
- type: mteb/sprintduplicatequestions-pairclassification
2215
- name: MTEB SprintDuplicateQuestions
2216
- config: default
2217
- split: test
2218
- revision: d66bd1f72af766a5cc4b0ca5e00c162f89e8cc46
2219
- metrics:
2220
- - type: cos_sim_accuracy
2221
- value: 99.74158415841585
2222
- - type: cos_sim_ap
2223
- value: 92.90048959850317
2224
- - type: cos_sim_f1
2225
- value: 86.35650810245687
2226
- - type: cos_sim_precision
2227
- value: 90.4709748083242
2228
- - type: cos_sim_recall
2229
- value: 82.6
2230
- - type: dot_accuracy
2231
- value: 99.74158415841585
2232
- - type: dot_ap
2233
- value: 92.90048959850317
2234
- - type: dot_f1
2235
- value: 86.35650810245687
2236
- - type: dot_precision
2237
- value: 90.4709748083242
2238
- - type: dot_recall
2239
- value: 82.6
2240
- - type: euclidean_accuracy
2241
- value: 99.74158415841585
2242
- - type: euclidean_ap
2243
- value: 92.90048959850317
2244
- - type: euclidean_f1
2245
- value: 86.35650810245687
2246
- - type: euclidean_precision
2247
- value: 90.4709748083242
2248
- - type: euclidean_recall
2249
- value: 82.6
2250
- - type: manhattan_accuracy
2251
- value: 99.74158415841585
2252
- - type: manhattan_ap
2253
- value: 92.87344692947894
2254
- - type: manhattan_f1
2255
- value: 86.38497652582159
2256
- - type: manhattan_precision
2257
- value: 90.29443838604145
2258
- - type: manhattan_recall
2259
- value: 82.8
2260
- - type: max_accuracy
2261
- value: 99.74158415841585
2262
- - type: max_ap
2263
- value: 92.90048959850317
2264
- - type: max_f1
2265
- value: 86.38497652582159
2266
- - task:
2267
- type: Clustering
2268
- dataset:
2269
- type: mteb/stackexchange-clustering
2270
- name: MTEB StackExchangeClustering
2271
- config: default
2272
- split: test
2273
- revision: 6cbc1f7b2bc0622f2e39d2c77fa502909748c259
2274
- metrics:
2275
- - type: v_measure
2276
- value: 63.191648770424216
2277
- - task:
2278
- type: Clustering
2279
- dataset:
2280
- type: mteb/stackexchange-clustering-p2p
2281
- name: MTEB StackExchangeClusteringP2P
2282
- config: default
2283
- split: test
2284
- revision: 815ca46b2622cec33ccafc3735d572c266efdb44
2285
- metrics:
2286
- - type: v_measure
2287
- value: 34.02944668730218
2288
- - task:
2289
- type: Reranking
2290
- dataset:
2291
- type: mteb/stackoverflowdupquestions-reranking
2292
- name: MTEB StackOverflowDupQuestions
2293
- config: default
2294
- split: test
2295
- revision: e185fbe320c72810689fc5848eb6114e1ef5ec69
2296
- metrics:
2297
- - type: map
2298
- value: 50.466386167525265
2299
- - type: mrr
2300
- value: 51.19071492233257
2301
- - task:
2302
- type: Summarization
2303
- dataset:
2304
- type: mteb/summeval
2305
- name: MTEB SummEval
2306
- config: default
2307
- split: test
2308
- revision: cda12ad7615edc362dbf25a00fdd61d3b1eaf93c
2309
- metrics:
2310
- - type: cos_sim_pearson
2311
- value: 30.198022505886435
2312
- - type: cos_sim_spearman
2313
- value: 30.40170257939193
2314
- - type: dot_pearson
2315
- value: 30.198015316402614
2316
- - type: dot_spearman
2317
- value: 30.40170257939193
2318
- - task:
2319
- type: Retrieval
2320
- dataset:
2321
- type: trec-covid
2322
- name: MTEB TRECCOVID
2323
- config: default
2324
- split: test
2325
- revision: None
2326
- metrics:
2327
- - type: map_at_1
2328
- value: 0.242
2329
- - type: map_at_10
2330
- value: 2.17
2331
- - type: map_at_100
2332
- value: 12.221
2333
- - type: map_at_1000
2334
- value: 28.63
2335
- - type: map_at_3
2336
- value: 0.728
2337
- - type: map_at_5
2338
- value: 1.185
2339
- - type: mrr_at_1
2340
- value: 94
2341
- - type: mrr_at_10
2342
- value: 97
2343
- - type: mrr_at_100
2344
- value: 97
2345
- - type: mrr_at_1000
2346
- value: 97
2347
- - type: mrr_at_3
2348
- value: 97
2349
- - type: mrr_at_5
2350
- value: 97
2351
- - type: ndcg_at_1
2352
- value: 89
2353
- - type: ndcg_at_10
2354
- value: 82.30499999999999
2355
- - type: ndcg_at_100
2356
- value: 61.839999999999996
2357
- - type: ndcg_at_1000
2358
- value: 53.381
2359
- - type: ndcg_at_3
2360
- value: 88.877
2361
- - type: ndcg_at_5
2362
- value: 86.05199999999999
2363
- - type: precision_at_1
2364
- value: 94
2365
- - type: precision_at_10
2366
- value: 87
2367
- - type: precision_at_100
2368
- value: 63.38
2369
- - type: precision_at_1000
2370
- value: 23.498
2371
- - type: precision_at_3
2372
- value: 94
2373
- - type: precision_at_5
2374
- value: 92
2375
- - type: recall_at_1
2376
- value: 0.242
2377
- - type: recall_at_10
2378
- value: 2.302
2379
- - type: recall_at_100
2380
- value: 14.979000000000001
2381
- - type: recall_at_1000
2382
- value: 49.638
2383
- - type: recall_at_3
2384
- value: 0.753
2385
- - type: recall_at_5
2386
- value: 1.226
2387
- - task:
2388
- type: Retrieval
2389
- dataset:
2390
- type: webis-touche2020
2391
- name: MTEB Touche2020
2392
- config: default
2393
- split: test
2394
- revision: None
2395
- metrics:
2396
- - type: map_at_1
2397
- value: 3.006
2398
- - type: map_at_10
2399
- value: 11.805
2400
- - type: map_at_100
2401
- value: 18.146
2402
- - type: map_at_1000
2403
- value: 19.788
2404
- - type: map_at_3
2405
- value: 5.914
2406
- - type: map_at_5
2407
- value: 8.801
2408
- - type: mrr_at_1
2409
- value: 40.816
2410
- - type: mrr_at_10
2411
- value: 56.36600000000001
2412
- - type: mrr_at_100
2413
- value: 56.721999999999994
2414
- - type: mrr_at_1000
2415
- value: 56.721999999999994
2416
- - type: mrr_at_3
2417
- value: 52.041000000000004
2418
- - type: mrr_at_5
2419
- value: 54.796
2420
- - type: ndcg_at_1
2421
- value: 37.755
2422
- - type: ndcg_at_10
2423
- value: 29.863
2424
- - type: ndcg_at_100
2425
- value: 39.571
2426
- - type: ndcg_at_1000
2427
- value: 51.385999999999996
2428
- - type: ndcg_at_3
2429
- value: 32.578
2430
- - type: ndcg_at_5
2431
- value: 32.351
2432
- - type: precision_at_1
2433
- value: 40.816
2434
- - type: precision_at_10
2435
- value: 26.531
2436
- - type: precision_at_100
2437
- value: 7.796
2438
- - type: precision_at_1000
2439
- value: 1.555
2440
- - type: precision_at_3
2441
- value: 32.653
2442
- - type: precision_at_5
2443
- value: 33.061
2444
- - type: recall_at_1
2445
- value: 3.006
2446
- - type: recall_at_10
2447
- value: 18.738
2448
- - type: recall_at_100
2449
- value: 48.058
2450
- - type: recall_at_1000
2451
- value: 83.41300000000001
2452
- - type: recall_at_3
2453
- value: 7.166
2454
- - type: recall_at_5
2455
- value: 12.102
2456
- - task:
2457
- type: Classification
2458
- dataset:
2459
- type: mteb/toxic_conversations_50k
2460
- name: MTEB ToxicConversationsClassification
2461
- config: default
2462
- split: test
2463
- revision: d7c0de2777da35d6aae2200a62c6e0e5af397c4c
2464
- metrics:
2465
- - type: accuracy
2466
- value: 71.4178
2467
- - type: ap
2468
- value: 14.648781342150446
2469
- - type: f1
2470
- value: 55.07299194946378
2471
- - task:
2472
- type: Classification
2473
- dataset:
2474
- type: mteb/tweet_sentiment_extraction
2475
- name: MTEB TweetSentimentExtractionClassification
2476
- config: default
2477
- split: test
2478
- revision: d604517c81ca91fe16a244d1248fc021f9ecee7a
2479
- metrics:
2480
- - type: accuracy
2481
- value: 60.919637804187886
2482
- - type: f1
2483
- value: 61.24122013967399
2484
- - task:
2485
- type: Clustering
2486
- dataset:
2487
- type: mteb/twentynewsgroups-clustering
2488
- name: MTEB TwentyNewsgroupsClustering
2489
- config: default
2490
- split: test
2491
- revision: 6125ec4e24fa026cec8a478383ee943acfbd5449
2492
- metrics:
2493
- - type: v_measure
2494
- value: 49.207896583685695
2495
- - task:
2496
- type: PairClassification
2497
- dataset:
2498
- type: mteb/twittersemeval2015-pairclassification
2499
- name: MTEB TwitterSemEval2015
2500
- config: default
2501
- split: test
2502
- revision: 70970daeab8776df92f5ea462b6173c0b46fd2d1
2503
- metrics:
2504
- - type: cos_sim_accuracy
2505
- value: 86.23114978840078
2506
- - type: cos_sim_ap
2507
- value: 74.26624727825818
2508
- - type: cos_sim_f1
2509
- value: 68.72377190817083
2510
- - type: cos_sim_precision
2511
- value: 64.56400742115028
2512
- - type: cos_sim_recall
2513
- value: 73.45646437994723
2514
- - type: dot_accuracy
2515
- value: 86.23114978840078
2516
- - type: dot_ap
2517
- value: 74.26624032659652
2518
- - type: dot_f1
2519
- value: 68.72377190817083
2520
- - type: dot_precision
2521
- value: 64.56400742115028
2522
- - type: dot_recall
2523
- value: 73.45646437994723
2524
- - type: euclidean_accuracy
2525
- value: 86.23114978840078
2526
- - type: euclidean_ap
2527
- value: 74.26624714480556
2528
- - type: euclidean_f1
2529
- value: 68.72377190817083
2530
- - type: euclidean_precision
2531
- value: 64.56400742115028
2532
- - type: euclidean_recall
2533
- value: 73.45646437994723
2534
- - type: manhattan_accuracy
2535
- value: 86.16558383501221
2536
- - type: manhattan_ap
2537
- value: 74.2091943976357
2538
- - type: manhattan_f1
2539
- value: 68.64221520524654
2540
- - type: manhattan_precision
2541
- value: 63.59135913591359
2542
- - type: manhattan_recall
2543
- value: 74.5646437994723
2544
- - type: max_accuracy
2545
- value: 86.23114978840078
2546
- - type: max_ap
2547
- value: 74.26624727825818
2548
- - type: max_f1
2549
- value: 68.72377190817083
2550
- - task:
2551
- type: PairClassification
2552
- dataset:
2553
- type: mteb/twitterurlcorpus-pairclassification
2554
- name: MTEB TwitterURLCorpus
2555
- config: default
2556
- split: test
2557
- revision: 8b6510b0b1fa4e4c4f879467980e9be563ec1cdf
2558
- metrics:
2559
- - type: cos_sim_accuracy
2560
- value: 89.3681841114604
2561
- - type: cos_sim_ap
2562
- value: 86.65166387498546
2563
- - type: cos_sim_f1
2564
- value: 79.02581944698774
2565
- - type: cos_sim_precision
2566
- value: 75.35796605434099
2567
- - type: cos_sim_recall
2568
- value: 83.06898675700647
2569
- - type: dot_accuracy
2570
- value: 89.3681841114604
2571
- - type: dot_ap
2572
- value: 86.65166019802056
2573
- - type: dot_f1
2574
- value: 79.02581944698774
2575
- - type: dot_precision
2576
- value: 75.35796605434099
2577
- - type: dot_recall
2578
- value: 83.06898675700647
2579
- - type: euclidean_accuracy
2580
- value: 89.3681841114604
2581
- - type: euclidean_ap
2582
- value: 86.65166462876266
2583
- - type: euclidean_f1
2584
- value: 79.02581944698774
2585
- - type: euclidean_precision
2586
- value: 75.35796605434099
2587
- - type: euclidean_recall
2588
- value: 83.06898675700647
2589
- - type: manhattan_accuracy
2590
- value: 89.36624364497226
2591
- - type: manhattan_ap
2592
- value: 86.65076471274106
2593
- - type: manhattan_f1
2594
- value: 79.07408783532733
2595
- - type: manhattan_precision
2596
- value: 76.41102972856527
2597
- - type: manhattan_recall
2598
- value: 81.92947336002464
2599
- - type: max_accuracy
2600
- value: 89.3681841114604
2601
- - type: max_ap
2602
- value: 86.65166462876266
2603
- - type: max_f1
2604
- value: 79.07408783532733
2605
- license: apache-2.0
2606
- language:
2607
- - en
2608
  ---
2609
 
2610
- # nomic-embed-text-v1.5: Resizable Production Embeddings with Matryoshka Representation Learning
2611
-
2612
- [Blog](https://www.nomic.ai/blog/posts/nomic-embed-text-v1) | [Technical Report](https://arxiv.org/abs/2402.01613) | [AWS SageMaker](https://aws.amazon.com/marketplace/seller-profile?id=seller-tpqidcj54zawi) | [Nomic Platform](https://atlas.nomic.ai)
2613
-
2614
- **Exciting Update!**: `nomic-embed-text-v1.5` is now multimodal! [nomic-embed-vision-v1.5](https://huggingface.co/nomic-ai/nomic-embed-vision-v1.5) is aligned to the embedding space of `nomic-embed-text-v1.5`, meaning any text embedding is multimodal!
2615
-
2616
- ## Usage
2617
-
2618
- **Important**: the text prompt *must* include a *task instruction prefix*, instructing the model which task is being performed.
2619
-
2620
- For example, if you are implementing a RAG application, you embed your documents as `search_document: <text here>` and embed your user queries as `search_query: <text here>`.
2621
-
2622
- ## Task instruction prefixes
2623
-
2624
- ### `search_document`
2625
-
2626
- #### Purpose: embed texts as documents from a dataset
2627
-
2628
- This prefix is used for embedding texts as documents, for example as documents for a RAG index.
2629
-
2630
- ```python
2631
- from sentence_transformers import SentenceTransformer
2632
-
2633
- model = SentenceTransformer("nomic-ai/nomic-embed-text-v1.5", trust_remote_code=True)
2634
- sentences = ['search_document: TSNE is a dimensionality reduction algorithm created by Laurens van Der Maaten']
2635
- embeddings = model.encode(sentences)
2636
- print(embeddings)
2637
- ```
2638
-
2639
- ### `search_query`
2640
-
2641
- #### Purpose: embed texts as questions to answer
2642
-
2643
- This prefix is used for embedding texts as questions that documents from a dataset could resolve, for example as queries to be answered by a RAG application.
2644
-
2645
- ```python
2646
- from sentence_transformers import SentenceTransformer
2647
-
2648
- model = SentenceTransformer("nomic-ai/nomic-embed-text-v1.5", trust_remote_code=True)
2649
- sentences = ['search_query: Who is Laurens van Der Maaten?']
2650
- embeddings = model.encode(sentences)
2651
- print(embeddings)
2652
- ```
2653
-
2654
- ### `clustering`
2655
-
2656
- #### Purpose: embed texts to group them into clusters
2657
-
2658
- This prefix is used for embedding texts in order to group them into clusters, discover common topics, or remove semantic duplicates.
2659
-
2660
- ```python
2661
- from sentence_transformers import SentenceTransformer
2662
-
2663
- model = SentenceTransformer("nomic-ai/nomic-embed-text-v1.5", trust_remote_code=True)
2664
- sentences = ['clustering: the quick brown fox']
2665
- embeddings = model.encode(sentences)
2666
- print(embeddings)
2667
- ```
2668
-
2669
- ### `classification`
2670
-
2671
- #### Purpose: embed texts to classify them
2672
-
2673
- This prefix is used for embedding texts into vectors that will be used as features for a classification model.
2674
-
2675
- ```python
2676
- from sentence_transformers import SentenceTransformer
2677
-
2678
- model = SentenceTransformer("nomic-ai/nomic-embed-text-v1.5", trust_remote_code=True)
2679
- sentences = ['classification: the quick brown fox']
2680
- embeddings = model.encode(sentences)
2681
- print(embeddings)
2682
- ```
2683
-
2684
-
2685
- ### Sentence Transformers
2686
- ```python
2687
- import torch.nn.functional as F
2688
- from sentence_transformers import SentenceTransformer
2689
-
2690
- matryoshka_dim = 512
2691
-
2692
- model = SentenceTransformer("nomic-ai/nomic-embed-text-v1.5", trust_remote_code=True)
2693
- sentences = ['search_query: What is TSNE?', 'search_query: Who is Laurens van der Maaten?']
2694
- embeddings = model.encode(sentences, convert_to_tensor=True)
2695
- embeddings = F.layer_norm(embeddings, normalized_shape=(embeddings.shape[1],))
2696
- embeddings = embeddings[:, :matryoshka_dim]
2697
- embeddings = F.normalize(embeddings, p=2, dim=1)
2698
- print(embeddings)
2699
- ```
2700
-
2701
- ### Transformers
2702
-
2703
- ```diff
2704
- import torch
2705
- import torch.nn.functional as F
2706
- from transformers import AutoTokenizer, AutoModel
 
 
 
 
 
 
 
 
 
2707
 
2708
- def mean_pooling(model_output, attention_mask):
2709
- token_embeddings = model_output[0]
2710
- input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
2711
- return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(input_mask_expanded.sum(1), min=1e-9)
2712
-
2713
- sentences = ['search_query: What is TSNE?', 'search_query: Who is Laurens van der Maaten?']
2714
-
2715
- tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
2716
- model = AutoModel.from_pretrained('nomic-ai/nomic-embed-text-v1.5', trust_remote_code=True, safe_serialization=True)
2717
- model.eval()
2718
-
2719
- encoded_input = tokenizer(sentences, padding=True, truncation=True, return_tensors='pt')
2720
-
2721
- + matryoshka_dim = 512
2722
-
2723
- with torch.no_grad():
2724
- model_output = model(**encoded_input)
2725
-
2726
- embeddings = mean_pooling(model_output, encoded_input['attention_mask'])
2727
- + embeddings = F.layer_norm(embeddings, normalized_shape=(embeddings.shape[1],))
2728
- + embeddings = embeddings[:, :matryoshka_dim]
2729
- embeddings = F.normalize(embeddings, p=2, dim=1)
2730
- print(embeddings)
2731
- ```
2732
-
2733
- The model natively supports scaling of the sequence length past 2048 tokens. To do so, make the following changes:
2734
-
2735
- ```diff
2736
- - tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
2737
- + tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased', model_max_length=8192)
2738
-
2739
-
2740
- - model = AutoModel.from_pretrained('nomic-ai/nomic-embed-text-v1.5', trust_remote_code=True)
2741
- + model = AutoModel.from_pretrained('nomic-ai/nomic-embed-text-v1.5', trust_remote_code=True, rotary_scaling_factor=2)
2742
- ```
2743
-
2744
- ### Transformers.js
2745
-
2746
- ```js
2747
- import { pipeline, layer_norm } from '@huggingface/transformers';
2748
-
2749
- // Create a feature extraction pipeline
2750
- const extractor = await pipeline('feature-extraction', 'nomic-ai/nomic-embed-text-v1.5');
2751
-
2752
- // Define sentences
2753
- const texts = ['search_query: What is TSNE?', 'search_query: Who is Laurens van der Maaten?'];
2754
-
2755
- // Compute sentence embeddings
2756
- let embeddings = await extractor(texts, { pooling: 'mean' });
2757
- console.log(embeddings); // Tensor of shape [2, 768]
2758
-
2759
- const matryoshka_dim = 512;
2760
- embeddings = layer_norm(embeddings, [embeddings.dims[1]])
2761
- .slice(null, [0, matryoshka_dim])
2762
- .normalize(2, -1);
2763
- console.log(embeddings.tolist());
2764
  ```
2765
 
2766
-
2767
- ## Nomic API
2768
-
2769
- The easiest way to use Nomic Embed is through the Nomic Embedding API.
2770
-
2771
- Generating embeddings with the `nomic` Python client is as easy as:
2772
 
2773
  ```python
2774
- from nomic import embed
2775
-
2776
- output = embed.text(
2777
- texts=['Nomic Embedding API', '#keepAIOpen'],
2778
- model='nomic-embed-text-v1.5',
2779
- task_type='search_document',
2780
- dimensionality=256,
2781
- )
2782
 
2783
- print(output)
 
 
 
2784
  ```
2785
 
2786
- For more information, see the [API reference](https://docs.nomic.ai/reference/endpoints/nomic-embed-text).
2787
-
2788
-
2789
- ## Infinity
2790
-
2791
- Usage with [Infinity](https://github.com/michaelfeil/infinity).
2792
-
2793
- ```bash
2794
- docker run --gpus all -v $PWD/data:/app/.cache -e HF_TOKEN=$HF_TOKEN -p "7997":"7997" \
2795
- michaelf34/infinity:0.0.70 \
2796
- v2 --model-id nomic-ai/nomic-embed-text-v1.5 --revision "main" --dtype float16 --batch-size 8 --engine torch --port 7997 --no-bettertransformer
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2797
  ```
2798
 
2799
- ## Adjusting Dimensionality
2800
-
2801
- `nomic-embed-text-v1.5` is an improvement upon [Nomic Embed](https://huggingface.co/nomic-ai/nomic-embed-text-v1) that utilizes [Matryoshka Representation Learning](https://arxiv.org/abs/2205.13147) which gives developers the flexibility to trade off the embedding size for a negligible reduction in performance.
2802
-
2803
-
2804
- | Name | SeqLen | Dimension | MTEB |
2805
- | :-------------------------------:| :----- | :-------- | :------: |
2806
- | nomic-embed-text-v1 | 8192 | 768 | **62.39** |
2807
- | nomic-embed-text-v1.5 | 8192 | 768 | 62.28 |
2808
- | nomic-embed-text-v1.5 | 8192 | 512 | 61.96 |
2809
- | nomic-embed-text-v1.5 | 8192 | 256 | 61.04 |
2810
- | nomic-embed-text-v1.5 | 8192 | 128 | 59.34 |
2811
- | nomic-embed-text-v1.5 | 8192 | 64 | 56.10 |
2812
-
2813
 
2814
- ![image/png](https://cdn-uploads.huggingface.co/production/uploads/607997c83a565c15675055b3/CRnaHV-c2wMUMZKw72q85.png)
 
2815
 
2816
- ## Training
2817
- Click the Nomic Atlas map below to visualize a 5M sample of our contrastive pretraining data!
2818
 
2819
- [![image/webp](https://cdn-uploads.huggingface.co/production/uploads/607997c83a565c15675055b3/pjhJhuNyRfPagRd_c_iUz.webp)](https://atlas.nomic.ai/map/nomic-text-embed-v1-5m-sample)
 
2820
 
2821
- We train our embedder using a multi-stage training pipeline. Starting from a long-context [BERT model](https://huggingface.co/nomic-ai/nomic-bert-2048),
2822
- the first unsupervised contrastive stage trains on a dataset generated from weakly related text pairs, such as question-answer pairs from forums like StackExchange and Quora, title-body pairs from Amazon reviews, and summarizations from news articles.
2823
 
2824
- In the second finetuning stage, higher quality labeled datasets such as search queries and answers from web searches are leveraged. Data curation and hard-example mining are crucial in this stage.
2825
-
2826
- For more details, see the Nomic Embed [Technical Report](https://static.nomic.ai/reports/2024_Nomic_Embed_Text_Technical_Report.pdf) and corresponding [blog post](https://blog.nomic.ai/posts/nomic-embed-matryoshka).
2827
-
2828
- The data used to train the models is released in its entirety. For more details, see the `contrastors` [repository](https://github.com/nomic-ai/contrastors).
2829
-
2830
-
2831
- # Join the Nomic Community
2832
-
2833
- - Nomic: [https://nomic.ai](https://nomic.ai)
2834
- - Discord: [https://discord.gg/myY5YDR8z8](https://discord.gg/myY5YDR8z8)
2835
- - Twitter: [https://twitter.com/nomic_ai](https://twitter.com/nomic_ai)
2836
-
2837
-
2838
- # Citation
2839
-
2840
- If you find the model, dataset, or training code useful, please cite our work:
2841
-
2842
- ```bibtex
2843
- @misc{nussbaum2024nomic,
2844
- title={Nomic Embed: Training a Reproducible Long Context Text Embedder},
2845
- author={Zach Nussbaum and John X. Morris and Brandon Duderstadt and Andriy Mulyar},
2846
- year={2024},
2847
- eprint={2402.01613},
2848
- archivePrefix={arXiv},
2849
- primaryClass={cs.CL}
2850
- }
2851
- ```
 
1
  ---
 
 
2
  tags:
3
+ - setfit
4
+ - sentence-transformers
5
+ - text-classification
6
+ - generated_from_setfit_trainer
7
+ widget:
8
+ - text: Add a navigation menu text
9
+ - text: Change to square format
10
+ - text: Change the button text
11
+ - text: Make the object stand alone
12
+ - text: Mirror the logo vertically
13
+ metrics:
14
+ - accuracy
15
+ pipeline_tag: text-classification
16
+ library_name: setfit
17
+ inference: true
18
+ base_model: nomic-ai/nomic-embed-text-v1.5
19
  model-index:
20
+ - name: SetFit with nomic-ai/nomic-embed-text-v1.5
21
  results:
22
  - task:
23
+ type: text-classification
24
+ name: Text Classification
25
  dataset:
26
+ name: Unknown
27
+ type: unknown
 
28
  split: test
 
29
  metrics:
30
  - type: accuracy
31
+ value: 0.5353535353535354
32
+ name: Accuracy
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  ---
34
 
35
+ # SetFit with nomic-ai/nomic-embed-text-v1.5
36
+
37
+ This is a [SetFit](https://github.com/huggingface/setfit) model that can be used for Text Classification. This SetFit model uses [nomic-ai/nomic-embed-text-v1.5](https://huggingface.co/nomic-ai/nomic-embed-text-v1.5) as the Sentence Transformer embedding model. A [LogisticRegression](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html) instance is used for classification.
38
+
39
+ The model has been trained using an efficient few-shot learning technique that involves:
40
+
41
+ 1. Fine-tuning a [Sentence Transformer](https://www.sbert.net) with contrastive learning.
42
+ 2. Training a classification head with features from the fine-tuned Sentence Transformer.
43
+
44
+ ## Model Details
45
+
46
+ ### Model Description
47
+ - **Model Type:** SetFit
48
+ - **Sentence Transformer body:** [nomic-ai/nomic-embed-text-v1.5](https://huggingface.co/nomic-ai/nomic-embed-text-v1.5)
49
+ - **Classification head:** a [LogisticRegression](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html) instance
50
+ - **Maximum Sequence Length:** 8192 tokens
51
+ - **Number of Classes:** 63 classes
52
+ <!-- - **Training Dataset:** [Unknown](https://huggingface.co/datasets/unknown) -->
53
+ <!-- - **Language:** Unknown -->
54
+ <!-- - **License:** Unknown -->
55
+
56
+ ### Model Sources
57
+
58
+ - **Repository:** [SetFit on GitHub](https://github.com/huggingface/setfit)
59
+ - **Paper:** [Efficient Few-Shot Learning Without Prompts](https://arxiv.org/abs/2209.11055)
60
+ - **Blogpost:** [SetFit: Efficient Few-Shot Learning Without Prompts](https://huggingface.co/blog/setfit)
61
+
62
+ ### Model Labels
63
+ | Label | Examples |
64
+ |:------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
65
+ | 0 | <ul><li>'Add a corporate presentation background'</li><li>'Insert a modern icon set for the design'</li><li>'Add a mountain landscape background to the page'</li></ul> |
66
+ | 1 | <ul><li>'Find me some shape options for this design'</li><li>'I need some professional-looking assets'</li><li>'Can you recommend some images that would work well here?'</li></ul> |
67
+ | 2 | <ul><li>'Add a date and time for the event'</li><li>'Insert a disclaimer text'</li><li>'Add a navigation menu text'</li></ul> |
68
+ | 3 | <ul><li>'Distribute the icons evenly'</li><li>'Align all the text elements to the left'</li><li>'Align the footer elements'</li></ul> |
69
+ | 4 | <ul><li>'Make the button pulse'</li><li>'Add a flip animation'</li><li>'Make the text glow'</li></ul> |
70
+ | 5 | <ul><li>'Make everything fade in gradually'</li><li>'Make the page bounce in from the top'</li><li>'Add a spiral animation to the page'</li></ul> |
71
+ | 6 | <ul><li>'Change the building color'</li><li>'Remove the unwanted text overlay'</li><li>'Add a party hat to the dog'</li></ul> |
72
+ | 7 | <ul><li>'Remove the draft image'</li><li>'Delete the backup copy'</li><li>'Remove the unwanted image'</li></ul> |
73
+ | 8 | <ul><li>'What image editing tools do you have?'</li><li>'How do I create a template?'</li><li>'How can I align elements properly?'</li></ul> |
74
+ | 9 | <ul><li>'Distribute the buttons around the center image'</li><li>'Place the elements in a circular arrangement'</li><li>'Arrange the images in a circular layout'</li></ul> |
75
+ | 10 | <ul><li>'Duplicate the design'</li><li>'Make a second version'</li><li>'Create a new version'</li></ul> |
76
+ | 11 | <ul><li>'Duplicate the icon and move it'</li><li>'Duplicate the text and align it differently'</li><li>'Copy the image and apply a filter'</li></ul> |
77
+ | 12 | <ul><li>'Duplicate the logo to page 3'</li><li>'Copy the text to the last page'</li><li>'Copy the navigation to the next page'</li></ul> |
78
+ | 13 | <ul><li>'Fix the typographic errors'</li><li>'Improve the text flow'</li><li>'Fix the font consistency'</li></ul> |
79
+ | 14 | <ul><li>'Mirror the icon horizontally'</li><li>'Mirror the logo vertically'</li><li>'Flip the image horizontally'</li></ul> |
80
+ | 15 | <ul><li>'Create a photo of a futuristic city'</li><li>'Generate a picture of a tropical beach'</li><li>'Generate a picture of a cat playing with yarn'</li></ul> |
81
+ | 16 | <ul><li>'Create a card for a birthday party'</li><li>'Create a flyer for a happy birthday party'</li><li>'Generate an Instagram post for a birthday'</li></ul> |
82
+ | 17 | <ul><li>'Group the navigation elements'</li><li>'Combine the shape and text'</li><li>'Combine the image and overlay'</li></ul> |
83
+ | 18 | <ul><li>'Move the shape to the bottom'</li><li>'Position the icon at (0, 0)'</li><li>'Move the shape to coordinates (100, 100)'</li></ul> |
84
+ | 19 | <ul><li>'Add a sepia tone effect'</li><li>'Apply a modern filter'</li><li>'Make the image black and white'</li></ul> |
85
+ | 20 | <ul><li>'Suggest some shape designs'</li><li>'Show me pattern options'</li><li>'Find me border designs'</li></ul> |
86
+ | 21 | <ul><li>'Restore the previous opacity'</li><li>'Redo the text edit'</li><li>'Restore the previous color'</li></ul> |
87
+ | 22 | <ul><li>'Make the image have no background'</li><li>'Remove the background from the animal'</li><li>'Remove the background from the item'</li></ul> |
88
+ | 23 | <ul><li>'Delete the unwanted text'</li><li>'Remove the person from the background'</li><li>'Remove the graffiti'</li></ul> |
89
+ | 24 | <ul><li>'Replace the illustration'</li><li>'Change the background image'</li><li>'Change the product photo'</li></ul> |
90
+ | 25 | <ul><li>'Change the button text'</li><li>'Update the navigation text'</li><li>'Update the description'</li></ul> |
91
+ | 26 | <ul><li>'Remove all modifications'</li><li>'Restore the original colors'</li><li>'Remove all effects from the image'</li></ul> |
92
+ | 27 | <ul><li>'Scale the text up'</li><li>'Make the shape smaller'</li><li>'Reduce the shape size'</li></ul> |
93
+ | 28 | <ul><li>'Change to square format'</li><li>'Change to poster size'</li><li>'Make the page smaller'</li></ul> |
94
+ | 29 | <ul><li>'Rotate the text 45 degrees'</li><li>'Rotate the text 15 degrees'</li><li>'Turn the image counterclockwise'</li></ul> |
95
+ | 30 | <ul><li>'Distribute the shapes randomly'</li><li>'Distribute the leaves randomly on the page'</li><li>'Scatter the confetti around the design'</li></ul> |
96
+ | 31 | <ul><li>'Select the sidebar elements'</li><li>'Select the background image'</li><li>'Select the footer content'</li></ul> |
97
+ | 32 | <ul><li>'Change to a dark background'</li><li>'Make the background transparent'</li><li>'Change to a neutral background'</li></ul> |
98
+ | 33 | <ul><li>'Set the blend mode to soft light'</li><li>'Set the blend mode to darken'</li><li>'Set the blend mode to normal'</li></ul> |
99
+ | 34 | <ul><li>'Add a depth of field blur'</li><li>'Add a directional blur'</li><li>'Add a soft blur effect'</li></ul> |
100
+ | 35 | <ul><li>'Change the border style to solid'</li><li>'Change the border to dotted'</li><li>'Add a double border to the image'</li></ul> |
101
+ | 36 | <ul><li>'Brighten the shadows'</li><li>'Make the image brighter'</li><li>'Brighten the highlights'</li></ul> |
102
+ | 37 | <ul><li>'Bring the title to the front'</li><li>'Bring the shape to the front'</li><li>'Move the logo to the top layer'</li></ul> |
103
+ | 38 | <ul><li>'Make the image more intense'</li><li>'Enhance the contrast ratio'</li><li>'Increase the tonal range'</li></ul> |
104
+ | 39 | <ul><li>'Make the image round'</li><li>'Make the image rectangular'</li><li>'Crop to a heart shape'</li></ul> |
105
+ | 40 | <ul><li>'Add a sharp drop shadow'</li><li>'Add a gradient shadow'</li><li>'Add a hard shadow edge'</li></ul> |
106
+ | 41 | <ul><li>'Fill the shape with orange'</li><li>'Fill the element with brown'</li><li>'Change the shape color to blue'</li></ul> |
107
+ | 42 | <ul><li>'Make the text more readable'</li><li>'Increase the heading size'</li><li>'Make the description smaller'</li></ul> |
108
+ | 43 | <ul><li>'Make the text bold and italic'</li><li>'Add strikethrough to the text'</li><li>'Add bold to the title'</li></ul> |
109
+ | 44 | <ul><li>'Use a contemporary font'</li><li>'Use a professional font'</li><li>'Use a serif font for the heading'</li></ul> |
110
+ | 45 | <ul><li>'Make the bright areas brighter'</li><li>'Make the highlights more prominent'</li><li>'Enhance the bright spots'</li></ul> |
111
+ | 46 | <ul><li>'Set the image as background layer'</li><li>'Set the picture as background fill'</li><li>'Make the photo cover the background'</li></ul> |
112
+ | 47 | <ul><li>'Reduce the character spacing'</li><li>'Add letter spacing to the logo'</li><li>'Increase spacing between characters'</li></ul> |
113
+ | 48 | <ul><li>'Make the lines tighter'</li><li>'Spread out the text lines'</li><li>'Increase the paragraph spacing'</li></ul> |
114
+ | 49 | <ul><li>'Reduce the opacity of the overlay'</li><li>'Increase the transparency of the image'</li><li>'Make the shape more opaque'</li></ul> |
115
+ | 50 | <ul><li>'Make the paragraphs closer together'</li><li>'Increase the text block spacing'</li><li>'Reduce the paragraph spacing'</li></ul> |
116
+ | 51 | <ul><li>'Make the colors more intense'</li><li>'Increase the color depth'</li><li>'Make the image more colorful'</li></ul> |
117
+ | 52 | <ul><li>'Darken the shadows in the image'</li><li>'Increase the shadow intensity'</li><li>'Enhance the shadow depth'</li></ul> |
118
+ | 53 | <ul><li>'Enhance the image clarity'</li><li>'Increase the image sharpness'</li><li>'Sharpen the image details'</li></ul> |
119
+ | 54 | <ul><li>'Center the button text'</li><li>'Justify the paragraph text'</li><li>'Center the title text'</li></ul> |
120
+ | 55 | <ul><li>'Create a border around the text'</li><li>'Add a glow effect behind the text'</li><li>'Add a colored background to the text'</li></ul> |
121
+ | 56 | <ul><li>'Create text in a radial pattern'</li><li>'Create text that follows a circle'</li><li>'Create text that follows a shape'</li></ul> |
122
+ | 57 | <ul><li>'Convert to a bulleted list'</li><li>'Make the text into bullet points'</li><li>'Make the text into a list with bullets'</li></ul> |
123
+ | 58 | <ul><li>'Create a soft shadow behind the text'</li><li>'Add a dramatic shadow effect'</li><li>'Create a shadow for the text'</li></ul> |
124
+ | 59 | <ul><li>'Add warm undertones to the photo'</li><li>'Add warm color grading'</li><li>'Make the photo more golden hour'</li></ul> |
125
+ | 60 | <ul><li>'I need to add my own image'</li><li>'Open the image upload tool'</li><li>'Show me how to upload files'</li></ul> |
126
+ | 61 | <ul><li>'Revert the color change'</li><li>'Undo the last modification'</li><li>'Undo the text edit'</li></ul> |
127
+ | 62 | <ul><li>'Separate the grouped components'</li><li>'Ungroup the combined elements'</li><li>'Break up the grouped objects'</li></ul> |
128
+
129
+ ## Evaluation
130
+
131
+ ### Metrics
132
+ | Label | Accuracy |
133
+ |:--------|:---------|
134
+ | **all** | 0.5354 |
135
+
136
+ ## Uses
137
+
138
+ ### Direct Use for Inference
139
+
140
+ First install the SetFit library:
141
 
142
+ ```bash
143
+ pip install setfit
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
144
  ```
145
 
146
+ Then you can load this model and run inference.
 
 
 
 
 
147
 
148
  ```python
149
+ from setfit import SetFitModel
 
 
 
 
 
 
 
150
 
151
+ # Download from the 🤗 Hub
152
+ model = SetFitModel.from_pretrained("setfit_model_id")
153
+ # Run inference
154
+ preds = model("Change the button text")
155
  ```
156
 
157
+ <!--
158
+ ### Downstream Use
159
+
160
+ *List how someone could finetune this model on their own dataset.*
161
+ -->
162
+
163
+ <!--
164
+ ### Out-of-Scope Use
165
+
166
+ *List how the model may foreseeably be misused and address what users ought not to do with the model.*
167
+ -->
168
+
169
+ <!--
170
+ ## Bias, Risks and Limitations
171
+
172
+ *What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
173
+ -->
174
+
175
+ <!--
176
+ ### Recommendations
177
+
178
+ *What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
179
+ -->
180
+
181
+ ## Training Details
182
+
183
+ ### Training Set Metrics
184
+ | Training set | Min | Mean   | Max |
185
+ |:-------------|:----|:-------|:----|
186
+ | Word count | 3 | 5.1778 | 10 |
187
+
188
+ | Label | Training Sample Count |
189
+ |:------|:----------------------|
190
+ | 0 | 5 |
191
+ | 1 | 5 |
192
+ | 2 | 5 |
193
+ | 3 | 5 |
194
+ | 4 | 5 |
195
+ | 5 | 5 |
196
+ | 6 | 5 |
197
+ | 7 | 5 |
198
+ | 8 | 5 |
199
+ | 9 | 5 |
200
+ | 10 | 5 |
201
+ | 11 | 5 |
202
+ | 12 | 5 |
203
+ | 13 | 5 |
204
+ | 14 | 5 |
205
+ | 15 | 5 |
206
+ | 16 | 5 |
207
+ | 17 | 5 |
208
+ | 18 | 5 |
209
+ | 19 | 5 |
210
+ | 20 | 5 |
211
+ | 21 | 5 |
212
+ | 22 | 5 |
213
+ | 23 | 5 |
214
+ | 24 | 5 |
215
+ | 25 | 5 |
216
+ | 26 | 5 |
217
+ | 27 | 5 |
218
+ | 28 | 5 |
219
+ | 29 | 5 |
220
+ | 30 | 5 |
221
+ | 31 | 5 |
222
+ | 32 | 5 |
223
+ | 33 | 5 |
224
+ | 34 | 5 |
225
+ | 35 | 5 |
226
+ | 36 | 5 |
227
+ | 37 | 5 |
228
+ | 38 | 5 |
229
+ | 39 | 5 |
230
+ | 40 | 5 |
231
+ | 41 | 5 |
232
+ | 42 | 5 |
233
+ | 43 | 5 |
234
+ | 44 | 5 |
235
+ | 45 | 5 |
236
+ | 46 | 5 |
237
+ | 47 | 5 |
238
+ | 48 | 5 |
239
+ | 49 | 5 |
240
+ | 50 | 5 |
241
+ | 51 | 5 |
242
+ | 52 | 5 |
243
+ | 53 | 5 |
244
+ | 54 | 5 |
245
+ | 55 | 5 |
246
+ | 56 | 5 |
247
+ | 57 | 5 |
248
+ | 58 | 5 |
249
+ | 59 | 5 |
250
+ | 60 | 5 |
251
+ | 61 | 5 |
252
+ | 62 | 5 |
253
+
254
+ ### Training Hyperparameters
255
+ - batch_size: (64, 64)
256
+ - num_epochs: (1, 1)
257
+ - max_steps: -1
258
+ - sampling_strategy: oversampling
259
+ - body_learning_rate: (2e-05, 1e-05)
260
+ - head_learning_rate: 0.01
261
+ - loss: CosineSimilarityLoss
262
+ - distance_metric: cosine_distance
263
+ - margin: 0.25
264
+ - end_to_end: False
265
+ - use_amp: False
266
+ - warmup_proportion: 0.1
267
+ - l2_weight: 0.01
268
+ - seed: 42
269
+ - eval_max_steps: -1
270
+ - load_best_model_at_end: False
271
+
272
+ ### Training Results
273
+ | Epoch | Step | Training Loss | Validation Loss |
274
+ |:------:|:----:|:-------------:|:---------------:|
275
+ | 0.0007 | 1 | 0.1545 | - |
276
+ | 0.0328 | 50 | 0.1187 | - |
277
+ | 0.0655 | 100 | 0.0521 | - |
278
+ | 0.0983 | 150 | 0.0208 | - |
279
+ | 0.1311 | 200 | 0.0123 | - |
280
+ | 0.1638 | 250 | 0.0096 | - |
281
+ | 0.1966 | 300 | 0.0056 | - |
282
+ | 0.2294 | 350 | 0.0036 | - |
283
+ | 0.2621 | 400 | 0.0027 | - |
284
+ | 0.2949 | 450 | 0.0017 | - |
285
+ | 0.3277 | 500 | 0.0007 | - |
286
+ | 0.3604 | 550 | 0.0009 | - |
287
+ | 0.3932 | 600 | 0.0009 | - |
288
+ | 0.4260 | 650 | 0.0003 | - |
289
+ | 0.4587 | 700 | 0.0003 | - |
290
+ | 0.4915 | 750 | 0.0004 | - |
291
+ | 0.5242 | 800 | 0.0004 | - |
292
+ | 0.5570 | 850 | 0.0002 | - |
293
+ | 0.5898 | 900 | 0.0001 | - |
294
+ | 0.6225 | 950 | 0.0001 | - |
295
+ | 0.6553 | 1000 | 0.0001 | - |
296
+ | 0.6881 | 1050 | 0.0001 | - |
297
+ | 0.7208 | 1100 | 0.0001 | - |
298
+ | 0.7536 | 1150 | 0.0001 | - |
299
+ | 0.7864 | 1200 | 0.0001 | - |
300
+ | 0.8191 | 1250 | 0.0001 | - |
301
+ | 0.8519 | 1300 | 0.0001 | - |
302
+ | 0.8847 | 1350 | 0.0001 | - |
303
+ | 0.9174 | 1400 | 0.0001 | - |
304
+ | 0.9502 | 1450 | 0.0001 | - |
305
+ | 0.9830 | 1500 | 0.0001 | - |
306
+
307
+ ### Framework Versions
308
+ - Python: 3.12.11
309
+ - SetFit: 1.1.3
310
+ - Sentence Transformers: 5.1.0
311
+ - Transformers: 4.54.1
312
+ - PyTorch: 2.7.1
313
+ - Datasets: 4.0.0
314
+ - Tokenizers: 0.21.4
315
+
316
+ ## Citation
317
+
318
+ ### BibTeX
319
+ ```bibtex
320
+ @article{https://doi.org/10.48550/arxiv.2209.11055,
321
+ doi = {10.48550/ARXIV.2209.11055},
322
+ url = {https://arxiv.org/abs/2209.11055},
323
+ author = {Tunstall, Lewis and Reimers, Nils and Jo, Unso Eun Seo and Bates, Luke and Korat, Daniel and Wasserblat, Moshe and Pereg, Oren},
324
+ keywords = {Computation and Language (cs.CL), FOS: Computer and information sciences},
325
+ title = {Efficient Few-Shot Learning Without Prompts},
326
+ publisher = {arXiv},
327
+ year = {2022},
328
+ copyright = {Creative Commons Attribution 4.0 International}
329
+ }
330
  ```
331
 
332
+ <!--
333
+ ## Glossary
 
 
 
 
 
 
 
 
 
 
 
 
334
 
335
+ *Clearly define terms in order to be accessible across audiences.*
336
+ -->
337
 
338
+ <!--
339
+ ## Model Card Authors
340
 
341
+ *Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
342
+ -->
343
 
344
+ <!--
345
+ ## Model Card Contact
346
 
347
+ *Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
348
+ -->
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
config_setfit.json CHANGED
@@ -1,68 +1,68 @@
1
  {
2
  "normalize_embeddings": false,
3
  "labels": [
4
- "addAsset",
5
- "addAssetsUIAction",
6
- "addText",
7
- "align",
8
- "applyAnimationObjectPreset",
9
- "applyPageLevelAnimation",
10
- "changeContentInImage",
11
- "delete",
12
- "displayHelpQA",
13
- "distributeAroundObject",
14
- "duplicatePage",
15
- "duplicateSceneNode",
16
- "duplicateSceneNodeToPage",
17
- "fixTypography",
18
- "flip",
19
- "generateImageFromText",
20
- "generateTemplates",
21
- "group",
22
- "moveRelativeTo",
23
- "previewFilterEffects",
24
- "proposeAssets",
25
- "redo",
26
- "removeBackground",
27
- "removeContentFromImage",
28
- "replaceAsset",
29
- "replaceText",
30
- "resetImageSettings",
31
- "resize",
32
- "resizePage",
33
- "rotate",
34
- "scatter",
35
- "select",
36
- "setBackgroundColor",
37
- "setBlendMode",
38
- "setBlur",
39
- "setBorderStyle",
40
- "setBrightness",
41
- "setChildIndex",
42
- "setContrast",
43
- "setCropShape",
44
- "setDropShadow",
45
- "setFillColor",
46
- "setFontSize",
47
- "setFontStyle",
48
- "setFontType",
49
- "setHighlight",
50
- "setImageAsBackground",
51
- "setLetterSpacing",
52
- "setLineSpacing",
53
- "setOpacity",
54
- "setParagraphSpacing",
55
- "setSaturation",
56
- "setShadow",
57
- "setSharpness",
58
- "setTextAlignment",
59
- "setTextDecorationEffect",
60
- "setTextLayout",
61
- "setTextListStyle",
62
- "setTextShadow",
63
- "setWarmth",
64
- "showUploadUI",
65
- "undo",
66
- "ungroup"
67
  ]
68
  }
 
1
  {
2
  "normalize_embeddings": false,
3
  "labels": [
4
+ 0,
5
+ 1,
6
+ 2,
7
+ 3,
8
+ 4,
9
+ 5,
10
+ 6,
11
+ 7,
12
+ 8,
13
+ 9,
14
+ 10,
15
+ 11,
16
+ 12,
17
+ 13,
18
+ 14,
19
+ 15,
20
+ 16,
21
+ 17,
22
+ 18,
23
+ 19,
24
+ 20,
25
+ 21,
26
+ 22,
27
+ 23,
28
+ 24,
29
+ 25,
30
+ 26,
31
+ 27,
32
+ 28,
33
+ 29,
34
+ 30,
35
+ 31,
36
+ 32,
37
+ 33,
38
+ 34,
39
+ 35,
40
+ 36,
41
+ 37,
42
+ 38,
43
+ 39,
44
+ 40,
45
+ 41,
46
+ 42,
47
+ 43,
48
+ 44,
49
+ 45,
50
+ 46,
51
+ 47,
52
+ 48,
53
+ 49,
54
+ 50,
55
+ 51,
56
+ 52,
57
+ 53,
58
+ 54,
59
+ 55,
60
+ 56,
61
+ 57,
62
+ 58,
63
+ 59,
64
+ 60,
65
+ 61,
66
+ 62
67
  ]
68
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:aa9aadd2159de2c42651146a5986764684ab69bce27981e785e6526cc9029108
3
  size 546938168
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fffecbd38859dda02c39c881c1dd347654613c58bb22dcd705706dd3c9247709
3
  size 546938168
model_head.pkl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0c2fcf5c85e55b03ddb9686e88e2cab24fd04e6aec17e2fa0af1333caca47794
3
  size 388927
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:963cbad21c41b69e67104f59ae67989240513569fb9d1218df64a659c819609a
3
  size 388927