metadata
model-index:
- name: karsar/paraphrase-multilingual-MiniLM-L12-hu_v1
results:
- dataset:
config: hun_Latn-hun_Latn
name: MTEB BelebeleRetrieval (hun_Latn-hun_Latn)
revision: 75b399394a9803252cfec289d103de462763db7c
split: test
type: facebook/belebele
metrics:
- type: main_score
value: 77.865
- type: map_at_1
value: 67.333
- type: map_at_10
value: 74.404
- type: map_at_100
value: 74.802
- type: map_at_1000
value: 74.809
- type: map_at_20
value: 74.63
- type: map_at_3
value: 72.796
- type: map_at_5
value: 73.67399999999999
- type: mrr_at_1
value: 67.33333333333333
- type: mrr_at_10
value: 74.40396825396829
- type: mrr_at_100
value: 74.80177264047548
- type: mrr_at_1000
value: 74.80937346439818
- type: mrr_at_20
value: 74.62979204843244
- type: mrr_at_3
value: 72.7962962962963
- type: mrr_at_5
value: 73.6740740740741
- type: nauc_map_at_1000_diff1
value: 76.08133094195743
- type: nauc_map_at_1000_max
value: 61.727834175182736
- type: nauc_map_at_1000_std
value: -2.3231732437794568
- type: nauc_map_at_100_diff1
value: 76.07916259051902
- type: nauc_map_at_100_max
value: 61.72703450852774
- type: nauc_map_at_100_std
value: -2.3175338063349575
- type: nauc_map_at_10_diff1
value: 75.97996147738112
- type: nauc_map_at_10_max
value: 61.860784493617224
- type: nauc_map_at_10_std
value: -2.4887315051072356
- type: nauc_map_at_1_diff1
value: 78.13561632940586
- type: nauc_map_at_1_max
value: 59.243520843511746
- type: nauc_map_at_1_std
value: -2.6689239089679515
- type: nauc_map_at_20_diff1
value: 76.06883452011327
- type: nauc_map_at_20_max
value: 61.775589074510826
- type: nauc_map_at_20_std
value: -2.3905575770447585
- type: nauc_map_at_3_diff1
value: 75.85937006372846
- type: nauc_map_at_3_max
value: 61.819093557650895
- type: nauc_map_at_3_std
value: -2.5207238945764647
- type: nauc_map_at_5_diff1
value: 76.06929563357589
- type: nauc_map_at_5_max
value: 61.93563829360039
- type: nauc_map_at_5_std
value: -1.9424637593671918
- type: nauc_mrr_at_1000_diff1
value: 76.08133094195743
- type: nauc_mrr_at_1000_max
value: 61.727834175182736
- type: nauc_mrr_at_1000_std
value: -2.3231732437794568
- type: nauc_mrr_at_100_diff1
value: 76.07916259051902
- type: nauc_mrr_at_100_max
value: 61.72703450852774
- type: nauc_mrr_at_100_std
value: -2.3175338063349575
- type: nauc_mrr_at_10_diff1
value: 75.97996147738112
- type: nauc_mrr_at_10_max
value: 61.860784493617224
- type: nauc_mrr_at_10_std
value: -2.4887315051072356
- type: nauc_mrr_at_1_diff1
value: 78.13561632940586
- type: nauc_mrr_at_1_max
value: 59.243520843511746
- type: nauc_mrr_at_1_std
value: -2.6689239089679515
- type: nauc_mrr_at_20_diff1
value: 76.06883452011327
- type: nauc_mrr_at_20_max
value: 61.775589074510826
- type: nauc_mrr_at_20_std
value: -2.3905575770447585
- type: nauc_mrr_at_3_diff1
value: 75.85937006372846
- type: nauc_mrr_at_3_max
value: 61.819093557650895
- type: nauc_mrr_at_3_std
value: -2.5207238945764647
- type: nauc_mrr_at_5_diff1
value: 76.06929563357589
- type: nauc_mrr_at_5_max
value: 61.93563829360039
- type: nauc_mrr_at_5_std
value: -1.9424637593671918
- type: nauc_ndcg_at_1000_diff1
value: 75.7057240434196
- type: nauc_ndcg_at_1000_max
value: 62.021717989510385
- type: nauc_ndcg_at_1000_std
value: -2.2522490330905103
- type: nauc_ndcg_at_100_diff1
value: 75.62156032414751
- type: nauc_ndcg_at_100_max
value: 61.97932968109654
- type: nauc_ndcg_at_100_std
value: -2.0118635701265375
- type: nauc_ndcg_at_10_diff1
value: 75.09836101324169
- type: nauc_ndcg_at_10_max
value: 62.703427209156736
- type: nauc_ndcg_at_10_std
value: -2.9287738405282395
- type: nauc_ndcg_at_1_diff1
value: 78.13561632940586
- type: nauc_ndcg_at_1_max
value: 59.243520843511746
- type: nauc_ndcg_at_1_std
value: -2.6689239089679515
- type: nauc_ndcg_at_20_diff1
value: 75.46348763248093
- type: nauc_ndcg_at_20_max
value: 62.35498579351012
- type: nauc_ndcg_at_20_std
value: -2.577338920595739
- type: nauc_ndcg_at_3_diff1
value: 74.92773626606146
- type: nauc_ndcg_at_3_max
value: 62.55812080913172
- type: nauc_ndcg_at_3_std
value: -2.5630879822636476
- type: nauc_ndcg_at_5_diff1
value: 75.3100398038724
- type: nauc_ndcg_at_5_max
value: 62.81733471459409
- type: nauc_ndcg_at_5_std
value: -1.501748019065971
- type: nauc_precision_at_1000_diff1
value: .nan
- type: nauc_precision_at_1000_max
value: .nan
- type: nauc_precision_at_1000_std
value: .nan
- type: nauc_precision_at_100_diff1
value: 66.63165266106552
- type: nauc_precision_at_100_max
value: 57.60582010582053
- type: nauc_precision_at_100_std
value: 23.844537815126937
- type: nauc_precision_at_10_diff1
value: 70.08984254109942
- type: nauc_precision_at_10_max
value: 67.45880653843606
- type: nauc_precision_at_10_std
value: -6.3555626412584
- type: nauc_precision_at_1_diff1
value: 78.13561632940586
- type: nauc_precision_at_1_max
value: 59.243520843511746
- type: nauc_precision_at_1_std
value: -2.6689239089679515
- type: nauc_precision_at_20_diff1
value: 71.63306637208878
- type: nauc_precision_at_20_max
value: 65.99137307505141
- type: nauc_precision_at_20_std
value: -4.675767020423249
- type: nauc_precision_at_3_diff1
value: 71.57608769475272
- type: nauc_precision_at_3_max
value: 65.10683383365713
- type: nauc_precision_at_3_std
value: -2.7514636167292985
- type: nauc_precision_at_5_diff1
value: 72.21412151067312
- type: nauc_precision_at_5_max
value: 66.43448275862069
- type: nauc_precision_at_5_std
value: 0.4555008210180189
- type: nauc_recall_at_1000_diff1
value: .nan
- type: nauc_recall_at_1000_max
value: .nan
- type: nauc_recall_at_1000_std
value: .nan
- type: nauc_recall_at_100_diff1
value: 66.63165266106327
- type: nauc_recall_at_100_max
value: 57.60582010581922
- type: nauc_recall_at_100_std
value: 23.844537815125907
- type: nauc_recall_at_10_diff1
value: 70.08984254109967
- type: nauc_recall_at_10_max
value: 67.45880653843632
- type: nauc_recall_at_10_std
value: -6.355562641258283
- type: nauc_recall_at_1_diff1
value: 78.13561632940586
- type: nauc_recall_at_1_max
value: 59.243520843511746
- type: nauc_recall_at_1_std
value: -2.6689239089679515
- type: nauc_recall_at_20_diff1
value: 71.6330663720887
- type: nauc_recall_at_20_max
value: 65.9913730750516
- type: nauc_recall_at_20_std
value: -4.675767020422999
- type: nauc_recall_at_3_diff1
value: 71.57608769475274
- type: nauc_recall_at_3_max
value: 65.106833833657
- type: nauc_recall_at_3_std
value: -2.7514636167294
- type: nauc_recall_at_5_diff1
value: 72.21412151067315
- type: nauc_recall_at_5_max
value: 66.43448275862077
- type: nauc_recall_at_5_std
value: 0.4555008210180812
- type: ndcg_at_1
value: 67.333
- type: ndcg_at_10
value: 77.865
- type: ndcg_at_100
value: 79.927
- type: ndcg_at_1000
value: 80.104
- type: ndcg_at_20
value: 78.701
- type: ndcg_at_3
value: 74.509
- type: ndcg_at_5
value: 76.101
- type: precision_at_1
value: 67.333
- type: precision_at_10
value: 8.878
- type: precision_at_100
value: 0.987
- type: precision_at_1000
value: 0.1
- type: precision_at_20
value: 4.606
- type: precision_at_3
value: 26.480999999999998
- type: precision_at_5
value: 16.667
- type: recall_at_1
value: 67.333
- type: recall_at_10
value: 88.778
- type: recall_at_100
value: 98.667
- type: recall_at_1000
value: 100
- type: recall_at_20
value: 92.111
- type: recall_at_3
value: 79.444
- type: recall_at_5
value: 83.333
task:
type: Retrieval
- dataset:
config: hun_Latn-eng_Latn
name: MTEB BelebeleRetrieval (hun_Latn-eng_Latn)
revision: 75b399394a9803252cfec289d103de462763db7c
split: test
type: facebook/belebele
metrics:
- type: main_score
value: 71.307
- type: map_at_1
value: 57.778
- type: map_at_10
value: 66.843
- type: map_at_100
value: 67.368
- type: map_at_1000
value: 67.38300000000001
- type: map_at_20
value: 67.162
- type: map_at_3
value: 64.704
- type: map_at_5
value: 65.97
- type: mrr_at_1
value: 57.77777777777777
- type: mrr_at_10
value: 66.8428130511464
- type: mrr_at_100
value: 67.36803803097415
- type: mrr_at_1000
value: 67.38317813286176
- type: mrr_at_20
value: 67.16164827986293
- type: mrr_at_3
value: 64.7037037037037
- type: mrr_at_5
value: 65.97037037037038
- type: nauc_map_at_1000_diff1
value: 69.02219987684592
- type: nauc_map_at_1000_max
value: 60.114123597785785
- type: nauc_map_at_1000_std
value: 4.880216382742553
- type: nauc_map_at_100_diff1
value: 69.01116363727591
- type: nauc_map_at_100_max
value: 60.11716622079215
- type: nauc_map_at_100_std
value: 4.890393343425179
- type: nauc_map_at_10_diff1
value: 68.95240309900163
- type: nauc_map_at_10_max
value: 60.124170478386105
- type: nauc_map_at_10_std
value: 4.819161459028938
- type: nauc_map_at_1_diff1
value: 72.45335820895522
- type: nauc_map_at_1_max
value: 59.127316006176
- type: nauc_map_at_1_std
value: 6.580191713844538
- type: nauc_map_at_20_diff1
value: 68.87249492072671
- type: nauc_map_at_20_max
value: 60.04834608184139
- type: nauc_map_at_20_std
value: 4.807958211395879
- type: nauc_map_at_3_diff1
value: 69.38092756897547
- type: nauc_map_at_3_max
value: 60.30271451423346
- type: nauc_map_at_3_std
value: 3.9374045068220322
- type: nauc_map_at_5_diff1
value: 69.10875854889262
- type: nauc_map_at_5_max
value: 60.24557626138646
- type: nauc_map_at_5_std
value: 4.271289591515184
- type: nauc_mrr_at_1000_diff1
value: 69.02219987684592
- type: nauc_mrr_at_1000_max
value: 60.114123597785785
- type: nauc_mrr_at_1000_std
value: 4.880216382742553
- type: nauc_mrr_at_100_diff1
value: 69.01116363727591
- type: nauc_mrr_at_100_max
value: 60.11716622079215
- type: nauc_mrr_at_100_std
value: 4.890393343425179
- type: nauc_mrr_at_10_diff1
value: 68.95240309900163
- type: nauc_mrr_at_10_max
value: 60.124170478386105
- type: nauc_mrr_at_10_std
value: 4.819161459028938
- type: nauc_mrr_at_1_diff1
value: 72.45335820895522
- type: nauc_mrr_at_1_max
value: 59.127316006176
- type: nauc_mrr_at_1_std
value: 6.580191713844538
- type: nauc_mrr_at_20_diff1
value: 68.87249492072671
- type: nauc_mrr_at_20_max
value: 60.04834608184139
- type: nauc_mrr_at_20_std
value: 4.807958211395879
- type: nauc_mrr_at_3_diff1
value: 69.38092756897547
- type: nauc_mrr_at_3_max
value: 60.30271451423346
- type: nauc_mrr_at_3_std
value: 3.9374045068220322
- type: nauc_mrr_at_5_diff1
value: 69.10875854889262
- type: nauc_mrr_at_5_max
value: 60.24557626138646
- type: nauc_mrr_at_5_std
value: 4.271289591515184
- type: nauc_ndcg_at_1000_diff1
value: 68.36151731152576
- type: nauc_ndcg_at_1000_max
value: 60.21499073164881
- type: nauc_ndcg_at_1000_std
value: 5.019374170320369
- type: nauc_ndcg_at_100_diff1
value: 68.12777182930174
- type: nauc_ndcg_at_100_max
value: 60.293069076013296
- type: nauc_ndcg_at_100_std
value: 5.375522795479381
- type: nauc_ndcg_at_10_diff1
value: 67.46914440211127
- type: nauc_ndcg_at_10_max
value: 60.210209508170976
- type: nauc_ndcg_at_10_std
value: 4.921793458534013
- type: nauc_ndcg_at_1_diff1
value: 72.45335820895522
- type: nauc_ndcg_at_1_max
value: 59.127316006176
- type: nauc_ndcg_at_1_std
value: 6.580191713844538
- type: nauc_ndcg_at_20_diff1
value: 67.09692054164125
- type: nauc_ndcg_at_20_max
value: 59.89689460185056
- type: nauc_ndcg_at_20_std
value: 4.977631579372532
- type: nauc_ndcg_at_3_diff1
value: 68.54468748113734
- type: nauc_ndcg_at_3_max
value: 60.66886257099051
- type: nauc_ndcg_at_3_std
value: 3.073807310026356
- type: nauc_ndcg_at_5_diff1
value: 67.94441056262235
- type: nauc_ndcg_at_5_max
value: 60.47774252804478
- type: nauc_ndcg_at_5_std
value: 3.572034464519458
- type: nauc_precision_at_1000_diff1
value: .nan
- type: nauc_precision_at_1000_max
value: .nan
- type: nauc_precision_at_1000_std
value: .nan
- type: nauc_precision_at_100_diff1
value: 52.808123249299676
- type: nauc_precision_at_100_max
value: 65.81699346405254
- type: nauc_precision_at_100_std
value: 31.809056956116383
- type: nauc_precision_at_10_diff1
value: 59.02820830750145
- type: nauc_precision_at_10_max
value: 60.33787972721626
- type: nauc_precision_at_10_std
value: 6.405175213296739
- type: nauc_precision_at_1_diff1
value: 72.45335820895522
- type: nauc_precision_at_1_max
value: 59.127316006176
- type: nauc_precision_at_1_std
value: 6.580191713844538
- type: nauc_precision_at_20_diff1
value: 52.242994576107485
- type: nauc_precision_at_20_max
value: 57.56617253643015
- type: nauc_precision_at_20_std
value: 7.9884388212213455
- type: nauc_precision_at_3_diff1
value: 65.73191064426206
- type: nauc_precision_at_3_max
value: 61.92373010829596
- type: nauc_precision_at_3_std
value: 0.096317142458587
- type: nauc_precision_at_5_diff1
value: 63.20464039592358
- type: nauc_precision_at_5_max
value: 61.25721735891223
- type: nauc_precision_at_5_std
value: 0.7937099220392029
- type: nauc_recall_at_1000_diff1
value: .nan
- type: nauc_recall_at_1000_max
value: .nan
- type: nauc_recall_at_1000_std
value: .nan
- type: nauc_recall_at_100_diff1
value: 52.80812324929921
- type: nauc_recall_at_100_max
value: 65.81699346405242
- type: nauc_recall_at_100_std
value: 31.809056956115235
- type: nauc_recall_at_10_diff1
value: 59.02820830750159
- type: nauc_recall_at_10_max
value: 60.337879727216446
- type: nauc_recall_at_10_std
value: 6.405175213296646
- type: nauc_recall_at_1_diff1
value: 72.45335820895522
- type: nauc_recall_at_1_max
value: 59.127316006176
- type: nauc_recall_at_1_std
value: 6.580191713844538
- type: nauc_recall_at_20_diff1
value: 52.242994576107534
- type: nauc_recall_at_20_max
value: 57.56617253643034
- type: nauc_recall_at_20_std
value: 7.988438821221468
- type: nauc_recall_at_3_diff1
value: 65.73191064426209
- type: nauc_recall_at_3_max
value: 61.923730108295906
- type: nauc_recall_at_3_std
value: 0.09631714245861488
- type: nauc_recall_at_5_diff1
value: 63.204640395923626
- type: nauc_recall_at_5_max
value: 61.25721735891235
- type: nauc_recall_at_5_std
value: 0.7937099220392697
- type: ndcg_at_1
value: 57.778
- type: ndcg_at_10
value: 71.307
- type: ndcg_at_100
value: 73.942
- type: ndcg_at_1000
value: 74.248
- type: ndcg_at_20
value: 72.499
- type: ndcg_at_3
value: 66.95
- type: ndcg_at_5
value: 69.21199999999999
- type: precision_at_1
value: 57.778
- type: precision_at_10
value: 8.533
- type: precision_at_100
value: 0.9780000000000001
- type: precision_at_1000
value: 0.1
- type: precision_at_20
value: 4.506
- type: precision_at_3
value: 24.481
- type: precision_at_5
value: 15.778
- type: recall_at_1
value: 57.778
- type: recall_at_10
value: 85.333
- type: recall_at_100
value: 97.77799999999999
- type: recall_at_1000
value: 100
- type: recall_at_20
value: 90.11099999999999
- type: recall_at_3
value: 73.444
- type: recall_at_5
value: 78.889
task:
type: Retrieval
- dataset:
config: eng_Latn-hun_Latn
name: MTEB BelebeleRetrieval (eng_Latn-hun_Latn)
revision: 75b399394a9803252cfec289d103de462763db7c
split: test
type: facebook/belebele
metrics:
- type: main_score
value: 73.668
- type: map_at_1
value: 60.778
- type: map_at_10
value: 69.571
- type: map_at_100
value: 70.114
- type: map_at_1000
value: 70.124
- type: map_at_20
value: 69.93700000000001
- type: map_at_3
value: 67.778
- type: map_at_5
value: 68.872
- type: mrr_at_1
value: 60.77777777777777
- type: mrr_at_10
value: 69.57142857142857
- type: mrr_at_100
value: 70.1136336675579
- type: mrr_at_1000
value: 70.12432347462514
- type: mrr_at_20
value: 69.93690215204663
- type: mrr_at_3
value: 67.77777777777779
- type: mrr_at_5
value: 68.87222222222223
- type: nauc_map_at_1000_diff1
value: 70.84789011327231
- type: nauc_map_at_1000_max
value: 60.852088181225824
- type: nauc_map_at_1000_std
value: 6.549993568212846
- type: nauc_map_at_100_diff1
value: 70.84603146007751
- type: nauc_map_at_100_max
value: 60.859417397516125
- type: nauc_map_at_100_std
value: 6.577244018939677
- type: nauc_map_at_10_diff1
value: 70.71490936568583
- type: nauc_map_at_10_max
value: 60.94472236517367
- type: nauc_map_at_10_std
value: 6.53657697773106
- type: nauc_map_at_1_diff1
value: 74.59301032751448
- type: nauc_map_at_1_max
value: 59.251209223705935
- type: nauc_map_at_1_std
value: 6.536579330592454
- type: nauc_map_at_20_diff1
value: 70.69902333418673
- type: nauc_map_at_20_max
value: 60.84819592450007
- type: nauc_map_at_20_std
value: 6.487171209675751
- type: nauc_map_at_3_diff1
value: 70.94073456299253
- type: nauc_map_at_3_max
value: 61.117845574972286
- type: nauc_map_at_3_std
value: 5.824524654602759
- type: nauc_map_at_5_diff1
value: 70.64337838638826
- type: nauc_map_at_5_max
value: 60.69375707294804
- type: nauc_map_at_5_std
value: 6.1403804587682025
- type: nauc_mrr_at_1000_diff1
value: 70.84789011327231
- type: nauc_mrr_at_1000_max
value: 60.852088181225824
- type: nauc_mrr_at_1000_std
value: 6.549993568212846
- type: nauc_mrr_at_100_diff1
value: 70.84603146007751
- type: nauc_mrr_at_100_max
value: 60.859417397516125
- type: nauc_mrr_at_100_std
value: 6.577244018939677
- type: nauc_mrr_at_10_diff1
value: 70.71490936568583
- type: nauc_mrr_at_10_max
value: 60.94472236517367
- type: nauc_mrr_at_10_std
value: 6.53657697773106
- type: nauc_mrr_at_1_diff1
value: 74.59301032751448
- type: nauc_mrr_at_1_max
value: 59.251209223705935
- type: nauc_mrr_at_1_std
value: 6.536579330592454
- type: nauc_mrr_at_20_diff1
value: 70.69902333418673
- type: nauc_mrr_at_20_max
value: 60.84819592450007
- type: nauc_mrr_at_20_std
value: 6.487171209675751
- type: nauc_mrr_at_3_diff1
value: 70.94073456299253
- type: nauc_mrr_at_3_max
value: 61.117845574972286
- type: nauc_mrr_at_3_std
value: 5.824524654602759
- type: nauc_mrr_at_5_diff1
value: 70.64337838638826
- type: nauc_mrr_at_5_max
value: 60.69375707294804
- type: nauc_mrr_at_5_std
value: 6.1403804587682025
- type: nauc_ndcg_at_1000_diff1
value: 70.2568421673153
- type: nauc_ndcg_at_1000_max
value: 61.154155762479746
- type: nauc_ndcg_at_1000_std
value: 6.987492117976732
- type: nauc_ndcg_at_100_diff1
value: 70.23106290886678
- type: nauc_ndcg_at_100_max
value: 61.387176821366296
- type: nauc_ndcg_at_100_std
value: 7.782749694416603
- type: nauc_ndcg_at_10_diff1
value: 69.26227190907855
- type: nauc_ndcg_at_10_max
value: 61.634434826859874
- type: nauc_ndcg_at_10_std
value: 7.185316156791736
- type: nauc_ndcg_at_1_diff1
value: 74.59301032751448
- type: nauc_ndcg_at_1_max
value: 59.251209223705935
- type: nauc_ndcg_at_1_std
value: 6.536579330592454
- type: nauc_ndcg_at_20_diff1
value: 69.1954116973286
- type: nauc_ndcg_at_20_max
value: 61.38887961478062
- type: nauc_ndcg_at_20_std
value: 7.1318041010309585
- type: nauc_ndcg_at_3_diff1
value: 69.75775816678905
- type: nauc_ndcg_at_3_max
value: 61.67436817540673
- type: nauc_ndcg_at_3_std
value: 5.650531149732009
- type: nauc_ndcg_at_5_diff1
value: 69.1651947412561
- type: nauc_ndcg_at_5_max
value: 60.97882565960433
- type: nauc_ndcg_at_5_std
value: 6.203128058155249
- type: nauc_precision_at_1000_diff1
value: .nan
- type: nauc_precision_at_1000_max
value: .nan
- type: nauc_precision_at_1000_std
value: .nan
- type: nauc_precision_at_100_diff1
value: 68.65491294557121
- type: nauc_precision_at_100_max
value: 80.36744109408565
- type: nauc_precision_at_100_std
value: 70.92327126929257
- type: nauc_precision_at_10_diff1
value: 61.29162638094176
- type: nauc_precision_at_10_max
value: 65.7264903076506
- type: nauc_precision_at_10_std
value: 11.47548778748128
- type: nauc_precision_at_1_diff1
value: 74.59301032751448
- type: nauc_precision_at_1_max
value: 59.251209223705935
- type: nauc_precision_at_1_std
value: 6.536579330592454
- type: nauc_precision_at_20_diff1
value: 56.51478369125409
- type: nauc_precision_at_20_max
value: 66.28882664176771
- type: nauc_precision_at_20_std
value: 14.05415499533146
- type: nauc_precision_at_3_diff1
value: 65.55150000975934
- type: nauc_precision_at_3_max
value: 63.631594870493636
- type: nauc_precision_at_3_std
value: 5.057287295297996
- type: nauc_precision_at_5_diff1
value: 62.93787770906014
- type: nauc_precision_at_5_max
value: 62.06285784899278
- type: nauc_precision_at_5_std
value: 6.577948558011871
- type: nauc_recall_at_1000_diff1
value: .nan
- type: nauc_recall_at_1000_max
value: .nan
- type: nauc_recall_at_1000_std
value: .nan
- type: nauc_recall_at_100_diff1
value: 68.6549129455701
- type: nauc_recall_at_100_max
value: 80.36744109408454
- type: nauc_recall_at_100_std
value: 70.92327126929207
- type: nauc_recall_at_10_diff1
value: 61.29162638094184
- type: nauc_recall_at_10_max
value: 65.72649030765079
- type: nauc_recall_at_10_std
value: 11.475487787481537
- type: nauc_recall_at_1_diff1
value: 74.59301032751448
- type: nauc_recall_at_1_max
value: 59.251209223705935
- type: nauc_recall_at_1_std
value: 6.536579330592454
- type: nauc_recall_at_20_diff1
value: 56.514783691254266
- type: nauc_recall_at_20_max
value: 66.28882664176774
- type: nauc_recall_at_20_std
value: 14.054154995331741
- type: nauc_recall_at_3_diff1
value: 65.55150000975928
- type: nauc_recall_at_3_max
value: 63.63159487049364
- type: nauc_recall_at_3_std
value: 5.05728729529798
- type: nauc_recall_at_5_diff1
value: 62.937877709060295
- type: nauc_recall_at_5_max
value: 62.06285784899285
- type: nauc_recall_at_5_std
value: 6.577948558011953
- type: ndcg_at_1
value: 60.778
- type: ndcg_at_10
value: 73.668
- type: ndcg_at_100
value: 76.21
- type: ndcg_at_1000
value: 76.459
- type: ndcg_at_20
value: 74.993
- type: ndcg_at_3
value: 70.00800000000001
- type: ndcg_at_5
value: 71.978
- type: precision_at_1
value: 60.778
- type: precision_at_10
value: 8.644
- type: precision_at_100
value: 0.9809999999999999
- type: precision_at_1000
value: 0.1
- type: precision_at_20
value: 4.583
- type: precision_at_3
value: 25.480999999999998
- type: precision_at_5
value: 16.244
- type: recall_at_1
value: 60.778
- type: recall_at_10
value: 86.444
- type: recall_at_100
value: 98.111
- type: recall_at_1000
value: 100
- type: recall_at_20
value: 91.667
- type: recall_at_3
value: 76.444
- type: recall_at_5
value: 81.22200000000001
task:
type: Retrieval
- dataset:
config: eng_Latn-hun_Latn
name: MTEB BibleNLPBitextMining (eng_Latn-hun_Latn)
revision: 264a18480c529d9e922483839b4b9758e690b762
split: train
type: davidstap/biblenlp-corpus-mmteb
metrics:
- type: accuracy
value: 88.671875
- type: f1
value: 85.859375
- type: main_score
value: 85.859375
- type: precision
value: 84.71354166666667
- type: recall
value: 88.671875
task:
type: BitextMining
- dataset:
config: hun_Latn-eng_Latn
name: MTEB BibleNLPBitextMining (hun_Latn-eng_Latn)
revision: 264a18480c529d9e922483839b4b9758e690b762
split: train
type: davidstap/biblenlp-corpus-mmteb
metrics:
- type: accuracy
value: 91.796875
- type: f1
value: 89.41406249999999
- type: main_score
value: 89.41406249999999
- type: precision
value: 88.31380208333334
- type: recall
value: 91.796875
task:
type: BitextMining
- dataset:
config: default
name: MTEB HunSum2AbstractiveRetrieval (default)
revision: 24e1445c8180d937f0a16f8ae8a62e77cc952e56
split: test
type: SZTAKI-HLT/HunSum-2-abstractive
metrics:
- type: main_score
value: 63.263000000000005
- type: map_at_1
value: 63.263000000000005
- type: map_at_10
value: 69.717
- type: map_at_100
value: 70.19999999999999
- type: map_at_1000
value: 70.223
- type: map_at_20
value: 69.987
- type: map_at_3
value: 68.126
- type: map_at_5
value: 69.11500000000001
- type: mrr_at_1
value: 63.263263263263255
- type: mrr_at_10
value: 69.71656179989505
- type: mrr_at_100
value: 70.20005091433352
- type: mrr_at_1000
value: 70.22300238535382
- type: mrr_at_20
value: 69.98650484718584
- type: mrr_at_3
value: 68.12645979312641
- type: mrr_at_5
value: 69.11494828161491
- type: nauc_map_at_1000_diff1
value: 78.57062147162597
- type: nauc_map_at_1000_max
value: 67.50701502337495
- type: nauc_map_at_1000_std
value: -0.5617129044803558
- type: nauc_map_at_100_diff1
value: 78.55994402867587
- type: nauc_map_at_100_max
value: 67.50751346612932
- type: nauc_map_at_100_std
value: -0.5527533150571393
- type: nauc_map_at_10_diff1
value: 78.40366721771652
- type: nauc_map_at_10_max
value: 67.49241622659412
- type: nauc_map_at_10_std
value: -0.48552097268197614
- type: nauc_map_at_1_diff1
value: 82.01486923813978
- type: nauc_map_at_1_max
value: 65.96265600324601
- type: nauc_map_at_1_std
value: -3.3920974069100702
- type: nauc_map_at_20_diff1
value: 78.47160921094391
- type: nauc_map_at_20_max
value: 67.53010937556571
- type: nauc_map_at_20_std
value: -0.5304810036230149
- type: nauc_map_at_3_diff1
value: 78.82728109994231
- type: nauc_map_at_3_max
value: 67.67886259360823
- type: nauc_map_at_3_std
value: -0.8390404611287001
- type: nauc_map_at_5_diff1
value: 78.64851152021848
- type: nauc_map_at_5_max
value: 67.56443643847581
- type: nauc_map_at_5_std
value: -0.5438994708241538
- type: nauc_mrr_at_1000_diff1
value: 78.57062147162597
- type: nauc_mrr_at_1000_max
value: 67.50701502337495
- type: nauc_mrr_at_1000_std
value: -0.5617129044803558
- type: nauc_mrr_at_100_diff1
value: 78.55994402867587
- type: nauc_mrr_at_100_max
value: 67.50751346612932
- type: nauc_mrr_at_100_std
value: -0.5527533150571393
- type: nauc_mrr_at_10_diff1
value: 78.40366721771652
- type: nauc_mrr_at_10_max
value: 67.49241622659412
- type: nauc_mrr_at_10_std
value: -0.48552097268197614
- type: nauc_mrr_at_1_diff1
value: 82.01486923813978
- type: nauc_mrr_at_1_max
value: 65.96265600324601
- type: nauc_mrr_at_1_std
value: -3.3920974069100702
- type: nauc_mrr_at_20_diff1
value: 78.47160921094391
- type: nauc_mrr_at_20_max
value: 67.53010937556571
- type: nauc_mrr_at_20_std
value: -0.5304810036230149
- type: nauc_mrr_at_3_diff1
value: 78.82728109994231
- type: nauc_mrr_at_3_max
value: 67.67886259360823
- type: nauc_mrr_at_3_std
value: -0.8390404611287001
- type: nauc_mrr_at_5_diff1
value: 78.64851152021848
- type: nauc_mrr_at_5_max
value: 67.56443643847581
- type: nauc_mrr_at_5_std
value: -0.5438994708241538
- type: nauc_ndcg_at_1000_diff1
value: 77.85313935589254
- type: nauc_ndcg_at_1000_max
value: 67.79745016701565
- type: nauc_ndcg_at_1000_std
value: 0.3743893992928968
- type: nauc_ndcg_at_100_diff1
value: 77.54895730138853
- type: nauc_ndcg_at_100_max
value: 67.90017248869928
- type: nauc_ndcg_at_100_std
value: 0.859162358234398
- type: nauc_ndcg_at_10_diff1
value: 76.71113405671676
- type: nauc_ndcg_at_10_max
value: 67.96034182778398
- type: nauc_ndcg_at_10_std
value: 1.1822837192182254
- type: nauc_ndcg_at_1_diff1
value: 82.01486923813978
- type: nauc_ndcg_at_1_max
value: 65.96265600324601
- type: nauc_ndcg_at_1_std
value: -3.3920974069100702
- type: nauc_ndcg_at_20_diff1
value: 76.93959621702203
- type: nauc_ndcg_at_20_max
value: 68.11195662698223
- type: nauc_ndcg_at_20_std
value: 1.04309687394849
- type: nauc_ndcg_at_3_diff1
value: 77.79565059957739
- type: nauc_ndcg_at_3_max
value: 68.28729385816999
- type: nauc_ndcg_at_3_std
value: 0.2325515867720005
- type: nauc_ndcg_at_5_diff1
value: 77.37740780039985
- type: nauc_ndcg_at_5_max
value: 68.0591693716456
- type: nauc_ndcg_at_5_std
value: 0.8419316054801026
- type: nauc_precision_at_1000_diff1
value: 70.06119288295852
- type: nauc_precision_at_1000_max
value: 56.300969751588504
- type: nauc_precision_at_1000_std
value: 42.8131104675957
- type: nauc_precision_at_100_diff1
value: 67.53252742986358
- type: nauc_precision_at_100_max
value: 71.63984328411749
- type: nauc_precision_at_100_std
value: 20.467710864542678
- type: nauc_precision_at_10_diff1
value: 68.62375685620702
- type: nauc_precision_at_10_max
value: 70.02532507228068
- type: nauc_precision_at_10_std
value: 9.35439782317633
- type: nauc_precision_at_1_diff1
value: 82.01486923813978
- type: nauc_precision_at_1_max
value: 65.96265600324601
- type: nauc_precision_at_1_std
value: -3.3920974069100702
- type: nauc_precision_at_20_diff1
value: 67.96187481073133
- type: nauc_precision_at_20_max
value: 71.59854027319963
- type: nauc_precision_at_20_std
value: 10.641909874113086
- type: nauc_precision_at_3_diff1
value: 74.38802810704372
- type: nauc_precision_at_3_max
value: 70.31804260818862
- type: nauc_precision_at_3_std
value: 3.8694413447531946
- type: nauc_precision_at_5_diff1
value: 72.53680275396366
- type: nauc_precision_at_5_max
value: 69.84127154759457
- type: nauc_precision_at_5_std
value: 6.232801743816592
- type: nauc_recall_at_1000_diff1
value: 70.06119288296337
- type: nauc_recall_at_1000_max
value: 56.30096975158339
- type: nauc_recall_at_1000_std
value: 42.81311046760523
- type: nauc_recall_at_100_diff1
value: 67.53252742986345
- type: nauc_recall_at_100_max
value: 71.63984328411706
- type: nauc_recall_at_100_std
value: 20.46771086454334
- type: nauc_recall_at_10_diff1
value: 68.62375685620707
- type: nauc_recall_at_10_max
value: 70.02532507228068
- type: nauc_recall_at_10_std
value: 9.354397823176459
- type: nauc_recall_at_1_diff1
value: 82.01486923813978
- type: nauc_recall_at_1_max
value: 65.96265600324601
- type: nauc_recall_at_1_std
value: -3.3920974069100702
- type: nauc_recall_at_20_diff1
value: 67.96187481073152
- type: nauc_recall_at_20_max
value: 71.59854027319979
- type: nauc_recall_at_20_std
value: 10.641909874113258
- type: nauc_recall_at_3_diff1
value: 74.3880281070437
- type: nauc_recall_at_3_max
value: 70.31804260818865
- type: nauc_recall_at_3_std
value: 3.8694413447530995
- type: nauc_recall_at_5_diff1
value: 72.53680275396374
- type: nauc_recall_at_5_max
value: 69.84127154759464
- type: nauc_recall_at_5_std
value: 6.232801743816686
- type: ndcg_at_1
value: 63.263000000000005
- type: ndcg_at_10
value: 72.89099999999999
- type: ndcg_at_100
value: 75.421
- type: ndcg_at_1000
value: 76.027
- type: ndcg_at_20
value: 73.919
- type: ndcg_at_3
value: 69.646
- type: ndcg_at_5
value: 71.434
- type: precision_at_1
value: 63.263000000000005
- type: precision_at_10
value: 8.288
- type: precision_at_100
value: 0.95
- type: precision_at_1000
value: 0.1
- type: precision_at_20
value: 4.352
- type: precision_at_3
value: 24.675
- type: precision_at_5
value: 15.676000000000002
- type: recall_at_1
value: 63.263000000000005
- type: recall_at_10
value: 82.883
- type: recall_at_100
value: 95.045
- type: recall_at_1000
value: 99.8
- type: recall_at_20
value: 87.03699999999999
- type: recall_at_3
value: 74.024
- type: recall_at_5
value: 78.378
task:
type: Retrieval
- dataset:
config: hu
name: MTEB MassiveIntentClassification (hu)
revision: 4672e20407010da34463acc759c162ca9734bca6
split: test
type: mteb/amazon_massive_intent
metrics:
- type: accuracy
value: 60.08406186953599
- type: f1
value: 56.958742875652455
- type: f1_weighted
value: 60.57068245324919
- type: main_score
value: 60.08406186953599
task:
type: Classification
- dataset:
config: hu
name: MTEB MassiveIntentClassification (hu)
revision: 4672e20407010da34463acc759c162ca9734bca6
split: validation
type: mteb/amazon_massive_intent
metrics:
- type: accuracy
value: 60.201672405312344
- type: f1
value: 57.03816512332761
- type: f1_weighted
value: 60.53109947438201
- type: main_score
value: 60.201672405312344
task:
type: Classification
- dataset:
config: hu
name: MTEB MassiveScenarioClassification (hu)
revision: fad2c6e8459f9e1c45d9315f4953d921437d70f8
split: test
type: mteb/amazon_massive_scenario
metrics:
- type: accuracy
value: 66.61398789509079
- type: f1
value: 65.88647044935249
- type: f1_weighted
value: 66.80145146976484
- type: main_score
value: 66.61398789509079
task:
type: Classification
- dataset:
config: hu
name: MTEB MassiveScenarioClassification (hu)
revision: fad2c6e8459f9e1c45d9315f4953d921437d70f8
split: validation
type: mteb/amazon_massive_scenario
metrics:
- type: accuracy
value: 66.11411706837187
- type: f1
value: 65.76717397996951
- type: f1_weighted
value: 66.29902597756885
- type: main_score
value: 66.11411706837187
task:
type: Classification
- dataset:
config: hu
name: MTEB MultiEURLEXMultilabelClassification (hu)
revision: 2aea5a6dc8fdcfeca41d0fb963c0a338930bde5c
split: test
type: mteb/eurlex-multilingual
metrics:
- type: accuracy
value: 3.0839999999999996
- type: f1
value: 27.860225486785566
- type: lrap
value: 43.02579150793552
- type: main_score
value: 3.0839999999999996
task:
type: MultilabelClassification
- dataset:
config: arb_Arab-hun_Latn
name: MTEB NTREXBitextMining (arb_Arab-hun_Latn)
revision: ed9a4403ed4adbfaf4aab56d5b2709e9f6c3ba33
split: test
type: mteb/NTREX
metrics:
- type: accuracy
value: 85.678517776665
- type: f1
value: 81.92049979731502
- type: main_score
value: 81.92049979731502
- type: precision
value: 80.21115005842097
- type: recall
value: 85.678517776665
task:
type: BitextMining
- dataset:
config: ben_Beng-hun_Latn
name: MTEB NTREXBitextMining (ben_Beng-hun_Latn)
revision: ed9a4403ed4adbfaf4aab56d5b2709e9f6c3ba33
split: test
type: mteb/NTREX
metrics:
- type: accuracy
value: 44.566850275413124
- type: f1
value: 39.07033025889276
- type: main_score
value: 39.07033025889276
- type: precision
value: 37.07348327291399
- type: recall
value: 44.566850275413124
task:
type: BitextMining
- dataset:
config: deu_Latn-hun_Latn
name: MTEB NTREXBitextMining (deu_Latn-hun_Latn)
revision: ed9a4403ed4adbfaf4aab56d5b2709e9f6c3ba33
split: test
type: mteb/NTREX
metrics:
- type: accuracy
value: 93.44016024036054
- type: f1
value: 91.61909530963112
- type: main_score
value: 91.61909530963112
- type: precision
value: 90.75279586045735
- type: recall
value: 93.44016024036054
task:
type: BitextMining
- dataset:
config: ell_Grek-hun_Latn
name: MTEB NTREXBitextMining (ell_Grek-hun_Latn)
revision: ed9a4403ed4adbfaf4aab56d5b2709e9f6c3ba33
split: test
type: mteb/NTREX
metrics:
- type: accuracy
value: 91.4371557336004
- type: f1
value: 89.0261582850466
- type: main_score
value: 89.0261582850466
- type: precision
value: 87.9043565348022
- type: recall
value: 91.4371557336004
task:
type: BitextMining
- dataset:
config: eng_Latn-hun_Latn
name: MTEB NTREXBitextMining (eng_Latn-hun_Latn)
revision: ed9a4403ed4adbfaf4aab56d5b2709e9f6c3ba33
split: test
type: mteb/NTREX
metrics:
- type: accuracy
value: 94.44166249374061
- type: f1
value: 92.8092138207311
- type: main_score
value: 92.8092138207311
- type: precision
value: 92.0422300116842
- type: recall
value: 94.44166249374061
task:
type: BitextMining
- dataset:
config: fas_Arab-hun_Latn
name: MTEB NTREXBitextMining (fas_Arab-hun_Latn)
revision: ed9a4403ed4adbfaf4aab56d5b2709e9f6c3ba33
split: test
type: mteb/NTREX
metrics:
- type: accuracy
value: 89.53430145217827
- type: f1
value: 86.72270310227245
- type: main_score
value: 86.72270310227245
- type: precision
value: 85.42814221331997
- type: recall
value: 89.53430145217827
task:
type: BitextMining
- dataset:
config: fin_Latn-hun_Latn
name: MTEB NTREXBitextMining (fin_Latn-hun_Latn)
revision: ed9a4403ed4adbfaf4aab56d5b2709e9f6c3ba33
split: test
type: mteb/NTREX
metrics:
- type: accuracy
value: 90.98647971957938
- type: f1
value: 88.44600233683859
- type: main_score
value: 88.44600233683859
- type: precision
value: 87.2575529961609
- type: recall
value: 90.98647971957938
task:
type: BitextMining
- dataset:
config: fra_Latn-hun_Latn
name: MTEB NTREXBitextMining (fra_Latn-hun_Latn)
revision: ed9a4403ed4adbfaf4aab56d5b2709e9f6c3ba33
split: test
type: mteb/NTREX
metrics:
- type: accuracy
value: 92.28843264897347
- type: f1
value: 90.12518778167251
- type: main_score
value: 90.12518778167251
- type: precision
value: 89.12535469871473
- type: recall
value: 92.28843264897347
task:
type: BitextMining
- dataset:
config: heb_Hebr-hun_Latn
name: MTEB NTREXBitextMining (heb_Hebr-hun_Latn)
revision: ed9a4403ed4adbfaf4aab56d5b2709e9f6c3ba33
split: test
type: mteb/NTREX
metrics:
- type: accuracy
value: 87.33099649474211
- type: f1
value: 83.88582874311467
- type: main_score
value: 83.88582874311467
- type: precision
value: 82.31263562009681
- type: recall
value: 87.33099649474211
task:
type: BitextMining
- dataset:
config: hin_Deva-hun_Latn
name: MTEB NTREXBitextMining (hin_Deva-hun_Latn)
revision: ed9a4403ed4adbfaf4aab56d5b2709e9f6c3ba33
split: test
type: mteb/NTREX
metrics:
- type: accuracy
value: 86.52979469203805
- type: f1
value: 83.08240137984755
- type: main_score
value: 83.08240137984755
- type: precision
value: 81.51352028042064
- type: recall
value: 86.52979469203805
task:
type: BitextMining
- dataset:
config: hun_Latn-arb_Arab
name: MTEB NTREXBitextMining (hun_Latn-arb_Arab)
revision: ed9a4403ed4adbfaf4aab56d5b2709e9f6c3ba33
split: test
type: mteb/NTREX
metrics:
- type: accuracy
value: 86.73009514271406
- type: f1
value: 83.12397167179341
- type: main_score
value: 83.12397167179341
- type: precision
value: 81.47805040894676
- type: recall
value: 86.73009514271406
task:
type: BitextMining
- dataset:
config: hun_Latn-ben_Beng
name: MTEB NTREXBitextMining (hun_Latn-ben_Beng)
revision: ed9a4403ed4adbfaf4aab56d5b2709e9f6c3ba33
split: test
type: mteb/NTREX
metrics:
- type: accuracy
value: 41.16174261392088
- type: f1
value: 32.73025519520262
- type: main_score
value: 32.73025519520262
- type: precision
value: 29.859172986363774
- type: recall
value: 41.16174261392088
task:
type: BitextMining
- dataset:
config: hun_Latn-deu_Latn
name: MTEB NTREXBitextMining (hun_Latn-deu_Latn)
revision: ed9a4403ed4adbfaf4aab56d5b2709e9f6c3ba33
split: test
type: mteb/NTREX
metrics:
- type: accuracy
value: 93.39008512769153
- type: f1
value: 91.5456518110499
- type: main_score
value: 91.5456518110499
- type: precision
value: 90.66099148723085
- type: recall
value: 93.39008512769153
task:
type: BitextMining
- dataset:
config: hun_Latn-ell_Grek
name: MTEB NTREXBitextMining (hun_Latn-ell_Grek)
revision: ed9a4403ed4adbfaf4aab56d5b2709e9f6c3ba33
split: test
type: mteb/NTREX
metrics:
- type: accuracy
value: 92.03805708562844
- type: f1
value: 89.81305291270239
- type: main_score
value: 89.81305291270239
- type: precision
value: 88.78317476214322
- type: recall
value: 92.03805708562844
task:
type: BitextMining
- dataset:
config: hun_Latn-eng_Latn
name: MTEB NTREXBitextMining (hun_Latn-eng_Latn)
revision: ed9a4403ed4adbfaf4aab56d5b2709e9f6c3ba33
split: test
type: mteb/NTREX
metrics:
- type: accuracy
value: 94.74211316975463
- type: f1
value: 93.23985978968453
- type: main_score
value: 93.23985978968453
- type: precision
value: 92.51377065598398
- type: recall
value: 94.74211316975463
task:
type: BitextMining
- dataset:
config: hun_Latn-fas_Arab
name: MTEB NTREXBitextMining (hun_Latn-fas_Arab)
revision: ed9a4403ed4adbfaf4aab56d5b2709e9f6c3ba33
split: test
type: mteb/NTREX
metrics:
- type: accuracy
value: 88.5327991987982
- type: f1
value: 85.49240527457853
- type: main_score
value: 85.49240527457853
- type: precision
value: 84.10413238905979
- type: recall
value: 88.5327991987982
task:
type: BitextMining
- dataset:
config: hun_Latn-fin_Latn
name: MTEB NTREXBitextMining (hun_Latn-fin_Latn)
revision: ed9a4403ed4adbfaf4aab56d5b2709e9f6c3ba33
split: test
type: mteb/NTREX
metrics:
- type: accuracy
value: 90.23535302954431
- type: f1
value: 87.53296611584042
- type: main_score
value: 87.53296611584042
- type: precision
value: 86.26690035052579
- type: recall
value: 90.23535302954431
task:
type: BitextMining
- dataset:
config: hun_Latn-fra_Latn
name: MTEB NTREXBitextMining (hun_Latn-fra_Latn)
revision: ed9a4403ed4adbfaf4aab56d5b2709e9f6c3ba33
split: test
type: mteb/NTREX
metrics:
- type: accuracy
value: 92.63895843765648
- type: f1
value: 90.47070605908863
- type: main_score
value: 90.47070605908863
- type: precision
value: 89.42163244867301
- type: recall
value: 92.63895843765648
task:
type: BitextMining
- dataset:
config: hun_Latn-heb_Hebr
name: MTEB NTREXBitextMining (hun_Latn-heb_Hebr)
revision: ed9a4403ed4adbfaf4aab56d5b2709e9f6c3ba33
split: test
type: mteb/NTREX
metrics:
- type: accuracy
value: 86.62994491737606
- type: f1
value: 83.19388173168845
- type: main_score
value: 83.19388173168845
- type: precision
value: 81.65832081455517
- type: recall
value: 86.62994491737606
task:
type: BitextMining
- dataset:
config: hun_Latn-hin_Deva
name: MTEB NTREXBitextMining (hun_Latn-hin_Deva)
revision: ed9a4403ed4adbfaf4aab56d5b2709e9f6c3ba33
split: test
type: mteb/NTREX
metrics:
- type: accuracy
value: 83.97596394591888
- type: f1
value: 79.85502062617736
- type: main_score
value: 79.85502062617736
- type: precision
value: 78.01758192844824
- type: recall
value: 83.97596394591888
task:
type: BitextMining
- dataset:
config: hun_Latn-ind_Latn
name: MTEB NTREXBitextMining (hun_Latn-ind_Latn)
revision: ed9a4403ed4adbfaf4aab56d5b2709e9f6c3ba33
split: test
type: mteb/NTREX
metrics:
- type: accuracy
value: 92.68903355032549
- type: f1
value: 90.64596895343014
- type: main_score
value: 90.64596895343014
- type: precision
value: 89.68869971624103
- type: recall
value: 92.68903355032549
task:
type: BitextMining
- dataset:
config: hun_Latn-jpn_Jpan
name: MTEB NTREXBitextMining (hun_Latn-jpn_Jpan)
revision: ed9a4403ed4adbfaf4aab56d5b2709e9f6c3ba33
split: test
type: mteb/NTREX
metrics:
- type: accuracy
value: 85.778668002003
- type: f1
value: 82.19829744616925
- type: main_score
value: 82.19829744616925
- type: precision
value: 80.62426973794025
- type: recall
value: 85.778668002003
task:
type: BitextMining
- dataset:
config: hun_Latn-kor_Hang
name: MTEB NTREXBitextMining (hun_Latn-kor_Hang)
revision: ed9a4403ed4adbfaf4aab56d5b2709e9f6c3ba33
split: test
type: mteb/NTREX
metrics:
- type: accuracy
value: 84.17626439659489
- type: f1
value: 80.26746468909714
- type: main_score
value: 80.26746468909714
- type: precision
value: 78.5646097351155
- type: recall
value: 84.17626439659489
task:
type: BitextMining
- dataset:
config: hun_Latn-lav_Latn
name: MTEB NTREXBitextMining (hun_Latn-lav_Latn)
revision: ed9a4403ed4adbfaf4aab56d5b2709e9f6c3ba33
split: test
type: mteb/NTREX
metrics:
- type: accuracy
value: 90.1352028042063
- type: f1
value: 87.30262059756302
- type: main_score
value: 87.30262059756302
- type: precision
value: 85.98731430479052
- type: recall
value: 90.1352028042063
task:
type: BitextMining
- dataset:
config: hun_Latn-lit_Latn
name: MTEB NTREXBitextMining (hun_Latn-lit_Latn)
revision: ed9a4403ed4adbfaf4aab56d5b2709e9f6c3ba33
split: test
type: mteb/NTREX
metrics:
- type: accuracy
value: 89.58437656484726
- type: f1
value: 86.8252378567852
- type: main_score
value: 86.8252378567852
- type: precision
value: 85.54581872809214
- type: recall
value: 89.58437656484726
task:
type: BitextMining
- dataset:
config: hun_Latn-nld_Latn
name: MTEB NTREXBitextMining (hun_Latn-nld_Latn)
revision: ed9a4403ed4adbfaf4aab56d5b2709e9f6c3ba33
split: test
type: mteb/NTREX
metrics:
- type: accuracy
value: 93.03955933900852
- type: f1
value: 91.03989317309296
- type: main_score
value: 91.03989317309296
- type: precision
value: 90.08930061759305
- type: recall
value: 93.03955933900852
task:
type: BitextMining
- dataset:
config: hun_Latn-pol_Latn
name: MTEB NTREXBitextMining (hun_Latn-pol_Latn)
revision: ed9a4403ed4adbfaf4aab56d5b2709e9f6c3ba33
split: test
type: mteb/NTREX
metrics:
- type: accuracy
value: 91.58738107160741
- type: f1
value: 89.28225671841095
- type: main_score
value: 89.28225671841095
- type: precision
value: 88.18227341011517
- type: recall
value: 91.58738107160741
task:
type: BitextMining
- dataset:
config: hun_Latn-por_Latn
name: MTEB NTREXBitextMining (hun_Latn-por_Latn)
revision: ed9a4403ed4adbfaf4aab56d5b2709e9f6c3ba33
split: test
type: mteb/NTREX
metrics:
- type: accuracy
value: 93.59038557836755
- type: f1
value: 91.71256885327992
- type: main_score
value: 91.71256885327992
- type: precision
value: 90.80287097312635
- type: recall
value: 93.59038557836755
task:
type: BitextMining
- dataset:
config: hun_Latn-rus_Cyrl
name: MTEB NTREXBitextMining (hun_Latn-rus_Cyrl)
revision: ed9a4403ed4adbfaf4aab56d5b2709e9f6c3ba33
split: test
type: mteb/NTREX
metrics:
- type: accuracy
value: 91.3370055082624
- type: f1
value: 88.88916708395926
- type: main_score
value: 88.88916708395926
- type: precision
value: 87.75961561389704
- type: recall
value: 91.3370055082624
task:
type: BitextMining
- dataset:
config: hun_Latn-spa_Latn
name: MTEB NTREXBitextMining (hun_Latn-spa_Latn)
revision: ed9a4403ed4adbfaf4aab56d5b2709e9f6c3ba33
split: test
type: mteb/NTREX
metrics:
- type: accuracy
value: 93.69053580370556
- type: f1
value: 91.94959105324652
- type: main_score
value: 91.94959105324652
- type: precision
value: 91.12418627941913
- type: recall
value: 93.69053580370556
task:
type: BitextMining
- dataset:
config: hun_Latn-swa_Latn
name: MTEB NTREXBitextMining (hun_Latn-swa_Latn)
revision: ed9a4403ed4adbfaf4aab56d5b2709e9f6c3ba33
split: test
type: mteb/NTREX
metrics:
- type: accuracy
value: 35.803705558337505
- type: f1
value: 27.79832969518814
- type: main_score
value: 27.79832969518814
- type: precision
value: 25.370895920971037
- type: recall
value: 35.803705558337505
task:
type: BitextMining
- dataset:
config: hun_Latn-swe_Latn
name: MTEB NTREXBitextMining (hun_Latn-swe_Latn)
revision: ed9a4403ed4adbfaf4aab56d5b2709e9f6c3ba33
split: test
type: mteb/NTREX
metrics:
- type: accuracy
value: 93.59038557836755
- type: f1
value: 91.66249374061091
- type: main_score
value: 91.66249374061091
- type: precision
value: 90.74445000834585
- type: recall
value: 93.59038557836755
task:
type: BitextMining
- dataset:
config: hun_Latn-tam_Taml
name: MTEB NTREXBitextMining (hun_Latn-tam_Taml)
revision: ed9a4403ed4adbfaf4aab56d5b2709e9f6c3ba33
split: test
type: mteb/NTREX
metrics:
- type: accuracy
value: 27.391086629944915
- type: f1
value: 19.094552675413095
- type: main_score
value: 19.094552675413095
- type: precision
value: 16.88288208814635
- type: recall
value: 27.391086629944915
task:
type: BitextMining
- dataset:
config: hun_Latn-tur_Latn
name: MTEB NTREXBitextMining (hun_Latn-tur_Latn)
revision: ed9a4403ed4adbfaf4aab56d5b2709e9f6c3ba33
split: test
type: mteb/NTREX
metrics:
- type: accuracy
value: 91.48723084626941
- type: f1
value: 89.11700884660323
- type: main_score
value: 89.11700884660323
- type: precision
value: 87.99031881155067
- type: recall
value: 91.48723084626941
task:
type: BitextMining
- dataset:
config: hun_Latn-vie_Latn
name: MTEB NTREXBitextMining (hun_Latn-vie_Latn)
revision: ed9a4403ed4adbfaf4aab56d5b2709e9f6c3ba33
split: test
type: mteb/NTREX
metrics:
- type: accuracy
value: 91.13670505758637
- type: f1
value: 88.6696711734268
- type: main_score
value: 88.6696711734268
- type: precision
value: 87.49374061091638
- type: recall
value: 91.13670505758637
task:
type: BitextMining
- dataset:
config: hun_Latn-zho_Hant
name: MTEB NTREXBitextMining (hun_Latn-zho_Hant)
revision: ed9a4403ed4adbfaf4aab56d5b2709e9f6c3ba33
split: test
type: mteb/NTREX
metrics:
- type: accuracy
value: 89.33400100150224
- type: f1
value: 86.55745523046474
- type: main_score
value: 86.55745523046474
- type: precision
value: 85.29794692038057
- type: recall
value: 89.33400100150224
task:
type: BitextMining
- dataset:
config: hun_Latn-zul_Latn
name: MTEB NTREXBitextMining (hun_Latn-zul_Latn)
revision: ed9a4403ed4adbfaf4aab56d5b2709e9f6c3ba33
split: test
type: mteb/NTREX
metrics:
- type: accuracy
value: 16.675012518778168
- type: f1
value: 11.21636405139599
- type: main_score
value: 11.21636405139599
- type: precision
value: 9.903070059112947
- type: recall
value: 16.675012518778168
task:
type: BitextMining
- dataset:
config: ind_Latn-hun_Latn
name: MTEB NTREXBitextMining (ind_Latn-hun_Latn)
revision: ed9a4403ed4adbfaf4aab56d5b2709e9f6c3ba33
split: test
type: mteb/NTREX
metrics:
- type: accuracy
value: 92.93940911367051
- type: f1
value: 90.96478050408946
- type: main_score
value: 90.96478050408946
- type: precision
value: 90.03922550492406
- type: recall
value: 92.93940911367051
task:
type: BitextMining
- dataset:
config: jpn_Jpan-hun_Latn
name: MTEB NTREXBitextMining (jpn_Jpan-hun_Latn)
revision: ed9a4403ed4adbfaf4aab56d5b2709e9f6c3ba33
split: test
type: mteb/NTREX
metrics:
- type: accuracy
value: 88.28242363545317
- type: f1
value: 85.11433817392756
- type: main_score
value: 85.11433817392756
- type: precision
value: 83.67551326990485
- type: recall
value: 88.28242363545317
task:
type: BitextMining
- dataset:
config: kor_Hang-hun_Latn
name: MTEB NTREXBitextMining (kor_Hang-hun_Latn)
revision: ed9a4403ed4adbfaf4aab56d5b2709e9f6c3ba33
split: test
type: mteb/NTREX
metrics:
- type: accuracy
value: 85.778668002003
- type: f1
value: 81.83608746453012
- type: main_score
value: 81.83608746453012
- type: precision
value: 80.0233683859122
- type: recall
value: 85.778668002003
task:
type: BitextMining
- dataset:
config: lav_Latn-hun_Latn
name: MTEB NTREXBitextMining (lav_Latn-hun_Latn)
revision: ed9a4403ed4adbfaf4aab56d5b2709e9f6c3ba33
split: test
type: mteb/NTREX
metrics:
- type: accuracy
value: 91.73760640961443
- type: f1
value: 89.42914371557336
- type: main_score
value: 89.42914371557336
- type: precision
value: 88.32832582206642
- type: recall
value: 91.73760640961443
task:
type: BitextMining
- dataset:
config: lit_Latn-hun_Latn
name: MTEB NTREXBitextMining (lit_Latn-hun_Latn)
revision: ed9a4403ed4adbfaf4aab56d5b2709e9f6c3ba33
split: test
type: mteb/NTREX
metrics:
- type: accuracy
value: 91.78768152228342
- type: f1
value: 89.50926389584376
- type: main_score
value: 89.50926389584376
- type: precision
value: 88.39926556501419
- type: recall
value: 91.78768152228342
task:
type: BitextMining
- dataset:
config: nld_Latn-hun_Latn
name: MTEB NTREXBitextMining (nld_Latn-hun_Latn)
revision: ed9a4403ed4adbfaf4aab56d5b2709e9f6c3ba33
split: test
type: mteb/NTREX
metrics:
- type: accuracy
value: 93.49023535302955
- type: f1
value: 91.6190953096311
- type: main_score
value: 91.6190953096311
- type: precision
value: 90.72775830412286
- type: recall
value: 93.49023535302955
task:
type: BitextMining
- dataset:
config: pol_Latn-hun_Latn
name: MTEB NTREXBitextMining (pol_Latn-hun_Latn)
revision: ed9a4403ed4adbfaf4aab56d5b2709e9f6c3ba33
split: test
type: mteb/NTREX
metrics:
- type: accuracy
value: 91.28693039559339
- type: f1
value: 88.99515940577533
- type: main_score
value: 88.99515940577533
- type: precision
value: 87.9293940911367
- type: recall
value: 91.28693039559339
task:
type: BitextMining
- dataset:
config: por_Latn-hun_Latn
name: MTEB NTREXBitextMining (por_Latn-hun_Latn)
revision: ed9a4403ed4adbfaf4aab56d5b2709e9f6c3ba33
split: test
type: mteb/NTREX
metrics:
- type: accuracy
value: 93.03955933900852
- type: f1
value: 91.08496077449509
- type: main_score
value: 91.08496077449509
- type: precision
value: 90.17860123518612
- type: recall
value: 93.03955933900852
task:
type: BitextMining
- dataset:
config: rus_Cyrl-hun_Latn
name: MTEB NTREXBitextMining (rus_Cyrl-hun_Latn)
revision: ed9a4403ed4adbfaf4aab56d5b2709e9f6c3ba33
split: test
type: mteb/NTREX
metrics:
- type: accuracy
value: 90.98647971957938
- type: f1
value: 88.43932565514937
- type: main_score
value: 88.43932565514937
- type: precision
value: 87.2475379736271
- type: recall
value: 90.98647971957938
task:
type: BitextMining
- dataset:
config: spa_Latn-hun_Latn
name: MTEB NTREXBitextMining (spa_Latn-hun_Latn)
revision: ed9a4403ed4adbfaf4aab56d5b2709e9f6c3ba33
split: test
type: mteb/NTREX
metrics:
- type: accuracy
value: 93.23985978968453
- type: f1
value: 91.3386746786847
- type: main_score
value: 91.3386746786847
- type: precision
value: 90.43148055416457
- type: recall
value: 93.23985978968453
task:
type: BitextMining
- dataset:
config: swa_Latn-hun_Latn
name: MTEB NTREXBitextMining (swa_Latn-hun_Latn)
revision: ed9a4403ed4adbfaf4aab56d5b2709e9f6c3ba33
split: test
type: mteb/NTREX
metrics:
- type: accuracy
value: 35.95393089634452
- type: f1
value: 30.612257939034187
- type: main_score
value: 30.612257939034187
- type: precision
value: 28.995078568906944
- type: recall
value: 35.95393089634452
task:
type: BitextMining
- dataset:
config: swe_Latn-hun_Latn
name: MTEB NTREXBitextMining (swe_Latn-hun_Latn)
revision: ed9a4403ed4adbfaf4aab56d5b2709e9f6c3ba33
split: test
type: mteb/NTREX
metrics:
- type: accuracy
value: 93.64046069103655
- type: f1
value: 91.86613253213153
- type: main_score
value: 91.86613253213153
- type: precision
value: 91.04072775830413
- type: recall
value: 93.64046069103655
task:
type: BitextMining
- dataset:
config: tam_Taml-hun_Latn
name: MTEB NTREXBitextMining (tam_Taml-hun_Latn)
revision: ed9a4403ed4adbfaf4aab56d5b2709e9f6c3ba33
split: test
type: mteb/NTREX
metrics:
- type: accuracy
value: 29.04356534802203
- type: f1
value: 25.164093122029808
- type: main_score
value: 25.164093122029808
- type: precision
value: 23.849573878565543
- type: recall
value: 29.04356534802203
task:
type: BitextMining
- dataset:
config: tur_Latn-hun_Latn
name: MTEB NTREXBitextMining (tur_Latn-hun_Latn)
revision: ed9a4403ed4adbfaf4aab56d5b2709e9f6c3ba33
split: test
type: mteb/NTREX
metrics:
- type: accuracy
value: 90.83625438157236
- type: f1
value: 88.36087464530128
- type: main_score
value: 88.36087464530128
- type: precision
value: 87.19829744616925
- type: recall
value: 90.83625438157236
task:
type: BitextMining
- dataset:
config: vie_Latn-hun_Latn
name: MTEB NTREXBitextMining (vie_Latn-hun_Latn)
revision: ed9a4403ed4adbfaf4aab56d5b2709e9f6c3ba33
split: test
type: mteb/NTREX
metrics:
- type: accuracy
value: 90.68602904356536
- type: f1
value: 88.10882991153397
- type: main_score
value: 88.10882991153397
- type: precision
value: 86.90118511099983
- type: recall
value: 90.68602904356536
task:
type: BitextMining
- dataset:
config: zho_Hant-hun_Latn
name: MTEB NTREXBitextMining (zho_Hant-hun_Latn)
revision: ed9a4403ed4adbfaf4aab56d5b2709e9f6c3ba33
split: test
type: mteb/NTREX
metrics:
- type: accuracy
value: 90.1352028042063
- type: f1
value: 87.46035720247039
- type: main_score
value: 87.46035720247039
- type: precision
value: 86.19810668383528
- type: recall
value: 90.1352028042063
task:
type: BitextMining
- dataset:
config: zul_Latn-hun_Latn
name: MTEB NTREXBitextMining (zul_Latn-hun_Latn)
revision: ed9a4403ed4adbfaf4aab56d5b2709e9f6c3ba33
split: test
type: mteb/NTREX
metrics:
- type: accuracy
value: 17.1256885327992
- type: f1
value: 13.692538409811572
- type: main_score
value: 13.692538409811572
- type: precision
value: 12.811084017018844
- type: recall
value: 17.1256885327992
task:
type: BitextMining
- dataset:
config: rom-hun
name: MTEB RomaTalesBitextMining (rom-hun)
revision: f4394dbca6845743cd33eba77431767b232ef489
split: test
type: kardosdrur/roma-tales
metrics:
- type: accuracy
value: 6.046511627906977
- type: f1
value: 2.950830564784053
- type: main_score
value: 2.950830564784053
- type: precision
value: 2.295127353266888
- type: recall
value: 6.046511627906977
task:
type: BitextMining
- dataset:
config: hun_Latn
name: MTEB SIB200Classification (hun_Latn)
revision: a74d7350ea12af010cfb1c21e34f1f81fd2e615b
split: test
type: mteb/sib200
metrics:
- type: accuracy
value: 72.74509803921569
- type: f1
value: 71.6748881571977
- type: f1_weighted
value: 72.7699432186266
- type: main_score
value: 72.74509803921569
task:
type: Classification
- dataset:
config: hun_Latn
name: MTEB SIB200Classification (hun_Latn)
revision: a74d7350ea12af010cfb1c21e34f1f81fd2e615b
split: train
type: mteb/sib200
metrics:
- type: accuracy
value: 71.92582025677605
- type: f1
value: 70.9175403606058
- type: f1_weighted
value: 71.9988920000764
- type: main_score
value: 71.92582025677605
task:
type: Classification
- dataset:
config: hun_Latn
name: MTEB SIB200Classification (hun_Latn)
revision: a74d7350ea12af010cfb1c21e34f1f81fd2e615b
split: validation
type: mteb/sib200
metrics:
- type: accuracy
value: 66.76767676767676
- type: f1
value: 66.07599012119566
- type: f1_weighted
value: 67.15823510190054
- type: main_score
value: 66.76767676767676
task:
type: Classification
- dataset:
config: hun_Latn
name: MTEB SIB200ClusteringS2S (hun_Latn)
revision: a74d7350ea12af010cfb1c21e34f1f81fd2e615b
split: test
type: mteb/sib200
metrics:
- type: main_score
value: 39.24288169703154
- type: v_measure
value: 39.24288169703154
- type: v_measure_std
value: 2.214708184335194
task:
type: Clustering
- dataset:
config: hun-eng
name: MTEB Tatoeba (hun-eng)
revision: 69e8f12da6e31d59addadda9a9c8a2e601a0e282
split: test
type: mteb/tatoeba-bitext-mining
metrics:
- type: accuracy
value: 91
- type: f1
value: 88.47999999999999
- type: main_score
value: 88.47999999999999
- type: precision
value: 87.3
- type: recall
value: 91
task:
type: BitextMining
tags:
- mteb
base_model: sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2
language:
- hu
library_name: sentence-transformers
license: apache-2.0
metrics:
- cosine_accuracy
- dot_accuracy
- manhattan_accuracy
- euclidean_accuracy
- max_accuracy
pipeline_tag: sentence-similarity
tags:
- sentence-transformers
- sentence-similarity
- feature-extraction
- generated_from_trainer
- dataset_size:857856
- loss:MultipleNegativesRankingLoss
widget:
- source_sentence: Emberek várnak a lámpánál kerékpárral.
sentences:
- Az emberek piros lámpánál haladnak.
- Az emberek a kerékpárjukon vannak.
- Egy fekete kutya úszik a vízben egy teniszlabdával a szájában
- source_sentence: A kutya a vízben van.
sentences:
- Két férfi takarítja a havat a tetőről, az egyik egy emelőben ül, a másik pedig a tetőn.
- A macska a vízben van, és dühös.
- Egy kutya van a vízben, a szájában egy faág.
- source_sentence: A nő feketét visel.
sentences:
- Egy barna kutya fröcsköl, ahogy úszik a vízben.
- Egy tetoválással rendelkező nő, aki fekete tank tetején néz a földre.
- 'Egy kékbe öltözött nő intenzív arckifejezéssel üti a teniszlabdát. A képen:'
- source_sentence: Az emberek alszanak.
sentences:
- Három ember beszélget egy városi utcán.
- A nő fehéret visel.
- Egy apa és a fia ölelgeti alvás közben.
- source_sentence: Az emberek alszanak.
sentences:
- Egy feketébe öltözött nő cigarettát és bevásárlótáskát tart a kezében, miközben egy idősebb nő átmegy az utcán.
- Egy csoport ember ül egy nyitott, térszerű területen, mögötte nagy bokrok és egy sor viktoriánus stílusú épület, melyek közül sokat a kép jobb oldalán lévő erős elmosódás tesz kivehetetlenné.
- Egy apa és a fia ölelgeti alvás közben.
model-index:
- name: paraphrase-multilingual-MiniLM-L12-hu-v1
results:
- task:
type: triplet
name: Triplet
dataset:
name: all nli dev
type: all-nli-dev
metrics:
- type: cosine_accuracy
value: 0.992
name: Cosine Accuracy
- type: dot_accuracy
value: 0.0108
name: Dot Accuracy
- type: manhattan_accuracy
value: 0.9908
name: Manhattan Accuracy
- type: euclidean_accuracy
value: 0.9908
name: Euclidean Accuracy
- type: max_accuracy
value: 0.992
name: Max Accuracy
- task:
type: triplet
name: Triplet
dataset:
name: all nli test
type: all-nli-test
metrics:
- type: cosine_accuracy
value: 0.9913636363636363
name: Cosine Accuracy
- type: dot_accuracy
value: 0.013939393939393939
name: Dot Accuracy
- type: manhattan_accuracy
value: 0.990909090909091
name: Manhattan Accuracy
- type: euclidean_accuracy
value: 0.9910606060606061
name: Euclidean Accuracy
- type: max_accuracy
value: 0.9913636363636363
name: Max Accuracy
- task:
type: triplet
name: Triplet
dataset:
name: all nli dev
type: all-nli-dev
metrics:
paraphrase-multilingual-MiniLM-L12-v2
This is a sentence-transformers model finetuned from sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2 on the train dataset. It maps sentences & paragraphs to a 384-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
Model Details
Model Description
- Model Type: Sentence Transformer
- Base model: sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2
- Maximum Sequence Length: 128 tokens
- Output Dimensionality: 384 dimensions
- Similarity Function: Cosine Similarity
- Training Dataset:
- train
- Language: hu
- License: apache-2.0
Model Sources
- Documentation: Sentence Transformers Documentation
- Repository: Sentence Transformers on GitHub
- Hugging Face: Sentence Transformers on Hugging Face
Full Model Architecture
SentenceTransformer(
(0): Transformer({'max_seq_length': 128, 'do_lower_case': False}) with Transformer model: BertModel
(1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
)
Usage
Direct Usage (Sentence Transformers)
First install the Sentence Transformers library:
pip install -U sentence-transformers
Then you can load this model and run inference.
from sentence_transformers import SentenceTransformer
# Download from the 🤗 Hub
model = SentenceTransformer("karsar/paraphrase-multilingual-MiniLM-L12-hu_v1")
# Run inference
sentences = [
'Az emberek alszanak.',
'Egy apa és a fia ölelgeti alvás közben.',
'Egy csoport ember ül egy nyitott, térszerű területen, mögötte nagy bokrok és egy sor viktoriánus stílusú épület, melyek közül sokat a kép jobb oldalán lévő erős elmosódás tesz kivehetetlenné.',
]
embeddings = model.encode(sentences)
print(embeddings.shape)
# [3, 384]
# Get the similarity scores for the embeddings
similarities = model.similarity(embeddings, embeddings)
print(similarities.shape)
# [3, 3]
Evaluation
Metrics
Triplet
- Dataset: all-nli-dev
- Evaluated with TripletEvaluator
Metric | Value |
---|---|
cosine_accuracy | 0.992 |
dot_accuracy | 0.0108 |
manhattan_accuracy | 0.9908 |
euclidean_accuracy | 0.9908 |
max_accuracy | 0.992 |
Triplet
- Dataset: all-nli-test
- Evaluated with TripletEvaluator
Metric | Value |
---|---|
cosine_accuracy | 0.9914 |
dot_accuracy | 0.0139 |
manhattan_accuracy | 0.9909 |
euclidean_accuracy | 0.9911 |
max_accuracy | 0.9914 |
Training Details
Training Dataset
train
- Dataset: train
- Size: 857,856 training samples
- Columns: anchor, positive, and negative
- Approximate statistics based on the first 1000 samples:
| | anchor | positive | negative |
|---|---|---|---|
| type | string | string | string |
| details | min: 7 tokens; mean: 11.73 tokens; max: 56 tokens | min: 6 tokens; mean: 15.24 tokens; max: 47 tokens | min: 7 tokens; mean: 16.07 tokens; max: 53 tokens |
- Samples:
anchor | positive | negative |
---|---|---|
Egy lóháton ülő ember átugrik egy lerombolt repülőgép felett. | Egy ember a szabadban, lóháton. | Egy ember egy étteremben van, és omlettet rendel. |
Gyerekek mosolyogva és integetett a kamera | Gyermekek vannak jelen | A gyerekek homlokot rántanak |
Egy fiú ugrál a gördeszkát a közepén egy piros híd. | A fiú gördeszkás trükköt csinál. | A fiú korcsolyázik a járdán. |
- Loss: MultipleNegativesRankingLoss with these parameters: { "scale": 20.0, "similarity_fct": "cos_sim" }
Evaluation Dataset
train
- Dataset: train
- Size: 5,000 evaluation samples
- Columns: anchor, positive, and negative
- Approximate statistics based on the first 1000 samples:
| | anchor | positive | negative |
|---|---|---|---|
| type | string | string | string |
| details | min: 7 tokens; mean: 11.73 tokens; max: 56 tokens | min: 6 tokens; mean: 15.24 tokens; max: 47 tokens | min: 7 tokens; mean: 16.07 tokens; max: 53 tokens |
- Samples:
anchor | positive | negative |
---|---|---|
Egy lóháton ülő ember átugrik egy lerombolt repülőgép felett. | Egy ember a szabadban, lóháton. | Egy ember egy étteremben van, és omlettet rendel. |
Gyerekek mosolyogva és integetett a kamera | Gyermekek vannak jelen | A gyerekek homlokot rántanak |
Egy fiú ugrál a gördeszkát a közepén egy piros híd. | A fiú gördeszkás trükköt csinál. | A fiú korcsolyázik a járdán. |
- Loss: MultipleNegativesRankingLoss with these parameters: { "scale": 20.0, "similarity_fct": "cos_sim" }
Training Hyperparameters
Non-Default Hyperparameters
- eval_strategy: steps
- per_device_train_batch_size: 128
- per_device_eval_batch_size: 128
- num_train_epochs: 1
- warmup_ratio: 0.1
- bf16: True
- batch_sampler: no_duplicates
All Hyperparameters
Click to expand
- overwrite_output_dir: False
- do_predict: False
- eval_strategy: steps
- prediction_loss_only: True
- per_device_train_batch_size: 128
- per_device_eval_batch_size: 128
- per_gpu_train_batch_size: None
- per_gpu_eval_batch_size: None
- gradient_accumulation_steps: 1
- eval_accumulation_steps: None
- torch_empty_cache_steps: None
- learning_rate: 5e-05
- weight_decay: 0.0
- adam_beta1: 0.9
- adam_beta2: 0.999
- adam_epsilon: 1e-08
- max_grad_norm: 1.0
- num_train_epochs: 1
- max_steps: -1
- lr_scheduler_type: linear
- lr_scheduler_kwargs: {}
- warmup_ratio: 0.1
- warmup_steps: 0
- log_level: passive
- log_level_replica: warning
- log_on_each_node: True
- logging_nan_inf_filter: True
- save_safetensors: True
- save_on_each_node: False
- save_only_model: False
- restore_callback_states_from_checkpoint: False
- no_cuda: False
- use_cpu: False
- use_mps_device: False
- seed: 42
- data_seed: None
- jit_mode_eval: False
- use_ipex: False
- bf16: True
- fp16: False
- fp16_opt_level: O1
- half_precision_backend: auto
- bf16_full_eval: False
- fp16_full_eval: False
- tf32: None
- local_rank: 0
- ddp_backend: None
- tpu_num_cores: None
- tpu_metrics_debug: False
- debug: []
- dataloader_drop_last: False
- dataloader_num_workers: 0
- dataloader_prefetch_factor: None
- past_index: -1
- disable_tqdm: False
- remove_unused_columns: True
- label_names: None
- load_best_model_at_end: False
- ignore_data_skip: False
- fsdp: []
- fsdp_min_num_params: 0
- fsdp_config: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
- fsdp_transformer_layer_cls_to_wrap: None
- accelerator_config: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
- deepspeed: None
- label_smoothing_factor: 0.0
- optim: adamw_torch
- optim_args: None
- adafactor: False
- group_by_length: False
- length_column_name: length
- ddp_find_unused_parameters: None
- ddp_bucket_cap_mb: None
- ddp_broadcast_buffers: False
- dataloader_pin_memory: True
- dataloader_persistent_workers: False
- skip_memory_metrics: True
- use_legacy_prediction_loop: False
- push_to_hub: False
- resume_from_checkpoint: None
- hub_model_id: None
- hub_strategy: every_save
- hub_private_repo: False
- hub_always_push: False
- gradient_checkpointing: False
- gradient_checkpointing_kwargs: None
- include_inputs_for_metrics: False
- eval_do_concat_batches: True
- fp16_backend: auto
- push_to_hub_model_id: None
- push_to_hub_organization: None
- mp_parameters:
- auto_find_batch_size: False
- full_determinism: False
- torchdynamo: None
- ray_scope: last
- ddp_timeout: 1800
- torch_compile: False
- torch_compile_backend: None
- torch_compile_mode: None
- dispatch_batches: None
- split_batches: None
- include_tokens_per_second: False
- include_num_input_tokens_seen: False
- neftune_noise_alpha: None
- optim_target_modules: None
- batch_eval_metrics: False
- eval_on_start: False
- eval_use_gather_object: False
- batch_sampler: no_duplicates
- multi_dataset_batch_sampler: proportional
Training Logs
Epoch | Step | Training Loss | train loss | all-nli-dev_max_accuracy | all-nli-test_max_accuracy |
---|---|---|---|---|---|
0 | 0 | - | - | 0.7574 | - |
0.0149 | 100 | 2.5002 | - | - | - |
0.0298 | 200 | 1.9984 | - | - | - |
0.0448 | 300 | 1.8094 | - | - | - |
0.0597 | 400 | 1.6704 | - | - | - |
0.0746 | 500 | 1.5518 | - | - | - |
0.0895 | 600 | 1.449 | - | - | - |
0.1044 | 700 | 1.5998 | - | - | - |
0.1194 | 800 | 1.5725 | - | - | - |
0.1343 | 900 | 1.5341 | - | - | - |
0.1492 | 1000 | 1.3423 | - | - | - |
0.1641 | 1100 | 1.2485 | - | - | - |
0.1791 | 1200 | 1.1527 | - | - | - |
0.1940 | 1300 | 1.1672 | - | - | - |
0.2089 | 1400 | 1.2426 | - | - | - |
0.2238 | 1500 | 1.0948 | - | - | - |
0.2387 | 1600 | 1.0069 | - | - | - |
0.2537 | 1700 | 0.976 | - | - | - |
0.2686 | 1800 | 0.897 | - | - | - |
0.2835 | 1900 | 0.7825 | - | - | - |
0.2984 | 2000 | 0.9421 | 0.1899 | 0.9568 | - |
0.3133 | 2100 | 0.8651 | - | - | - |
0.3283 | 2200 | 0.8184 | - | - | - |
0.3432 | 2300 | 0.699 | - | - | - |
0.3581 | 2400 | 0.6704 | - | - | - |
0.3730 | 2500 | 0.6477 | - | - | - |
0.3879 | 2600 | 0.7077 | - | - | - |
0.4029 | 2700 | 0.7364 | - | - | - |
0.4178 | 2800 | 0.665 | - | - | - |
0.4327 | 2900 | 1.2512 | - | - | - |
0.4476 | 3000 | 1.3693 | - | - | - |
0.4625 | 3100 | 1.3959 | - | - | - |
0.4775 | 3200 | 1.4175 | - | - | - |
0.4924 | 3300 | 1.402 | - | - | - |
0.5073 | 3400 | 1.3832 | - | - | - |
0.5222 | 3500 | 1.3671 | - | - | - |
0.5372 | 3600 | 1.3666 | - | - | - |
0.5521 | 3700 | 1.3479 | - | - | - |
0.5670 | 3800 | 1.3272 | - | - | - |
0.5819 | 3900 | 1.3353 | - | - | - |
0.5968 | 4000 | 1.3177 | 0.0639 | 0.9902 | - |
0.6118 | 4100 | 1.3068 | - | - | - |
0.6267 | 4200 | 1.3054 | - | - | - |
0.6416 | 4300 | 1.3098 | - | - | - |
0.6565 | 4400 | 1.2839 | - | - | - |
0.6714 | 4500 | 1.2976 | - | - | - |
0.6864 | 4600 | 1.2669 | - | - | - |
0.7013 | 4700 | 1.208 | - | - | - |
0.7162 | 4800 | 1.194 | - | - | - |
0.7311 | 4900 | 1.1974 | - | - | - |
0.7460 | 5000 | 1.1834 | - | - | - |
0.7610 | 5100 | 1.1876 | - | - | - |
0.7759 | 5200 | 1.1743 | - | - | - |
0.7908 | 5300 | 1.1839 | - | - | - |
0.8057 | 5400 | 1.1778 | - | - | - |
0.8207 | 5500 | 1.1711 | - | - | - |
0.8356 | 5600 | 1.1809 | - | - | - |
0.8505 | 5700 | 1.1825 | - | - | - |
0.8654 | 5800 | 1.1795 | - | - | - |
0.8803 | 5900 | 1.1788 | - | - | - |
0.8953 | 6000 | 1.1819 | 0.0371 | 0.992 | - |
0.9102 | 6100 | 1.1741 | - | - | - |
0.9251 | 6200 | 1.1871 | - | - | - |
0.9400 | 6300 | 0.498 | - | - | - |
0.9549 | 6400 | 0.093 | - | - | - |
0.9699 | 6500 | 0.1597 | - | - | - |
0.9848 | 6600 | 0.2033 | - | - | - |
0.9997 | 6700 | 0.16 | - | - | - |
1.0 | 6702 | - | - | - | 0.9914 |
Framework Versions
- Python: 3.11.8
- Sentence Transformers: 3.1.1
- Transformers: 4.44.0
- PyTorch: 2.3.0.post101
- Accelerate: 0.33.0
- Datasets: 2.18.0
- Tokenizers: 0.19.0
Citation
BibTeX
Sentence Transformers
@inproceedings{reimers-2019-sentence-bert,
title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
author = "Reimers, Nils and Gurevych, Iryna",
booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
month = "11",
year = "2019",
publisher = "Association for Computational Linguistics",
url = "https://arxiv.org/abs/1908.10084",
}
MultipleNegativesRankingLoss
@misc{henderson2017efficient,
title={Efficient Natural Language Response Suggestion for Smart Reply},
author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil},
year={2017},
eprint={1705.00652},
archivePrefix={arXiv},
primaryClass={cs.CL}
}