Katsumata420 committed 3 days ago

Commit

e470306

verified ·

1 Parent(s): 49bed2a

Upload 92 files

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

1_Pooling/config.json +10 -0
config.json +47 -0
config_sentence_transformers.json +14 -0
jmteb/jmteb.jsonnet +22 -0
jmteb/results/Classification/scores_amazon_counterfactual_classification.json +23 -0
jmteb/results/Classification/scores_amazon_review_classification.json +23 -0
jmteb/results/Classification/scores_massive_intent_classification.json +23 -0
jmteb/results/Classification/scores_massive_scenario_classification.json +23 -0
jmteb/results/Clustering/scores_livedoor_news.json +36 -0
jmteb/results/Clustering/scores_mewsc16.json +36 -0
jmteb/results/PairClassification/scores_paws_x_ja.json +41 -0
jmteb/results/Reranking/scores_esci.json +31 -0
jmteb/results/Retrieval/scores_jagovfaqs_22k.json +43 -0
jmteb/results/Retrieval/scores_jaqket.json +43 -0
jmteb/results/Retrieval/scores_mrtydi.json +43 -0
jmteb/results/Retrieval/scores_nlp_journal_abs_intro.json +43 -0
jmteb/results/Retrieval/scores_nlp_journal_title_abs.json +43 -0
jmteb/results/Retrieval/scores_nlp_journal_title_intro.json +43 -0
jmteb/results/STS/scores_jsick.json +31 -0
jmteb/results/STS/scores_jsts.json +31 -0
jmteb/results/summary.json +62 -0
jmteb/tasks/amazon_counterfactual_classification.jsonnet +32 -0
jmteb/tasks/amazon_review_classification.jsonnet +32 -0
jmteb/tasks/esci.jsonnet +33 -0
jmteb/tasks/jagovfaqs_22k.jsonnet +33 -0
jmteb/tasks/jaqket.jsonnet +33 -0
jmteb/tasks/jsick.jsonnet +25 -0
jmteb/tasks/jsts.jsonnet +25 -0
jmteb/tasks/livedoor_news.jsonnet +24 -0
jmteb/tasks/massive_intent_classification.jsonnet +32 -0
jmteb/tasks/massive_scenario_classification.jsonnet +32 -0
jmteb/tasks/mewsc16.jsonnet +24 -0
jmteb/tasks/mrtydi.jsonnet +34 -0
jmteb/tasks/nlp_journal_abs_intro.jsonnet +33 -0
jmteb/tasks/nlp_journal_title_abs.jsonnet +33 -0
jmteb/tasks/nlp_journal_title_intro.jsonnet +33 -0
jmteb/tasks/paws_x_ja.jsonnet +25 -0
model.safetensors +3 -0
modules.json +20 -0
mteb/models/__init__.py +10 -0
mteb/models/default.py +4 -0
mteb/models/retrieva.py +13 -0
mteb/models/retrieva_en.py +15 -0
mteb/mteb_eval.py +49 -0
mteb/results/AmazonCounterfactualClassification.json +95 -0
mteb/results/ArXivHierarchicalClusteringP2P.json +46 -0
mteb/results/ArXivHierarchicalClusteringS2S.json +46 -0
mteb/results/ArguAna.json +158 -0
mteb/results/AskUbuntuDupQuestions.json +26 -0
mteb/results/BIOSSES.json +26 -0

1_Pooling/config.json ADDED Viewed

	@@ -0,0 +1,10 @@

+{
+  "word_embedding_dimension": 768,
+  "pooling_mode_cls_token": false,
+  "pooling_mode_mean_tokens": true,
+  "pooling_mode_max_tokens": false,
+  "pooling_mode_mean_sqrt_len_tokens": false,
+  "pooling_mode_weightedmean_tokens": false,
+  "pooling_mode_lasttoken": false,
+  "include_prompt": true
+}

config.json ADDED Viewed

	@@ -0,0 +1,47 @@

+{
+  "_name_or_path": "sbintuitions/modernbert-ja-310m",
+  "architectures": [
+    "ModernBertModel"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "bos_token_id": 1,
+  "classifier_activation": "gelu",
+  "classifier_bias": false,
+  "classifier_dropout": 0.0,
+  "classifier_pooling": "cls",
+  "cls_token_id": 6,
+  "decoder_bias": true,
+  "deterministic_flash_attn": false,
+  "embedding_dropout": 0.0,
+  "eos_token_id": 2,
+  "global_attn_every_n_layers": 3,
+  "global_rope_theta": 160000.0,
+  "gradient_checkpointing": false,
+  "hidden_activation": "gelu",
+  "hidden_size": 768,
+  "initializer_cutoff_factor": 2.0,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "layer_norm_eps": 1e-05,
+  "local_attention": 128,
+  "local_rope_theta": 10000.0,
+  "max_position_embeddings": 8192,
+  "mlp_bias": false,
+  "mlp_dropout": 0.0,
+  "model_type": "modernbert",
+  "norm_bias": false,
+  "norm_eps": 1e-05,
+  "num_attention_heads": 12,
+  "num_hidden_layers": 25,
+  "pad_token_id": 3,
+  "position_embedding_type": "rope",
+  "reference_compile": false,
+  "repad_logits_with_grad": false,
+  "sep_token_id": 4,
+  "sparse_pred_ignore_index": -100,
+  "sparse_prediction": false,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.49.0",
+  "vocab_size": 102400
+}

config_sentence_transformers.json ADDED Viewed

	@@ -0,0 +1,14 @@

+{
+  "__version__": {
+    "sentence_transformers": "3.4.1",
+    "transformers": "4.49.0",
+    "pytorch": "2.5.1+cu121"
+  },
+  "prompts": {
+      "Retrieval-query": "関連した文書を探すために次の文を表現して\n",
+      "Retrieval-passage": "次の文章を表現して\n",
+      "default": "同じ意味の文を探すために次の文を表現して\n"
+  },
+  "default_prompt_name": "default",
+  "similarity_fn_name": "cosine"
+}

jmteb/jmteb.jsonnet ADDED Viewed

	@@ -0,0 +1,22 @@

+// Classification
+(import './tasks/amazon_review_classification.jsonnet') +
+(import './tasks/amazon_counterfactual_classification.jsonnet') +
+(import './tasks/massive_intent_classification.jsonnet') +
+(import './tasks/massive_scenario_classification.jsonnet') +
+// Clustering
+(import './tasks/livedoor_news.jsonnet') +
+(import './tasks/mewsc16.jsonnet') +
+// STS
+(import './tasks/jsts.jsonnet') +
+(import './tasks/jsick.jsonnet') +
+// Pair Classification
+(import './tasks/paws_x_ja.jsonnet') +
+// Retrieval
+(import './tasks/jagovfaqs_22k.jsonnet') +
+(import './tasks/mrtydi.jsonnet') +
+(import './tasks/jaqket.jsonnet') +
+(import './tasks/nlp_journal_title_abs.jsonnet') +
+(import './tasks/nlp_journal_title_intro.jsonnet') +
+(import './tasks/nlp_journal_abs_intro.jsonnet') +
+// Reranking
+(import './tasks/esci.jsonnet')

jmteb/results/Classification/scores_amazon_counterfactual_classification.json ADDED Viewed

	@@ -0,0 +1,23 @@

+{
+    "metric_name": "macro_f1",
+    "metric_value": 0.7690321272929969,
+    "details": {
+        "optimal_classifier_name": "logreg",
+        "val_scores": {
+            "knn_cosine_k_2": {
+                "accuracy": 0.907725321888412,
+                "macro_f1": 0.672212134596195
+            },
+            "logreg": {
+                "accuracy": 0.9313304721030042,
+                "macro_f1": 0.759173126614987
+            }
+        },
+        "test_scores": {
+            "logreg": {
+                "accuracy": 0.9346895074946466,
+                "macro_f1": 0.7690321272929969
+            }
+        }
+    }
+}

jmteb/results/Classification/scores_amazon_review_classification.json ADDED Viewed

	@@ -0,0 +1,23 @@

+{
+    "metric_name": "macro_f1",
+    "metric_value": 0.5998172978417656,
+    "details": {
+        "optimal_classifier_name": "logreg",
+        "val_scores": {
+            "knn_cosine_k_2": {
+                "accuracy": 0.4392,
+                "macro_f1": 0.4293118582606878
+            },
+            "logreg": {
+                "accuracy": 0.5954,
+                "macro_f1": 0.5900254170486042
+            }
+        },
+        "test_scores": {
+            "logreg": {
+                "accuracy": 0.6046,
+                "macro_f1": 0.5998172978417656
+            }
+        }
+    }
+}

jmteb/results/Classification/scores_massive_intent_classification.json ADDED Viewed

	@@ -0,0 +1,23 @@

+{
+    "metric_name": "macro_f1",
+    "metric_value": 0.8153216318848042,
+    "details": {
+        "optimal_classifier_name": "logreg",
+        "val_scores": {
+            "knn_cosine_k_2": {
+                "accuracy": 0.7998032464338416,
+                "macro_f1": 0.7840757781194604
+            },
+            "logreg": {
+                "accuracy": 0.8666994589276931,
+                "macro_f1": 0.8136832325973621
+            }
+        },
+        "test_scores": {
+            "logreg": {
+                "accuracy": 0.8638197713517148,
+                "macro_f1": 0.8153216318848042
+            }
+        }
+    }
+}

jmteb/results/Classification/scores_massive_scenario_classification.json ADDED Viewed

	@@ -0,0 +1,23 @@

+{
+    "metric_name": "macro_f1",
+    "metric_value": 0.9014240422977099,
+    "details": {
+        "optimal_classifier_name": "logreg",
+        "val_scores": {
+            "knn_cosine_k_2": {
+                "accuracy": 0.8711264141662568,
+                "macro_f1": 0.8669048603927182
+            },
+            "logreg": {
+                "accuracy": 0.9011313330054107,
+                "macro_f1": 0.893877736725918
+            }
+        },
+        "test_scores": {
+            "logreg": {
+                "accuracy": 0.9041694687289845,
+                "macro_f1": 0.9014240422977099
+            }
+        }
+    }
+}

jmteb/results/Clustering/scores_livedoor_news.json ADDED Viewed

	@@ -0,0 +1,36 @@

+{
+    "metric_name": "v_measure_score",
+    "metric_value": 0.513545352498706,
+    "details": {
+        "optimal_clustering_model_name": "MiniBatchKMeans",
+        "val_scores": {
+            "MiniBatchKMeans": {
+                "v_measure_score": 0.5140841329017503,
+                "homogeneity_score": 0.5052453627266255,
+                "completeness_score": 0.5232376606138658
+            },
+            "AgglomerativeClustering": {
+                "v_measure_score": 0.49350214308585105,
+                "homogeneity_score": 0.4873068478340836,
+                "completeness_score": 0.49985699253269256
+            },
+            "BisectingKMeans": {
+                "v_measure_score": 0.4843217444145435,
+                "homogeneity_score": 0.48227844059111663,
+                "completeness_score": 0.48638243593076996
+            },
+            "Birch": {
+                "v_measure_score": 0.5045054710151884,
+                "homogeneity_score": 0.5008173784727417,
+                "completeness_score": 0.5082482858481403
+            }
+        },
+        "test_scores": {
+            "MiniBatchKMeans": {
+                "v_measure_score": 0.513545352498706,
+                "homogeneity_score": 0.5099866166637427,
+                "completeness_score": 0.5171541037503654
+            }
+        }
+    }
+}

jmteb/results/Clustering/scores_mewsc16.json ADDED Viewed

	@@ -0,0 +1,36 @@

+{
+    "metric_name": "v_measure_score",
+    "metric_value": 0.46097799248263915,
+    "details": {
+        "optimal_clustering_model_name": "AgglomerativeClustering",
+        "val_scores": {
+            "MiniBatchKMeans": {
+                "v_measure_score": 0.44916188797792883,
+                "homogeneity_score": 0.49147958259688423,
+                "completeness_score": 0.41355380899134786
+            },
+            "AgglomerativeClustering": {
+                "v_measure_score": 0.5246463072498976,
+                "homogeneity_score": 0.5663240673439284,
+                "completeness_score": 0.4886824631609394
+            },
+            "BisectingKMeans": {
+                "v_measure_score": 0.39737928507985054,
+                "homogeneity_score": 0.43737570574597956,
+                "completeness_score": 0.36408503061737185
+            },
+            "Birch": {
+                "v_measure_score": 0.5160631364820057,
+                "homogeneity_score": 0.5643018754693391,
+                "completeness_score": 0.4754221824714356
+            }
+        },
+        "test_scores": {
+            "AgglomerativeClustering": {
+                "v_measure_score": 0.46097799248263915,
+                "homogeneity_score": 0.4967671593496861,
+                "completeness_score": 0.42999907535625936
+            }
+        }
+    }
+}

jmteb/results/PairClassification/scores_paws_x_ja.json ADDED Viewed

	@@ -0,0 +1,41 @@

+{
+    "metric_name": "binary_f1",
+    "metric_value": 0.6097337006427915,
+    "details": {
+        "optimal_distance_metric": "euclidean_distances",
+        "val_scores": {
+            "cosine_distances": {
+                "accuracy": 0.5725,
+                "accuracy_threshold": -0.05995553731918335,
+                "binary_f1": 0.5979670522257273,
+                "binary_f1_threshold": 1.0
+            },
+            "manhatten_distances": {
+                "accuracy": 0.648,
+                "accuracy_threshold": 6.833098888397217,
+                "binary_f1": 0.6174142480211082,
+                "binary_f1_threshold": 12.269868850708008
+            },
+            "euclidean_distances": {
+                "accuracy": 0.6465,
+                "accuracy_threshold": 0.3111177384853363,
+                "binary_f1": 0.6183574879227053,
+                "binary_f1_threshold": 0.564425528049469
+            },
+            "dot_similarities": {
+                "accuracy": 0.646,
+                "accuracy_threshold": 0.9595050811767578,
+                "binary_f1": 0.618229854689564,
+                "binary_f1_threshold": 0.8423429727554321
+            }
+        },
+        "test_scores": {
+            "euclidean_distances": {
+                "accuracy": 0.615,
+                "accuracy_threshold": 0.3111177384853363,
+                "binary_f1": 0.6097337006427915,
+                "binary_f1_threshold": 0.564425528049469
+            }
+        }
+    }
+}

jmteb/results/Reranking/scores_esci.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+    "metric_name": "ndcg@10",
+    "metric_value": 0.9354186207520728,
+    "details": {
+        "optimal_distance_metric": "euclidean_distance",
+        "val_scores": {
+            "cosine_similarity": {
+                "ndcg@10": 0.9477835725930323,
+                "ndcg@20": 0.9591879767306916,
+                "ndcg@40": 0.9667225066187783
+            },
+            "dot_score": {
+                "ndcg@10": 0.9476098413475649,
+                "ndcg@20": 0.9589807025526251,
+                "ndcg@40": 0.9665249592723859
+            },
+            "euclidean_distance": {
+                "ndcg@10": 0.9477934218097472,
+                "ndcg@20": 0.9591607950860748,
+                "ndcg@40": 0.9666650348508583
+            }
+        },
+        "test_scores": {
+            "euclidean_distance": {
+                "ndcg@10": 0.9354186207520728,
+                "ndcg@20": 0.9515087918879773,
+                "ndcg@40": 0.9603281546305616
+            }
+        }
+    }
+}

jmteb/results/Retrieval/scores_jagovfaqs_22k.json ADDED Viewed

	@@ -0,0 +1,43 @@

+{
+    "metric_name": "ndcg@10",
+    "metric_value": 0.7281126791454011,
+    "details": {
+        "optimal_distance_metric": "euclidean_distance",
+        "val_scores": {
+            "cosine_similarity": {
+                "accuracy@1": 0.5946183094472068,
+                "accuracy@3": 0.7657209710441649,
+                "accuracy@5": 0.8218777420298333,
+                "accuracy@10": 0.8724773325533782,
+                "ndcg@10": 0.7358693711267098,
+                "mrr@10": 0.6918434332883007
+            },
+            "dot_score": {
+                "accuracy@1": 0.5937408599005557,
+                "accuracy@3": 0.7665984205908161,
+                "accuracy@5": 0.8215852588476162,
+                "accuracy@10": 0.8724773325533782,
+                "ndcg@10": 0.7357749849472581,
+                "mrr@10": 0.6917069411365993
+            },
+            "euclidean_distance": {
+                "accuracy@1": 0.5949107926294238,
+                "accuracy@3": 0.7642585551330798,
+                "accuracy@5": 0.8210002924831822,
+                "accuracy@10": 0.8736472652822462,
+                "ndcg@10": 0.7361924814420154,
+                "mrr@10": 0.6919672743817233
+            }
+        },
+        "test_scores": {
+            "euclidean_distance": {
+                "accuracy@1": 0.591812865497076,
+                "accuracy@3": 0.7538011695906432,
+                "accuracy@5": 0.8114035087719298,
+                "accuracy@10": 0.8649122807017544,
+                "ndcg@10": 0.7281126791454011,
+                "mrr@10": 0.6842285110925452
+            }
+        }
+    }
+}

jmteb/results/Retrieval/scores_jaqket.json ADDED Viewed

	@@ -0,0 +1,43 @@

+{
+    "metric_name": "ndcg@10",
+    "metric_value": 0.6756415397851852,
+    "details": {
+        "optimal_distance_metric": "euclidean_distance",
+        "val_scores": {
+            "cosine_similarity": {
+                "accuracy@1": 0.5145728643216081,
+                "accuracy@3": 0.7185929648241206,
+                "accuracy@5": 0.770854271356784,
+                "accuracy@10": 0.8190954773869347,
+                "ndcg@10": 0.6730166004888566,
+                "mrr@10": 0.6255224535375292
+            },
+            "dot_score": {
+                "accuracy@1": 0.5125628140703518,
+                "accuracy@3": 0.7185929648241206,
+                "accuracy@5": 0.770854271356784,
+                "accuracy@10": 0.8190954773869347,
+                "ndcg@10": 0.6728020927265955,
+                "mrr@10": 0.625163515992662
+            },
+            "euclidean_distance": {
+                "accuracy@1": 0.5175879396984925,
+                "accuracy@3": 0.7175879396984924,
+                "accuracy@5": 0.771859296482412,
+                "accuracy@10": 0.8180904522613065,
+                "ndcg@10": 0.6737125432901869,
+                "mrr@10": 0.626774347930127
+            }
+        },
+        "test_scores": {
+            "euclidean_distance": {
+                "accuracy@1": 0.5115346038114343,
+                "accuracy@3": 0.7211634904714143,
+                "accuracy@5": 0.7713139418254764,
+                "accuracy@10": 0.8284854563691073,
+                "ndcg@10": 0.6756415397851852,
+                "mrr@10": 0.6259448981866229
+            }
+        }
+    }
+}

jmteb/results/Retrieval/scores_mrtydi.json ADDED Viewed

	@@ -0,0 +1,43 @@

+{
+    "metric_name": "ndcg@10",
+    "metric_value": 0.3799830327521453,
+    "details": {
+        "optimal_distance_metric": "dot_score",
+        "val_scores": {
+            "cosine_similarity": {
+                "accuracy@1": 0.2510775862068966,
+                "accuracy@3": 0.43426724137931033,
+                "accuracy@5": 0.509698275862069,
+                "accuracy@10": 0.6099137931034483,
+                "ndcg@10": 0.4218713386689512,
+                "mrr@10": 0.3627116687192117
+            },
+            "dot_score": {
+                "accuracy@1": 0.2543103448275862,
+                "accuracy@3": 0.4353448275862069,
+                "accuracy@5": 0.5129310344827587,
+                "accuracy@10": 0.6088362068965517,
+                "ndcg@10": 0.4233839243705678,
+                "mrr@10": 0.36503018951833593
+            },
+            "euclidean_distance": {
+                "accuracy@1": 0.2510775862068966,
+                "accuracy@3": 0.43211206896551724,
+                "accuracy@5": 0.5129310344827587,
+                "accuracy@10": 0.6109913793103449,
+                "ndcg@10": 0.4220794997894996,
+                "mrr@10": 0.36269199849480005
+            }
+        },
+        "test_scores": {
+            "dot_score": {
+                "accuracy@1": 0.24583333333333332,
+                "accuracy@3": 0.42083333333333334,
+                "accuracy@5": 0.5027777777777778,
+                "accuracy@10": 0.6,
+                "ndcg@10": 0.3799830327521453,
+                "mrr@10": 0.3540084876543211
+            }
+        }
+    }
+}

jmteb/results/Retrieval/scores_nlp_journal_abs_intro.json ADDED Viewed

	@@ -0,0 +1,43 @@

+{
+    "metric_name": "ndcg@10",
+    "metric_value": 0.9312903487668528,
+    "details": {
+        "optimal_distance_metric": "cosine_similarity",
+        "val_scores": {
+            "cosine_similarity": {
+                "accuracy@1": 0.94,
+                "accuracy@3": 0.96,
+                "accuracy@5": 0.98,
+                "accuracy@10": 0.98,
+                "ndcg@10": 0.9607938887245083,
+                "mrr@10": 0.9545
+            },
+            "dot_score": {
+                "accuracy@1": 0.94,
+                "accuracy@3": 0.96,
+                "accuracy@5": 0.98,
+                "accuracy@10": 0.98,
+                "ndcg@10": 0.9607938887245083,
+                "mrr@10": 0.9545
+            },
+            "euclidean_distance": {
+                "accuracy@1": 0.94,
+                "accuracy@3": 0.96,
+                "accuracy@5": 0.98,
+                "accuracy@10": 0.98,
+                "ndcg@10": 0.9599228286971825,
+                "mrr@10": 0.9533333333333333
+            }
+        },
+        "test_scores": {
+            "cosine_similarity": {
+                "accuracy@1": 0.8737623762376238,
+                "accuracy@3": 0.948019801980198,
+                "accuracy@5": 0.9678217821782178,
+                "accuracy@10": 0.9876237623762376,
+                "ndcg@10": 0.9312903487668528,
+                "mrr@10": 0.9131109539525379
+            }
+        }
+    }
+}

jmteb/results/Retrieval/scores_nlp_journal_title_abs.json ADDED Viewed

	@@ -0,0 +1,43 @@

+{
+    "metric_name": "ndcg@10",
+    "metric_value": 0.9683680126122469,
+    "details": {
+        "optimal_distance_metric": "dot_score",
+        "val_scores": {
+            "cosine_similarity": {
+                "accuracy@1": 0.92,
+                "accuracy@3": 0.98,
+                "accuracy@5": 0.99,
+                "accuracy@10": 1.0,
+                "ndcg@10": 0.964415325130387,
+                "mrr@10": 0.9525
+            },
+            "dot_score": {
+                "accuracy@1": 0.92,
+                "accuracy@3": 0.99,
+                "accuracy@5": 0.99,
+                "accuracy@10": 1.0,
+                "ndcg@10": 0.9651085595496531,
+                "mrr@10": 0.9533333333333333
+            },
+            "euclidean_distance": {
+                "accuracy@1": 0.92,
+                "accuracy@3": 0.98,
+                "accuracy@5": 0.99,
+                "accuracy@10": 1.0,
+                "ndcg@10": 0.9631060275946723,
+                "mrr@10": 0.9508333333333333
+            }
+        },
+        "test_scores": {
+            "dot_score": {
+                "accuracy@1": 0.9381188118811881,
+                "accuracy@3": 0.9826732673267327,
+                "accuracy@5": 0.9876237623762376,
+                "accuracy@10": 0.9925742574257426,
+                "ndcg@10": 0.9683680126122469,
+                "mrr@10": 0.960258525852585
+            }
+        }
+    }
+}

jmteb/results/Retrieval/scores_nlp_journal_title_intro.json ADDED Viewed

	@@ -0,0 +1,43 @@

+{
+    "metric_name": "ndcg@10",
+    "metric_value": 0.8408362653388072,
+    "details": {
+        "optimal_distance_metric": "dot_score",
+        "val_scores": {
+            "cosine_similarity": {
+                "accuracy@1": 0.83,
+                "accuracy@3": 0.91,
+                "accuracy@5": 0.94,
+                "accuracy@10": 0.99,
+                "ndcg@10": 0.9046856604073044,
+                "mrr@10": 0.8780952380952379
+            },
+            "dot_score": {
+                "accuracy@1": 0.83,
+                "accuracy@3": 0.91,
+                "accuracy@5": 0.94,
+                "accuracy@10": 0.99,
+                "ndcg@10": 0.9053025824811691,
+                "mrr@10": 0.8787738095238095
+            },
+            "euclidean_distance": {
+                "accuracy@1": 0.83,
+                "accuracy@3": 0.91,
+                "accuracy@5": 0.94,
+                "accuracy@10": 0.99,
+                "ndcg@10": 0.9041030740876984,
+                "mrr@10": 0.8774563492063492
+            }
+        },
+        "test_scores": {
+            "dot_score": {
+                "accuracy@1": 0.7351485148514851,
+                "accuracy@3": 0.8613861386138614,
+                "accuracy@5": 0.9108910891089109,
+                "accuracy@10": 0.943069306930693,
+                "ndcg@10": 0.8408362653388072,
+                "mrr@10": 0.8077675624705328
+            }
+        }
+    }
+}

jmteb/results/STS/scores_jsick.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+    "metric_name": "spearman",
+    "metric_value": 0.743657520470515,
+    "details": {
+        "optimal_similarity_metric": "dot_score",
+        "val_scores": {
+            "cosine_similarity": {
+                "pearson": 0.7957368400871296,
+                "spearman": 0.762797232405231
+            },
+            "manhatten_distance": {
+                "pearson": 0.7896085210418337,
+                "spearman": 0.7623109878831168
+            },
+            "euclidean_distance": {
+                "pearson": 0.7896085210418337,
+                "spearman": 0.7623109878831168
+            },
+            "dot_score": {
+                "pearson": 0.7957067931754913,
+                "spearman": 0.7628188190178943
+            }
+        },
+        "test_scores": {
+            "dot_score": {
+                "pearson": 0.7800093069496337,
+                "spearman": 0.743657520470515
+            }
+        }
+    }
+}

jmteb/results/STS/scores_jsts.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+    "metric_name": "spearman",
+    "metric_value": 0.8428310988577061,
+    "details": {
+        "optimal_similarity_metric": "cosine_similarity",
+        "val_scores": {
+            "cosine_similarity": {
+                "pearson": 0.8663617486013027,
+                "spearman": 0.8264545526446698
+            },
+            "manhatten_distance": {
+                "pearson": 0.8624142417397704,
+                "spearman": 0.8263746662985753
+            },
+            "euclidean_distance": {
+                "pearson": 0.8624142417397704,
+                "spearman": 0.8263746662985753
+            },
+            "dot_score": {
+                "pearson": 0.8663097123455762,
+                "spearman": 0.8263795191808255
+            }
+        },
+        "test_scores": {
+            "cosine_similarity": {
+                "pearson": 0.8833575064948627,
+                "spearman": 0.8428310988577061
+            }
+        }
+    }
+}

jmteb/results/summary.json ADDED Viewed

	@@ -0,0 +1,62 @@

+{
+    "Classification": {
+        "amazon_counterfactual_classification": {
+            "macro_f1": 0.7690321272929969
+        },
+        "amazon_review_classification": {
+            "macro_f1": 0.5998172978417656
+        },
+        "massive_intent_classification": {
+            "macro_f1": 0.8153216318848042
+        },
+        "massive_scenario_classification": {
+            "macro_f1": 0.9014240422977099
+        }
+    },
+    "Reranking": {
+        "esci": {
+            "ndcg@10": 0.9354186207520728
+        }
+    },
+    "Retrieval": {
+        "jagovfaqs_22k": {
+            "ndcg@10": 0.7281126791454011
+        },
+        "jaqket": {
+            "ndcg@10": 0.6756415397851852
+        },
+        "mrtydi": {
+            "ndcg@10": 0.3799830327521453
+        },
+        "nlp_journal_abs_intro": {
+            "ndcg@10": 0.9312903487668528
+        },
+        "nlp_journal_title_abs": {
+            "ndcg@10": 0.9683680126122469
+        },
+        "nlp_journal_title_intro": {
+            "ndcg@10": 0.8408362653388072
+        }
+    },
+    "STS": {
+        "jsick": {
+            "spearman": 0.743657520470515
+        },
+        "jsts": {
+            "spearman": 0.8428310988577061
+        }
+    },
+    "Clustering": {
+        "livedoor_news": {
+            "v_measure_score": 0.513545352498706
+        },
+        "mewsc16": {
+            "v_measure_score": 0.46097799248263915
+        }
+    },
+    "PairClassification": {
+        "paws_x_ja": {
+            "binary_f1": 0.6097337006427915
+        }
+    }
+}

jmteb/tasks/amazon_counterfactual_classification.jsonnet ADDED Viewed

	@@ -0,0 +1,32 @@

+{
+  amazon_counterfactual_classification: {
+    class_path: 'ClassificationEvaluator',
+    init_args: {
+      train_dataset: {
+        class_path: 'HfClassificationDataset',
+        init_args: {
+          path: 'sbintuitions/JMTEB',
+          split: 'train',
+          name: 'amazon_counterfactual_classification',
+        },
+      },
+      val_dataset: {
+        class_path: 'HfClassificationDataset',
+        init_args: {
+          path: 'sbintuitions/JMTEB',
+          split: 'validation',
+          name: 'amazon_counterfactual_classification',
+        },
+      },
+      test_dataset: {
+        class_path: 'HfClassificationDataset',
+        init_args: {
+          path: 'sbintuitions/JMTEB',
+          split: 'test',
+          name: 'amazon_counterfactual_classification',
+        },
+      },
+      prefix: '同じクラスに属する文を探すために次の文を表現して\n',
+    },
+  },
+}

jmteb/tasks/amazon_review_classification.jsonnet ADDED Viewed

	@@ -0,0 +1,32 @@

+{
+  amazon_review_classification: {
+    class_path: 'ClassificationEvaluator',
+    init_args: {
+      train_dataset: {
+        class_path: 'HfClassificationDataset',
+        init_args: {
+          path: 'sbintuitions/JMTEB',
+          split: 'train',
+          name: 'amazon_review_classification',
+        },
+      },
+      val_dataset: {
+        class_path: 'HfClassificationDataset',
+        init_args: {
+          path: 'sbintuitions/JMTEB',
+          split: 'validation',
+          name: 'amazon_review_classification',
+        },
+      },
+      test_dataset: {
+        class_path: 'HfClassificationDataset',
+        init_args: {
+          path: 'sbintuitions/JMTEB',
+          split: 'test',
+          name: 'amazon_review_classification',
+        },
+      },
+      prefix: '同じクラスに属する文を探すために次の文を表現して\n',
+    },
+  },
+}

jmteb/tasks/esci.jsonnet ADDED Viewed

	@@ -0,0 +1,33 @@

+{
+  esci: {
+    class_path: 'RerankingEvaluator',
+    init_args: {
+      val_query_dataset: {
+        class_path: 'HfRerankingQueryDataset',
+        init_args: {
+          path: 'sbintuitions/JMTEB',
+          split: 'validation',
+          name: 'esci-query',
+        },
+      },
+      test_query_dataset: {
+        class_path: 'HfRerankingQueryDataset',
+        init_args: {
+          path: 'sbintuitions/JMTEB',
+          split: 'test',
+          name: 'esci-query',
+        },
+      },
+      doc_dataset: {
+        class_path: 'HfRerankingDocDataset',
+        init_args: {
+          path: 'sbintuitions/JMTEB',
+          split: 'corpus',
+          name: 'esci-corpus',
+        },
+      },
+      query_prefix: '関連した文書を探すために次の文を表現して\n',
+      doc_prefix: '次の文章を表現して\n',
+    },
+  },
+}

jmteb/tasks/jagovfaqs_22k.jsonnet ADDED Viewed

	@@ -0,0 +1,33 @@

+{
+  jagovfaqs_22k: {
+    class_path: 'RetrievalEvaluator',
+    init_args: {
+      val_query_dataset: {
+        class_path: 'HfRetrievalQueryDataset',
+        init_args: {
+          path: 'sbintuitions/JMTEB',
+          split: 'validation',
+          name: 'jagovfaqs_22k-query',
+        },
+      },
+      test_query_dataset: {
+        class_path: 'HfRetrievalQueryDataset',
+        init_args: {
+          path: 'sbintuitions/JMTEB',
+          split: 'test',
+          name: 'jagovfaqs_22k-query',
+        },
+      },
+      doc_dataset: {
+        class_path: 'HfRetrievalDocDataset',
+        init_args: {
+          path: 'sbintuitions/JMTEB',
+          split: 'corpus',
+          name: 'jagovfaqs_22k-corpus',
+        },
+      },
+      query_prefix: '関連した文書を探すために次の文を表現して\n',
+      doc_prefix: '次の文章を表現して\n',
+    },
+  },
+}

jmteb/tasks/jaqket.jsonnet ADDED Viewed

	@@ -0,0 +1,33 @@

+// jaqket task: a RetrievalEvaluator whose validation/test queries and document corpus are all loaded from 'sbintuitions/JMTEB'.
+{
+  jaqket: {
+    class_path: 'RetrievalEvaluator',
+    init_args: {
+      val_query_dataset: {
+        class_path: 'HfRetrievalQueryDataset',
+        init_args: {
+          path: 'sbintuitions/JMTEB',
+          split: 'validation',
+          name: 'jaqket-query',
+        },
+      },
+      test_query_dataset: {
+        class_path: 'HfRetrievalQueryDataset',
+        init_args: {
+          path: 'sbintuitions/JMTEB',
+          split: 'test',
+          name: 'jaqket-query',
+        },
+      },
+      doc_dataset: {
+        class_path: 'HfRetrievalDocDataset',
+        init_args: {
+          path: 'sbintuitions/JMTEB',
+          split: 'corpus',
+          name: 'jaqket-corpus',
+        },
+      },
+      query_prefix: '関連した文書を探すために次の文を表現して\n',  // English: "Represent the following sentence to find related documents"
+      doc_prefix: '次の文章を表現して\n',  // English: "Represent the following text"
+    },
+  },
+}

jmteb/tasks/jsick.jsonnet ADDED Viewed

	@@ -0,0 +1,25 @@

+// jsick task: an STSEvaluator over the validation and test splits of the 'jsick' config in 'sbintuitions/JMTEB'.
+// Both sentences of a pair receive the same prefix.
+{
+  jsick: {
+    class_path: 'STSEvaluator',
+    init_args: {
+      val_dataset: {
+        class_path: 'HfSTSDataset',
+        init_args: {
+          path: 'sbintuitions/JMTEB',
+          split: 'validation',
+          name: 'jsick',
+        },
+      },
+      test_dataset: {
+        class_path: 'HfSTSDataset',
+        init_args: {
+          path: 'sbintuitions/JMTEB',
+          split: 'test',
+          name: 'jsick',
+        },
+      },
+      sentence1_prefix: '同じ意味の文を探すために次の文を表現して\n',  // English: "Represent the following sentence to find sentences with the same meaning"
+      sentence2_prefix: '同じ意味の文を探すために次の文を表現して\n',  // same text as sentence1_prefix
+    },
+  },
+}

jmteb/tasks/jsts.jsonnet ADDED Viewed

	@@ -0,0 +1,25 @@

+// jsts task: an STSEvaluator over the 'jsts' config in 'sbintuitions/JMTEB'.
+// Both sentences of a pair receive the same prefix.
+{
+  jsts: {
+    class_path: 'STSEvaluator',
+    init_args: {
+      val_dataset: {
+        class_path: 'HfSTSDataset',
+        init_args: {
+          path: 'sbintuitions/JMTEB',
+          // NOTE(review): validation data is read from the 'train' split, unlike the sibling tasks that use 'validation' — confirm this is intentional.
+          split: 'train',
+          name: 'jsts',
+        },
+      },
+      test_dataset: {
+        class_path: 'HfSTSDataset',
+        init_args: {
+          path: 'sbintuitions/JMTEB',
+          split: 'test',
+          name: 'jsts',
+        },
+      },
+      sentence1_prefix: '同じ意味の文を探すために次の文を表現して\n',  // English: "Represent the following sentence to find sentences with the same meaning"
+      sentence2_prefix: '同じ意味の文を探すために次の文を表現して\n',  // same text as sentence1_prefix
+    },
+  },
+}

jmteb/tasks/livedoor_news.jsonnet ADDED Viewed

	@@ -0,0 +1,24 @@

+// livedoor_news task: a ClusteringEvaluator over the validation and test splits of the 'livedoor_news' config in 'sbintuitions/JMTEB'.
+{
+  livedoor_news: {
+    class_path: 'ClusteringEvaluator',
+    init_args: {
+      val_dataset: {
+        class_path: 'HfClusteringDataset',
+        init_args: {
+          path: 'sbintuitions/JMTEB',
+          split: 'validation',
+          name: 'livedoor_news',
+        },
+      },
+      test_dataset: {
+        class_path: 'HfClusteringDataset',
+        init_args: {
+          path: 'sbintuitions/JMTEB',
+          split: 'test',
+          name: 'livedoor_news',
+        },
+      },
+      prefix: '類似した文を探すために次の文を表現して\n',  // English: "Represent the following sentence to find similar sentences"
+    },
+  },
+}

jmteb/tasks/massive_intent_classification.jsonnet ADDED Viewed

	@@ -0,0 +1,32 @@

+// massive_intent_classification task: a ClassificationEvaluator over the train, validation and test splits
+// of the 'massive_intent_classification' config in 'sbintuitions/JMTEB'.
+{
+  massive_intent_classification: {
+    class_path: 'ClassificationEvaluator',
+    init_args: {
+      train_dataset: {
+        class_path: 'HfClassificationDataset',
+        init_args: {
+          path: 'sbintuitions/JMTEB',
+          split: 'train',
+          name: 'massive_intent_classification',
+        },
+      },
+      val_dataset: {
+        class_path: 'HfClassificationDataset',
+        init_args: {
+          path: 'sbintuitions/JMTEB',
+          split: 'validation',
+          name: 'massive_intent_classification',
+        },
+      },
+      test_dataset: {
+        class_path: 'HfClassificationDataset',
+        init_args: {
+          path: 'sbintuitions/JMTEB',
+          split: 'test',
+          name: 'massive_intent_classification',
+        },
+      },
+      prefix: '同じクラスに属する文を探すために次の文を表現して\n',  // English: "Represent the following sentence to find sentences belonging to the same class"
+    },
+  },
+}

jmteb/tasks/massive_scenario_classification.jsonnet ADDED Viewed

	@@ -0,0 +1,32 @@

+// massive_scenario_classification task: a ClassificationEvaluator over the train, validation and test splits
+// of the 'massive_scenario_classification' config in 'sbintuitions/JMTEB'.
+{
+  massive_scenario_classification: {
+    class_path: 'ClassificationEvaluator',
+    init_args: {
+      train_dataset: {
+        class_path: 'HfClassificationDataset',
+        init_args: {
+          path: 'sbintuitions/JMTEB',
+          split: 'train',
+          name: 'massive_scenario_classification',
+        },
+      },
+      val_dataset: {
+        class_path: 'HfClassificationDataset',
+        init_args: {
+          path: 'sbintuitions/JMTEB',
+          split: 'validation',
+          name: 'massive_scenario_classification',
+        },
+      },
+      test_dataset: {
+        class_path: 'HfClassificationDataset',
+        init_args: {
+          path: 'sbintuitions/JMTEB',
+          split: 'test',
+          name: 'massive_scenario_classification',
+        },
+      },
+      prefix: '同じクラスに属する文を探すために次の文を表現して\n',  // English: "Represent the following sentence to find sentences belonging to the same class"
+    },
+  },
+}

jmteb/tasks/mewsc16.jsonnet ADDED Viewed

	@@ -0,0 +1,24 @@

+// mewsc16 task: a ClusteringEvaluator over the validation and test splits of 'sbintuitions/JMTEB'.
+// Note that the task key is 'mewsc16' while the dataset config name is 'mewsc16_ja'.
+{
+  mewsc16: {
+    class_path: 'ClusteringEvaluator',
+    init_args: {
+      val_dataset: {
+        class_path: 'HfClusteringDataset',
+        init_args: {
+          path: 'sbintuitions/JMTEB',
+          split: 'validation',
+          name: 'mewsc16_ja',
+        },
+      },
+      test_dataset: {
+        class_path: 'HfClusteringDataset',
+        init_args: {
+          path: 'sbintuitions/JMTEB',
+          split: 'test',
+          name: 'mewsc16_ja',
+        },
+      },
+      prefix: '類似した文を探すために次の文を表現して\n',  // English: "Represent the following sentence to find similar sentences"
+    },
+  },
+}

jmteb/tasks/mrtydi.jsonnet ADDED Viewed

	@@ -0,0 +1,34 @@

+// mrtydi task: a RetrievalEvaluator whose validation/test queries and document corpus are all loaded from 'sbintuitions/JMTEB'.
+{
+  mrtydi: {
+    class_path: 'RetrievalEvaluator',
+    init_args: {
+      val_query_dataset: {
+        class_path: 'HfRetrievalQueryDataset',
+        init_args: {
+          path: 'sbintuitions/JMTEB',
+          split: 'validation',
+          name: 'mrtydi-query',
+        },
+      },
+      test_query_dataset: {
+        class_path: 'HfRetrievalQueryDataset',
+        init_args: {
+          path: 'sbintuitions/JMTEB',
+          split: 'test',
+          name: 'mrtydi-query',
+        },
+      },
+      doc_dataset: {
+        class_path: 'HfRetrievalDocDataset',
+        init_args: {
+          path: 'sbintuitions/JMTEB',
+          split: 'corpus',
+          name: 'mrtydi-corpus',
+        },
+      },
+      // The sibling retrieval task files shown alongside this one do not set doc_chunk_size.
+      // Presumably the number of documents encoded per chunk — TODO confirm against RetrievalEvaluator.
+      doc_chunk_size: 10000,
+      query_prefix: '関連した文書を探すために次の文を表現して\n',  // English: "Represent the following sentence to find related documents"
+      doc_prefix: '次の文章を表現して\n',  // English: "Represent the following text"
+    },
+  },
+}

jmteb/tasks/nlp_journal_abs_intro.jsonnet ADDED Viewed

	@@ -0,0 +1,33 @@

+// nlp_journal_abs_intro task: a RetrievalEvaluator whose validation/test queries and document corpus are all loaded from 'sbintuitions/JMTEB'.
+{
+  nlp_journal_abs_intro: {
+    class_path: 'RetrievalEvaluator',
+    init_args: {
+      val_query_dataset: {
+        class_path: 'HfRetrievalQueryDataset',
+        init_args: {
+          path: 'sbintuitions/JMTEB',
+          split: 'validation',
+          name: 'nlp_journal_abs_intro-query',
+        },
+      },
+      test_query_dataset: {
+        class_path: 'HfRetrievalQueryDataset',
+        init_args: {
+          path: 'sbintuitions/JMTEB',
+          split: 'test',
+          name: 'nlp_journal_abs_intro-query',
+        },
+      },
+      doc_dataset: {
+        class_path: 'HfRetrievalDocDataset',
+        init_args: {
+          path: 'sbintuitions/JMTEB',
+          split: 'corpus',
+          name: 'nlp_journal_abs_intro-corpus',
+        },
+      },
+      query_prefix: '関連した文書を探すために次の文を表現して\n',  // English: "Represent the following sentence to find related documents"
+      doc_prefix: '次の文章を表現して\n',  // English: "Represent the following text"
+    },
+  },
+}

jmteb/tasks/nlp_journal_title_abs.jsonnet ADDED Viewed

	@@ -0,0 +1,33 @@

+// nlp_journal_title_abs task: a RetrievalEvaluator whose validation/test queries and document corpus are all loaded from 'sbintuitions/JMTEB'.
+{
+  nlp_journal_title_abs: {
+    class_path: 'RetrievalEvaluator',
+    init_args: {
+      val_query_dataset: {
+        class_path: 'HfRetrievalQueryDataset',
+        init_args: {
+          path: 'sbintuitions/JMTEB',
+          split: 'validation',
+          name: 'nlp_journal_title_abs-query',
+        },
+      },
+      test_query_dataset: {
+        class_path: 'HfRetrievalQueryDataset',
+        init_args: {
+          path: 'sbintuitions/JMTEB',
+          split: 'test',
+          name: 'nlp_journal_title_abs-query',
+        },
+      },
+      doc_dataset: {
+        class_path: 'HfRetrievalDocDataset',
+        init_args: {
+          path: 'sbintuitions/JMTEB',
+          split: 'corpus',
+          name: 'nlp_journal_title_abs-corpus',
+        },
+      },
+      query_prefix: '関連した文書を探すために次の文を表現して\n',  // English: "Represent the following sentence to find related documents"
+      doc_prefix: '次の文章を表現して\n',  // English: "Represent the following text"
+    },
+  },
+}

jmteb/tasks/nlp_journal_title_intro.jsonnet ADDED Viewed

	@@ -0,0 +1,33 @@

+// nlp_journal_title_intro task: a RetrievalEvaluator whose validation/test queries and document corpus are all loaded from 'sbintuitions/JMTEB'.
+{
+  nlp_journal_title_intro: {
+    class_path: 'RetrievalEvaluator',
+    init_args: {
+      val_query_dataset: {
+        class_path: 'HfRetrievalQueryDataset',
+        init_args: {
+          path: 'sbintuitions/JMTEB',
+          split: 'validation',
+          name: 'nlp_journal_title_intro-query',
+        },
+      },
+      test_query_dataset: {
+        class_path: 'HfRetrievalQueryDataset',
+        init_args: {
+          path: 'sbintuitions/JMTEB',
+          split: 'test',
+          name: 'nlp_journal_title_intro-query',
+        },
+      },
+      doc_dataset: {
+        class_path: 'HfRetrievalDocDataset',
+        init_args: {
+          path: 'sbintuitions/JMTEB',
+          split: 'corpus',
+          name: 'nlp_journal_title_intro-corpus',
+        },
+      },
+      query_prefix: '関連した文書を探すために次の文を表現して\n',  // English: "Represent the following sentence to find related documents"
+      doc_prefix: '次の文章を表現して\n',  // English: "Represent the following text"
+    },
+  },
+}

jmteb/tasks/paws_x_ja.jsonnet ADDED Viewed

	@@ -0,0 +1,25 @@

+// paws_x_ja task: a PairClassificationEvaluator over the validation and test splits of the 'paws_x_ja' config in 'sbintuitions/JMTEB'.
+// Both sentences of a pair receive the same prefix.
+{
+  paws_x_ja: {
+    class_path: 'PairClassificationEvaluator',
+    init_args: {
+      val_dataset: {
+        class_path: 'HfPairClassificationDataset',
+        init_args: {
+          path: 'sbintuitions/JMTEB',
+          split: 'validation',
+          name: 'paws_x_ja',
+        },
+      },
+      test_dataset: {
+        class_path: 'HfPairClassificationDataset',
+        init_args: {
+          path: 'sbintuitions/JMTEB',
+          split: 'test',
+          name: 'paws_x_ja',
+        },
+      },
+      sentence1_prefix: '同じ意味の文を探すために次の文を表現して\n',  // English: "Represent the following sentence to find sentences with the same meaning"
+      sentence2_prefix: '同じ意味の文を探すために次の文を表現して\n',  // same text as sentence1_prefix
+    },
+  },
+}

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:bba8db1bb981e84c4c056423407063f9b3c83bf4e9569598c1428c2a5b6c167a
+size 629238896

modules.json ADDED Viewed

	@@ -0,0 +1,20 @@

+[
+  {
+    "idx": 0,
+    "name": "0",
+    "path": "",
+    "type": "sentence_transformers.models.Transformer"
+  },
+  {
+    "idx": 1,
+    "name": "1",
+    "path": "1_Pooling",
+    "type": "sentence_transformers.models.Pooling"
+  },
+  {
+    "idx": 2,
+    "name": "2",
+    "path": "2_Normalize",
+    "type": "sentence_transformers.models.Normalize"
+  }
+]

mteb/models/__init__.py ADDED Viewed

	@@ -0,0 +1,10 @@

+from .default import PROMPT as default_prompt
+from .retrieva import PROMPT as retrieva_prompt
+from .retrieva_en import PROMPT as retrieva_en_prompt
+# Registry of prompt sets. mteb_eval.py offers these keys as the choices of
+# --model_type and looks the selected prompt set up by that key.
+PROMPTS = {
+    "default": default_prompt,
+    "retrieva": retrieva_prompt,
+    "retrieva-en": retrieva_en_prompt,
+}

mteb/models/default.py ADDED Viewed

	@@ -0,0 +1,4 @@

+# Default prompt set: plain "query: " / "passage: " prefixes.
+PROMPT = dict(query="query: ", passage="passage: ")

mteb/models/retrieva.py ADDED Viewed

	@@ -0,0 +1,13 @@

+# Japanese prompt set. Instructions used under more than one key are written
+# once and reused, so the entries that are meant to match cannot drift apart.
+# English: "Represent the following sentence to find sentences with the same meaning"
+_SAME_MEANING = "同じ意味の文を探すために次の文を表現して\n"
+# English: "Represent the following sentence to find related documents"
+_RELATED_DOCS = "関連した文書を探すために次の文を表現して\n"
+# English: "Represent the following text"
+_PASSAGE = "次の文章を表現して\n"
+PROMPT = {
+    "STS": _SAME_MEANING,
+    # English: "Represent the following article or title"
+    "Summarization": "次の記事またはタイトルを表現して\n",
+    # English: "Represent the following sentence"
+    "BitextMining": "次の文を表現して\n",
+    # English: "Represent the following sentence to find sentences belonging to the same class"
+    "Classification": "同じクラスに属する文を探すために次の文を表現して\n",
+    # English: "Represent the following sentence to find similar sentences"
+    "Clustering": "類似した文を探すために次の文を表現して\n",
+    "Reranking-query": _RELATED_DOCS,
+    "Reranking-passage": _PASSAGE,
+    "Retrieval-query": _RELATED_DOCS,
+    "Retrieval-passage": _PASSAGE,
+    # Empty prompt string for this key.
+    "InstructionRetrieval": "",
+    "PairClassification": _SAME_MEANING,
+}

mteb/models/retrieva_en.py ADDED Viewed

	@@ -0,0 +1,15 @@

+# English prompt set. Instructions used under more than one key are written
+# once and reused, so the entries that are meant to match cannot drift apart.
+_DUPLICATE = "Represent the sentence for retrieving duplicate sentences:\n"
+_SAME_CATEGORY = "Represent the sentence for retrieving the sentence belonging to the same category:\n"
+_QUESTION = "Represent the question:\n"
+_PASSAGE = "Represent the following text:\n"
+PROMPT = {
+    "STS": _DUPLICATE,
+    "Summarization": "Represent the news article or news title for retrieval:\n",
+    "BitextMining": "Represent the sentence\n",
+    "Classification": _SAME_CATEGORY,
+    "Clustering": "Represent the sentence to find similar sentences:\n",
+    "Reranking-query": _QUESTION,
+    "Reranking-passage": _PASSAGE,
+    "Retrieval-query": _QUESTION,
+    "Retrieval-passage": _PASSAGE,
+    "InstructionRetrieval": "Retrieve text based on user query:\n",
+    "PairClassification": _DUPLICATE,
+    "MultilabelClassification": _SAME_CATEGORY,
+    # Empty prompt string for this key.
+    "Speed": "",
+}

mteb/mteb_eval.py ADDED Viewed

	@@ -0,0 +1,49 @@

+"""Evaluate AMBER models"""
+import argparse
+import mteb
+from models import PROMPTS
+# Maps the value of the --benchmark option to the benchmark name that main()
+# passes to mteb.get_benchmark.
+BENCHMARKS = {
+    "en": "MTEB(eng, v2)",
+    "ja": "MTEB(jpn, v1)",
+}
+def get_args() -> argparse.Namespace:
+    """Parse the command-line options of the evaluation script.
+
+    Returns:
+        argparse.Namespace with the attributes model_type, model_name_or_path,
+        batch_size, output_dir, benchmark, corpus_chunk_size and
+        convert_to_tensor.
+    """
+    parser = argparse.ArgumentParser()
+    # --model_type selects a prompt set from PROMPTS; the model itself is
+    # given by --model_name_or_path, so the help text must not call it a model name.
+    parser.add_argument(
+        "--model_type",
+        type=str,
+        required=True,
+        choices=list(PROMPTS),
+        help="Prompt set to use (a key of PROMPTS)",
+    )
+    parser.add_argument(
+        "--model_name_or_path",
+        type=str,
+        required=True,
+        help="Model name or path passed to mteb.get_model",
+    )
+    parser.add_argument("--batch_size", type=int, default=32, help="Batch size")
+    parser.add_argument("--output_dir", type=str, required=True, help="Output directory")
+    parser.add_argument(
+        "--benchmark",
+        type=str,
+        required=True,
+        choices=list(BENCHMARKS),
+        help="Benchmark to run (a key of BENCHMARKS)",
+    )
+    parser.add_argument(
+        "--corpus_chunk_size",
+        type=int,
+        default=50000,
+        help="Value forwarded as corpus_chunk_size to the evaluation run",
+    )
+    parser.add_argument(
+        "--convert_to_tensor",
+        action="store_true",
+        help="Set convert_to_tensor in the encode kwargs",
+    )
+    return parser.parse_args()
+def main():
+    """Load the model with the selected prompt set and run the chosen benchmark.
+
+    All settings come from get_args(); --output_dir is passed to the run as
+    its output folder.
+    """
+    cli = get_args()
+    # The prompt set chosen by --model_type is handed to the model loader.
+    encoder = mteb.get_model(
+        cli.model_name_or_path,
+        model_prompts=PROMPTS[cli.model_type],
+    )
+    runner = mteb.MTEB(tasks=mteb.get_benchmark(BENCHMARKS[cli.benchmark]))
+    runner.run(
+        encoder,
+        output_folder=cli.output_dir,
+        encode_kwargs=dict(
+            batch_size=cli.batch_size,
+            convert_to_tensor=cli.convert_to_tensor,
+        ),
+        corpus_chunk_size=cli.corpus_chunk_size,
+    )
+if __name__ == "__main__":
+    main()

mteb/results/AmazonCounterfactualClassification.json ADDED Viewed

	@@ -0,0 +1,95 @@

+{
+  "dataset_revision": "e8379541af4e31359cca9fbcf4b00f2671dba205",
+  "task_name": "AmazonCounterfactualClassification",
+  "mteb_version": "1.36.1",
+  "scores": {
+    "test": [
+      {
+        "accuracy": 0.733433,
+        "f1": 0.672899,
+        "f1_weighted": 0.757948,
+        "ap": 0.36123,
+        "ap_weighted": 0.36123,
+        "scores_per_experiment": [
+          {
+            "accuracy": 0.743284,
+            "f1": 0.687055,
+            "f1_weighted": 0.767834,
+            "ap": 0.378554,
+            "ap_weighted": 0.378554
+          },
+          {
+            "accuracy": 0.768657,
+            "f1": 0.709178,
+            "f1_weighted": 0.789268,
+            "ap": 0.40075,
+            "ap_weighted": 0.40075
+          },
+          {
+            "accuracy": 0.635821,
+            "f1": 0.59181,
+            "f1_weighted": 0.67343,
+            "ap": 0.295662,
+            "ap_weighted": 0.295662
+          },
+          {
+            "accuracy": 0.729851,
+            "f1": 0.67607,
+            "f1_weighted": 0.756446,
+            "ap": 0.369058,
+            "ap_weighted": 0.369058
+          },
+          {
+            "accuracy": 0.741791,
+            "f1": 0.678645,
+            "f1_weighted": 0.765391,
+            "ap": 0.361706,
+            "ap_weighted": 0.361706
+          },
+          {
+            "accuracy": 0.731343,
+            "f1": 0.662842,
+            "f1_weighted": 0.755387,
+            "ap": 0.339825,
+            "ap_weighted": 0.339825
+          },
+          {
+            "accuracy": 0.81791,
+            "f1": 0.745149,
+            "f1_weighted": 0.828073,
+            "ap": 0.434356,
+            "ap_weighted": 0.434356
+          },
+          {
+            "accuracy": 0.783582,
+            "f1": 0.715912,
+            "f1_weighted": 0.800345,
+            "ap": 0.400671,
+            "ap_weighted": 0.400671
+          },
+          {
+            "accuracy": 0.698507,
+            "f1": 0.637958,
+            "f1_weighted": 0.728119,
+            "ap": 0.321782,
+            "ap_weighted": 0.321782
+          },
+          {
+            "accuracy": 0.683582,
+            "f1": 0.624376,
+            "f1_weighted": 0.715188,
+            "ap": 0.309935,
+            "ap_weighted": 0.309935
+          }
+        ],
+        "main_score": 0.733433,
+        "hf_subset": "en",
+        "languages": [
+          "eng-Latn"
+        ]
+      }
+    ]
+  },
+  "evaluation_time": 12.824249505996704,
+  "kg_co2_emissions": null
+}

mteb/results/ArXivHierarchicalClusteringP2P.json ADDED Viewed

	@@ -0,0 +1,46 @@

+{
+  "dataset_revision": "0bbdb47bcbe3a90093699aefeed338a0f28a7ee8",
+  "task_name": "ArXivHierarchicalClusteringP2P",
+  "mteb_version": "1.36.1",
+  "scores": {
+    "test": [
+      {
+        "v_measures": {
+          "Level 0": [
+            0.531687,
+            0.515416,
+            0.534512,
+            0.516432,
+            0.485335,
+            0.491114,
+            0.452959,
+            0.509849,
+            0.474611,
+            0.47921
+          ],
+          "Level 1": [
+            0.57501,
+            0.561921,
+            0.57618,
+            0.565423,
+            0.581718,
+            0.556907,
+            0.557507,
+            0.569016,
+            0.559128,
+            0.584777
+          ]
+        },
+        "v_measure": 0.533936,
+        "v_measure_std": 0.039727,
+        "main_score": 0.533936,
+        "hf_subset": "default",
+        "languages": [
+          "eng-Latn"
+        ]
+      }
+    ]
+  },
+  "evaluation_time": 7.786345720291138,
+  "kg_co2_emissions": null
+}

mteb/results/ArXivHierarchicalClusteringS2S.json ADDED Viewed

	@@ -0,0 +1,46 @@

+{
+  "dataset_revision": "b73bd54100e5abfa6e3a23dcafb46fe4d2438dc3",
+  "task_name": "ArXivHierarchicalClusteringS2S",
+  "mteb_version": "1.36.1",
+  "scores": {
+    "test": [
+      {
+        "v_measures": {
+          "Level 0": [
+            0.447898,
+            0.479182,
+            0.446903,
+            0.457972,
+            0.443715,
+            0.488723,
+            0.479857,
+            0.492344,
+            0.471878,
+            0.458149
+          ],
+          "Level 1": [
+            0.55827,
+            0.55466,
+            0.567894,
+            0.586775,
+            0.541746,
+            0.576662,
+            0.574423,
+            0.552522,
+            0.536173,
+            0.556257
+          ]
+        },
+        "v_measure": 0.5136,
+        "v_measure_std": 0.049623,
+        "main_score": 0.5136,
+        "hf_subset": "default",
+        "languages": [
+          "eng-Latn"
+        ]
+      }
+    ]
+  },
+  "evaluation_time": 6.605703115463257,
+  "kg_co2_emissions": null
+}

mteb/results/ArguAna.json ADDED Viewed

	@@ -0,0 +1,158 @@

+{
+  "dataset_revision": "c22ab2a51041ffd869aaddef7af8d8215647e41a",
+  "task_name": "ArguAna",
+  "mteb_version": "1.36.1",
+  "scores": {
+    "test": [
+      {
+        "ndcg_at_1": 0.26743,
+        "ndcg_at_3": 0.40551,
+        "ndcg_at_5": 0.4555,
+        "ndcg_at_10": 0.51317,
+        "ndcg_at_20": 0.53963,
+        "ndcg_at_100": 0.55358,
+        "ndcg_at_1000": 0.55596,
+        "map_at_1": 0.26743,
+        "map_at_3": 0.37162,
+        "map_at_5": 0.39964,
+        "map_at_10": 0.42355,
+        "map_at_20": 0.431,
+        "map_at_100": 0.43313,
+        "map_at_1000": 0.43323,
+        "recall_at_1": 0.26743,
+        "recall_at_3": 0.50356,
+        "recall_at_5": 0.62376,
+        "recall_at_10": 0.80156,
+        "recall_at_20": 0.90469,
+        "recall_at_100": 0.97724,
+        "recall_at_1000": 0.99502,
+        "precision_at_1": 0.26743,
+        "precision_at_3": 0.16785,
+        "precision_at_5": 0.12475,
+        "precision_at_10": 0.08016,
+        "precision_at_20": 0.04523,
+        "precision_at_100": 0.00977,
+        "precision_at_1000": 0.001,
+        "mrr_at_1": 0.271693,
+        "mrr_at_3": 0.374111,
+        "mrr_at_5": 0.401102,
+        "mrr_at_10": 0.424939,
+        "mrr_at_20": 0.432491,
+        "mrr_at_100": 0.434578,
+        "mrr_at_1000": 0.434685,
+        "nauc_ndcg_at_1_max": -0.062333,
+        "nauc_ndcg_at_1_std": -0.079555,
+        "nauc_ndcg_at_1_diff1": 0.14512,
+        "nauc_ndcg_at_3_max": -0.021476,
+        "nauc_ndcg_at_3_std": -0.058094,
+        "nauc_ndcg_at_3_diff1": 0.09136,
+        "nauc_ndcg_at_5_max": -0.017068,
+        "nauc_ndcg_at_5_std": -0.050188,
+        "nauc_ndcg_at_5_diff1": 0.094328,
+        "nauc_ndcg_at_10_max": 0.007445,
+        "nauc_ndcg_at_10_std": -0.035482,
+        "nauc_ndcg_at_10_diff1": 0.111,
+        "nauc_ndcg_at_20_max": 0.00472,
+        "nauc_ndcg_at_20_std": -0.033913,
+        "nauc_ndcg_at_20_diff1": 0.112196,
+        "nauc_ndcg_at_100_max": -0.011079,
+        "nauc_ndcg_at_100_std": -0.038187,
+        "nauc_ndcg_at_100_diff1": 0.109808,
+        "nauc_ndcg_at_1000_max": -0.013786,
+        "nauc_ndcg_at_1000_std": -0.043135,
+        "nauc_ndcg_at_1000_diff1": 0.109463,
+        "nauc_map_at_1_max": -0.062333,
+        "nauc_map_at_1_std": -0.079555,
+        "nauc_map_at_1_diff1": 0.14512,
+        "nauc_map_at_3_max": -0.033212,
+        "nauc_map_at_3_std": -0.062437,
+        "nauc_map_at_3_diff1": 0.101283,
+        "nauc_map_at_5_max": -0.030931,
+        "nauc_map_at_5_std": -0.057626,
+        "nauc_map_at_5_diff1": 0.103327,
+        "nauc_map_at_10_max": -0.022469,
+        "nauc_map_at_10_std": -0.052611,
+        "nauc_map_at_10_diff1": 0.110171,
+        "nauc_map_at_20_max": -0.02358,
+        "nauc_map_at_20_std": -0.05255,
+        "nauc_map_at_20_diff1": 0.110437,
+        "nauc_map_at_100_max": -0.025533,
+        "nauc_map_at_100_std": -0.052893,
+        "nauc_map_at_100_diff1": 0.110186,
+        "nauc_map_at_1000_max": -0.025621,
+        "nauc_map_at_1000_std": -0.053072,
+        "nauc_map_at_1000_diff1": 0.110196,
+        "nauc_recall_at_1_max": -0.062333,
+        "nauc_recall_at_1_std": -0.079555,
+        "nauc_recall_at_1_diff1": 0.14512,
+        "nauc_recall_at_3_max": 0.012414,
+        "nauc_recall_at_3_std": -0.046148,
+        "nauc_recall_at_3_diff1": 0.0645,
+        "nauc_recall_at_5_max": 0.027998,
+        "nauc_recall_at_5_std": -0.026652,
+        "nauc_recall_at_5_diff1": 0.067526,
+        "nauc_recall_at_10_max": 0.173221,
+        "nauc_recall_at_10_std": 0.059032,
+        "nauc_recall_at_10_diff1": 0.128819,
+        "nauc_recall_at_20_max": 0.296782,
+        "nauc_recall_at_20_std": 0.164192,
+        "nauc_recall_at_20_diff1": 0.158604,
+        "nauc_recall_at_100_max": 0.287726,
+        "nauc_recall_at_100_std": 0.487738,
+        "nauc_recall_at_100_diff1": 0.158629,
+        "nauc_recall_at_1000_max": 0.310293,
+        "nauc_recall_at_1000_std": 0.527185,
+        "nauc_recall_at_1000_diff1": 0.143646,
+        "nauc_precision_at_1_max": -0.062333,
+        "nauc_precision_at_1_std": -0.079555,
+        "nauc_precision_at_1_diff1": 0.14512,
+        "nauc_precision_at_3_max": 0.012414,
+        "nauc_precision_at_3_std": -0.046148,
+        "nauc_precision_at_3_diff1": 0.0645,
+        "nauc_precision_at_5_max": 0.027998,
+        "nauc_precision_at_5_std": -0.026652,
+        "nauc_precision_at_5_diff1": 0.067526,
+        "nauc_precision_at_10_max": 0.173221,
+        "nauc_precision_at_10_std": 0.059032,
+        "nauc_precision_at_10_diff1": 0.128819,
+        "nauc_precision_at_20_max": 0.296782,
+        "nauc_precision_at_20_std": 0.164192,
+        "nauc_precision_at_20_diff1": 0.158604,
+        "nauc_precision_at_100_max": 0.287726,
+        "nauc_precision_at_100_std": 0.487738,
+        "nauc_precision_at_100_diff1": 0.158629,
+        "nauc_precision_at_1000_max": 0.310293,
+        "nauc_precision_at_1000_std": 0.527185,
+        "nauc_precision_at_1000_diff1": 0.143646,
+        "nauc_mrr_at_1_max": -0.060675,
+        "nauc_mrr_at_1_std": -0.070284,
+        "nauc_mrr_at_1_diff1": 0.131112,
+        "nauc_mrr_at_3_max": -0.038593,
+        "nauc_mrr_at_3_std": -0.059281,
+        "nauc_mrr_at_3_diff1": 0.08807,
+        "nauc_mrr_at_5_max": -0.036333,
+        "nauc_mrr_at_5_std": -0.053817,
+        "nauc_mrr_at_5_diff1": 0.090466,
+        "nauc_mrr_at_10_max": -0.028869,
+        "nauc_mrr_at_10_std": -0.049811,
+        "nauc_mrr_at_10_diff1": 0.095897,
+        "nauc_mrr_at_20_max": -0.029609,
+        "nauc_mrr_at_20_std": -0.049429,
+        "nauc_mrr_at_20_diff1": 0.096326,
+        "nauc_mrr_at_100_max": -0.0315,
+        "nauc_mrr_at_100_std": -0.049643,
+        "nauc_mrr_at_100_diff1": 0.096056,
+        "nauc_mrr_at_1000_max": -0.03159,
+        "nauc_mrr_at_1000_std": -0.04982,
+        "nauc_mrr_at_1000_diff1": 0.096061,
+        "main_score": 0.51317,
+        "hf_subset": "default",
+        "languages": [
+          "eng-Latn"
+        ]
+      }
+    ]
+  },
+  "evaluation_time": 51.13386678695679,
+  "kg_co2_emissions": null
+}

mteb/results/AskUbuntuDupQuestions.json ADDED Viewed

	@@ -0,0 +1,26 @@

+{
+  "dataset_revision": "2000358ca161889fa9c082cb41daa8dcfb161a54",
+  "task_name": "AskUbuntuDupQuestions",
+  "mteb_version": "1.36.1",
+  "scores": {
+    "test": [
+      {
+        "map": 0.580233,
+        "mrr": 0.705882,
+        "nAUC_map_max": 0.208533,
+        "nAUC_map_std": 0.126123,
+        "nAUC_map_diff1": 0.013859,
+        "nAUC_mrr_max": 0.33692,
+        "nAUC_mrr_std": 0.141764,
+        "nAUC_mrr_diff1": 0.142379,
+        "main_score": 0.580233,
+        "hf_subset": "default",
+        "languages": [
+          "eng-Latn"
+        ]
+      }
+    ]
+  },
+  "evaluation_time": 4.280848503112793,
+  "kg_co2_emissions": null
+}

mteb/results/BIOSSES.json ADDED Viewed

	@@ -0,0 +1,26 @@

+{
+  "dataset_revision": "d3fb88f8f02e40887cd149695127462bbcf29b4a",
+  "task_name": "BIOSSES",
+  "mteb_version": "1.36.1",
+  "scores": {
+    "test": [
+      {
+        "pearson": 0.834314,
+        "spearman": 0.787367,
+        "cosine_pearson": 0.834314,
+        "cosine_spearman": 0.787367,
+        "manhattan_pearson": 0.821388,
+        "manhattan_spearman": 0.78747,
+        "euclidean_pearson": 0.821716,
+        "euclidean_spearman": 0.787367,
+        "main_score": 0.787367,
+        "hf_subset": "default",
+        "languages": [
+          "eng-Latn"
+        ]
+      }
+    ]
+  },
+  "evaluation_time": 0.5205843448638916,
+  "kg_co2_emissions": null
+}