voyage-3-m-exp
This repo contains the tokenizer and evaluation results of the `voyage-3-m-exp` embedding model. `voyage-3-m-exp` is an intermediate snapshot of Voyage general-purpose embeddings that are tailored to datasets similar to MTEB. The training sets of the MTEB datasets are used in training `voyage-3-m-exp`.
We note that for production use cases, `voyage-3-large` is highly recommended and likely strictly better than `voyage-3-m-exp`. Please see the blogpost for more information about `voyage-3-large`.
`voyage-3-m-exp` can be accessed via the Voyage API with the model name `"voyage-3-m-exp"`.
Reproduction of MTEB results
Similar to most open-source models at the top of the leaderboard, `voyage-3-m-exp` uses task-specific prompts. To reproduce the MTEB results on the leaderboard, please set the `input_type` parameter of the API to `None` and prepend the following prompts to the input. For retrieval tasks, please use only the `text` field, without adding the `title` field in front of the text.
{
"BIOSSES" : {
"query": "Represent the sentence to retrieve similar sentence: ",
"document": "Represent the sentence to retrieve similar sentence: ",
},
"SICK-R": {
"query": "Represent the sentence to retrieve similar sentence: ",
"document": "Represent the sentence to retrieve similar sentence: ",
},
"STS12": {
"query": "Represent the sentence to retrieve similar sentence: ",
"document": "Represent the sentence to retrieve similar sentence: ",
},
"STS13": {
"query": "Represent the sentence to retrieve similar sentence: ",
"document": "Represent the sentence to retrieve similar sentence: ",
},
"STS14": {
"query": "Represent the sentence to retrieve similar sentence: ",
"document": "Represent the sentence to retrieve similar sentence: ",
},
"STS15": {
"query": "Represent the sentence to retrieve similar sentence: ",
"document": "Represent the sentence to retrieve similar sentence: ",
},
"STS16": {
"query": "Represent the sentence to retrieve similar sentence: ",
"document": "Represent the sentence to retrieve similar sentence: ",
},
"STSBenchmark": {
"query": "Represent the sentence to retrieve similar sentence: ",
"document": "Represent the sentence to retrieve similar sentence: ",
},
"STS17": {
"query": "Represent the sentence to retrieve similar sentence: ",
"document": "Represent the sentence to retrieve similar sentence: ",
},
"STS22": {
"query": "Represent the sentence to retrieve similar sentence: ",
"document": "Represent the sentence to retrieve similar sentence: ",
},
"AmazonCounterfactualClassification": {
"query": "Classify the text: ",
"document": "Classify the text: ",
},
"AmazonPolarityClassification": {
"query": "Classify the text: ",
"document": "Classify the text: ",
},
"AmazonReviewsClassification": {
"query": "Classify the text: ",
"document": "Classify the text: ",
},
"Banking77Classification": {
"query": "Classify the text: ",
"document": "Classify the text: ",
},
"EmotionClassification": {
"query": "Classify the text: ",
"document": "Classify the text: ",
},
"ImdbClassification": {
"query": "Classify the text: ",
"document": "Classify the text: ",
},
"MassiveIntentClassification": {
"query": "Classify the text: ",
"document": "Classify the text: ",
},
"MassiveScenarioClassification": {
"query": "Classify the text: ",
"document": "Classify the text: ",
},
"MTOPDomainClassification": {
"query": "Classify the text: ",
"document": "Classify the text: ",
},
"MTOPIntentClassification": {
"query": "Classify the text: ",
"document": "Classify the text: ",
},
"ToxicConversationsClassification": {
"query": "Classify the text: ",
"document": "Classify the text: ",
},
"TweetSentimentExtractionClassification": {
"query": "Classify the text: ",
"document": "Classify the text: ",
},
"ArxivClusteringS2S": {
"query": "Cluster the text: ",
"document": "Cluster the text: ",
},
"BiorxivClusteringP2P": {
"query": "Cluster the text: ",
"document": "Cluster the text: ",
},
"BiorxivClusteringS2S": {
"query": "Cluster the text: ",
"document": "Cluster the text: ",
},
"ArxivClusteringP2P": {
"query": "Cluster the text: ",
"document": "Cluster the text: ",
},
"MedrxivClusteringP2P": {
"query": "Cluster the text: ",
"document": "Cluster the text: ",
},
"MedrxivClusteringS2S": {
"query": "Cluster the text: ",
"document": "Cluster the text: ",
},
"RedditClustering": {
"query": "Cluster the text: ",
"document": "Cluster the text: ",
},
"RedditClusteringP2P": {
"query": "Cluster the text: ",
"document": "Cluster the text: ",
},
"StackExchangeClustering": {
"query": "Cluster the text: ",
"document": "Cluster the text: ",
},
"StackExchangeClusteringP2P": {
"query": "Cluster the text: ",
"document": "Cluster the text: ",
},
"TwentyNewsgroupsClustering": {
"query": "Cluster the text: ",
"document": "Cluster the text: ",
},
"SprintDuplicateQuestions": {
"query": "Represent the sentence to retrieve similar sentence: ",
"document": "Represent the sentence to retrieve similar sentence: ",
},
"TwitterSemEval2015": {
"query": "Represent the sentence to retrieve similar sentence: ",
"document": "Represent the sentence to retrieve similar sentence: ",
},
"TwitterURLCorpus": {
"query": "Represent the sentence to retrieve similar sentence: ",
"document": "Represent the sentence to retrieve similar sentence: ",
},
'StackOverflowDupQuestions':
{
'query': 'Represent the sentence to retrieve similar sentences: ',
'document': 'Represent the sentence to retrieve similar sentences: ',
},
'SciDocsRR':
{
'query': 'Represent the paper title to retrieve relevant paper titles: ',
'document': 'Represent the paper title to retrieve relevant paper titles: ',
},
'MindSmallReranking':
{
'query': 'Represent the query to retrieve relevant documents: ',
'document': 'Represent the document for retrieval: ',
},
'AskUbuntuDupQuestions':
{
'query': 'Represent the question to retrieve similar questions: ',
'document': 'Represent the question to retrieve similar questions: ',
},
"SummEval": {
"query": "Represent the sentence to retrieve similar sentence: ",
"document": "Represent the sentence to retrieve similar sentence: ",
},
'ClimateFEVER':
{
'query': 'Represent the claim for retrieving supporting evidence: ',
'document': 'Represent the evidence for retrieval: ',
},
'HotpotQA':
{
'query': 'Represent the Wikipedia question for retrieving supporting documents: ',
'document': 'Represent the Wikipedia document for retrieval: ',
},
'FEVER':
{
'query': 'Represent the claim for retrieving supporting evidence: ',
'document': 'Represent the evidence for retrieval: ',
},
'MSMARCO':
{
'query': 'Represent the question for retrieving evidence documents: ',
'document': 'Represent the document for retrieval: ',
},
'DBPedia':
{
'query': 'Represent the entity-based query for retrieving relevant articles: ',
'document': 'Represent the article for retrieval: ',
},
'NQ':
{
'query': 'Represent the Wikipedia question for retrieving supporting documents: ',
'document': 'Represent the Wikipedia document for retrieval: ',
},
'QuoraRetrieval':
{
'query': 'Represent the quora question for retrieving similar questions: ',
'document': 'Represent the question for retrieval: ',
},
'SCIDOCS':
{
'query': 'Represent the paper title for retrieving possible citation documents: ',
'document': 'Represent the scitific paper document for retrieval: ',
},
'TRECCOVID':
{
'query': 'Represent the query for retrieving supporting articles: ',
'document': 'Represent the article for retrieval: ',
},
'Touche2020':
{
'query': 'Represent the question for retrieving supporting documents: ',
'document': 'Represent the document for retrieval: ',
},
"SciFact": {
"query": "Represent the scientific fact for retrieving supporting document: ",
"document": "Represent the document for retrieval: "
},
'NFCorpus':
{
'query': 'Represent the nutrition fact for retrieving evidence documents: ',
'document': 'Represent the pubmed document for retrieval: ',
},
'ArguAna':
{
'query': 'Represent the argument for retrieving counterarguments: ',
'document': 'Represent the conterargument for retrieval: ',
},
'FiQA2018':
{
'query': "Represent the query for retrieving supporting documents: ",
'document': "Represent the document for retrieval: ",
}
}
Evaluation results
- accuracy on MTEB AmazonCounterfactualClassification (en), test set, self-reported: 95.776
- f1 on MTEB AmazonCounterfactualClassification (en), test set, self-reported: 93.823
- f1_weighted on MTEB AmazonCounterfactualClassification (en), test set, self-reported: 95.937
- ap on MTEB AmazonCounterfactualClassification (en), test set, self-reported: 82.636
- ap_weighted on MTEB AmazonCounterfactualClassification (en), test set, self-reported: 82.636
- main_score on MTEB AmazonCounterfactualClassification (en), test set, self-reported: 95.776
- accuracy on MTEB AmazonPolarityClassification (default), test set, self-reported: 97.714
- f1 on MTEB AmazonPolarityClassification (default), test set, self-reported: 97.714
- f1_weighted on MTEB AmazonPolarityClassification (default), test set, self-reported: 97.714
- ap on MTEB AmazonPolarityClassification (default), test set, self-reported: 96.536