Spaces:
Running
Running
Muennighoff
committed on
Commit
·
216d974
1
Parent(s):
64dd40c
Add more OpenAI models
Browse files
app.py
CHANGED
@@ -158,15 +158,23 @@ EXTERNAL_MODELS = [
|
|
158 |
"sentence-t5-xxl",
|
159 |
"sup-simcse-bert-base-uncased",
|
160 |
"text-similarity-ada-001",
|
161 |
-
"text-
|
162 |
-
"text-search-ada-
|
|
|
|
|
|
|
163 |
"unsup-simcse-bert-base-uncased",
|
164 |
]
|
165 |
EXTERNAL_MODEL_TO_LINK = {
|
166 |
"LASER2": "https://github.com/facebookresearch/LASER",
|
167 |
"text-similarity-ada-001": "https://beta.openai.com/docs/guides/embeddings/types-of-embedding-models",
|
168 |
-
"text-
|
169 |
"text-search-ada-doc-001": "https://beta.openai.com/docs/guides/embeddings/types-of-embedding-models",
|
|
|
|
|
|
|
|
|
|
|
170 |
"LaBSE": "https://huggingface.co/sentence-transformers/LaBSE",
|
171 |
"sentence-t5-xxl": "https://huggingface.co/sentence-transformers/sentence-t5-xxl",
|
172 |
"sentence-t5-xl": "https://huggingface.co/sentence-transformers/sentence-t5-xl",
|
@@ -219,8 +227,15 @@ EXTERNAL_MODEL_TO_DIM = {
|
|
219 |
"sentence-t5-xxl": 768,
|
220 |
"sup-simcse-bert-base-uncased": 768,
|
221 |
"text-similarity-ada-001": 1024,
|
|
|
|
|
|
|
222 |
"text-search-ada-query-001": 1024,
|
223 |
-
"text-search-ada-
|
|
|
|
|
|
|
|
|
224 |
"unsup-simcse-bert-base-uncased": 768,
|
225 |
}
|
226 |
|
@@ -255,7 +270,7 @@ def add_task(examples):
|
|
255 |
return examples
|
256 |
|
257 |
for model in EXTERNAL_MODELS:
|
258 |
-
ds = load_dataset("mteb/results", model)
|
259 |
# For local debugging:
|
260 |
#, download_mode='force_redownload', ignore_verifications=True)
|
261 |
ds = ds.map(add_lang)
|
@@ -297,7 +312,8 @@ def get_mteb_data(tasks=["Clustering"], langs=[], fillna=True, add_emb_dim=False
|
|
297 |
res = {k: v for d in results_list for k, v in d.items()}
|
298 |
# Model & at least one result
|
299 |
if len(res) > 1:
|
300 |
-
|
|
|
301 |
df_list.append(res)
|
302 |
|
303 |
for model in models:
|
|
|
158 |
"sentence-t5-xxl",
|
159 |
"sup-simcse-bert-base-uncased",
|
160 |
"text-similarity-ada-001",
|
161 |
+
"text-similarity-curie-001",
|
162 |
+
"text-search-ada-001",
|
163 |
+
"text-search-babbage-001",
|
164 |
+
"text-search-curie-001",
|
165 |
+
"text-search-davinci-001",
|
166 |
"unsup-simcse-bert-base-uncased",
|
167 |
]
|
168 |
EXTERNAL_MODEL_TO_LINK = {
|
169 |
"LASER2": "https://github.com/facebookresearch/LASER",
|
170 |
"text-similarity-ada-001": "https://beta.openai.com/docs/guides/embeddings/types-of-embedding-models",
|
171 |
+
"text-similarity-curie-001": "https://beta.openai.com/docs/guides/embeddings/types-of-embedding-models",
|
172 |
"text-search-ada-doc-001": "https://beta.openai.com/docs/guides/embeddings/types-of-embedding-models",
|
173 |
+
"text-search-ada-query-001": "https://beta.openai.com/docs/guides/embeddings/types-of-embedding-models",
|
174 |
+
"text-search-ada-001": "https://beta.openai.com/docs/guides/embeddings/types-of-embedding-models",
|
175 |
+
"text-search-curie-001": "https://beta.openai.com/docs/guides/embeddings/types-of-embedding-models",
|
176 |
+
"text-search-babbage-001": "https://beta.openai.com/docs/guides/embeddings/types-of-embedding-models",
|
177 |
+
"text-search-davinci-001": "https://beta.openai.com/docs/guides/embeddings/types-of-embedding-models",
|
178 |
"LaBSE": "https://huggingface.co/sentence-transformers/LaBSE",
|
179 |
"sentence-t5-xxl": "https://huggingface.co/sentence-transformers/sentence-t5-xxl",
|
180 |
"sentence-t5-xl": "https://huggingface.co/sentence-transformers/sentence-t5-xl",
|
|
|
227 |
"sentence-t5-xxl": 768,
|
228 |
"sup-simcse-bert-base-uncased": 768,
|
229 |
"text-similarity-ada-001": 1024,
|
230 |
+
"text-similarity-curie-001": 4096,
|
231 |
+
|
232 |
+
"text-search-ada-doc-001": 1024,
|
233 |
"text-search-ada-query-001": 1024,
|
234 |
+
"text-search-ada-001": 1024,
|
235 |
+
"text-search-babbage-001": 2048,
|
236 |
+
"text-search-curie-001": 4096,
|
237 |
+
"text-search-davinci-001": 12288,
|
238 |
+
|
239 |
"unsup-simcse-bert-base-uncased": 768,
|
240 |
}
|
241 |
|
|
|
270 |
return examples
|
271 |
|
272 |
for model in EXTERNAL_MODELS:
|
273 |
+
ds = load_dataset("mteb/results", model, download_mode='force_redownload', ignore_verifications=True)
|
274 |
# For local debugging:
|
275 |
#, download_mode='force_redownload', ignore_verifications=True)
|
276 |
ds = ds.map(add_lang)
|
|
|
312 |
res = {k: v for d in results_list for k, v in d.items()}
|
313 |
# Model & at least one result
|
314 |
if len(res) > 1:
|
315 |
+
if add_emb_dim:
|
316 |
+
res["Embedding Dimensions"] = EXTERNAL_MODEL_TO_DIM.get(model, "")
|
317 |
df_list.append(res)
|
318 |
|
319 |
for model in models:
|