clfegg committed on Nov 21, 2024

Commit

1c682d7

verified ·

1 Parent(s): 4039879

Upload folder using huggingface_hub

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

.gitattributes +3 -0
hub/.locks/models--sentence-transformers--all-MiniLM-L6-v2/53aa51172d142c89d9012cce15ae4d6cc0ca6895895114379cacb4fab128d9db.lock +0 -0
hub/.locks/models--sentence-transformers--all-MiniLM-L6-v2/59d594003bf59880a884c574bf88ef7555bb0202.lock +0 -0
hub/.locks/models--sentence-transformers--all-MiniLM-L6-v2/72b987fd805cfa2b58c4c8c952b274a11bfd5a00.lock +0 -0
hub/.locks/models--sentence-transformers--all-MiniLM-L6-v2/8cfec92309f5626a223304af2423e332f6d31887.lock +0 -0
hub/.locks/models--sentence-transformers--all-MiniLM-L6-v2/952a9b81c0bfd99800fabf352f69c7ccd46c5e43.lock +0 -0
hub/.locks/models--sentence-transformers--all-MiniLM-L6-v2/c79f2b6a0cea6f4b564fed1938984bace9d30ff0.lock +0 -0
hub/.locks/models--sentence-transformers--all-MiniLM-L6-v2/cb202bfe2e3c98645018a6d12f182a434c9d3e02.lock +0 -0
hub/.locks/models--sentence-transformers--all-MiniLM-L6-v2/d1514c3162bbe87b343f565fadc62e6c06f04f03.lock +0 -0
hub/.locks/models--sentence-transformers--all-MiniLM-L6-v2/e7b0375001f109a6b8873d756ad4f7bbb15fbaa5.lock +0 -0
hub/.locks/models--sentence-transformers--all-MiniLM-L6-v2/fb140275c155a9c7c5a3b3e0e77a9e839594a938.lock +0 -0
hub/.locks/models--sentence-transformers--all-MiniLM-L6-v2/fd1b291129c607e5d49799f87cb219b27f98acdf.lock +0 -0
hub/.locks/models--sentence-transformers--paraphrase-multilingual-MiniLM-L12-v2/065b011b76fe98894d8975acfa28f028085fa35b.lock +0 -0
hub/.locks/models--sentence-transformers--paraphrase-multilingual-MiniLM-L12-v2/0da3507018a1a1c625ff93179ff60bdb9202cc6c.lock +0 -0
hub/.locks/models--sentence-transformers--paraphrase-multilingual-MiniLM-L12-v2/2c3387be76557bd40970cec13153b3bbf80407865484b209e655e5e4729076b8.lock +0 -0
hub/.locks/models--sentence-transformers--paraphrase-multilingual-MiniLM-L12-v2/2ea7ad0e45a9d1d1591782ba7e29a703d0758831.lock +0 -0
hub/.locks/models--sentence-transformers--paraphrase-multilingual-MiniLM-L12-v2/5fd10429389515d3e5cccdeda08cae5fea1ae82e.lock +0 -0
hub/.locks/models--sentence-transformers--paraphrase-multilingual-MiniLM-L12-v2/b974b349cb2d419ada11181750a733ff82f291ad.lock +0 -0
hub/.locks/models--sentence-transformers--paraphrase-multilingual-MiniLM-L12-v2/c06d5b49495f044e6380e68a60538be17a6bd5d1.lock +0 -0
hub/.locks/models--sentence-transformers--paraphrase-multilingual-MiniLM-L12-v2/d1514c3162bbe87b343f565fadc62e6c06f04f03.lock +0 -0
hub/.locks/models--sentence-transformers--paraphrase-multilingual-MiniLM-L12-v2/eaa086f0ffee582aeb45b36e34cdd1fe2d6de2bef61f8a559a1bbc9bd955917b.lock +0 -0
hub/.locks/models--sentence-transformers--paraphrase-multilingual-MiniLM-L12-v2/f7640f94e81bb7f4f04daf1668850b38763a13d9.lock +0 -0
hub/.locks/models--vikhyatk--moondream2/0204ed10c186a4c7c68f55dff8f26087a45898d6.lock +0 -0
hub/.locks/models--vikhyatk--moondream2/226b0752cac7789c48f0cb3ec53eda48b7be36cc.lock +0 -0
hub/.locks/models--vikhyatk--moondream2/4b1f9051605c296344c271b6d21c1e2e412a99e8.lock +0 -0
hub/.locks/models--vikhyatk--moondream2/4bf7aed8ba4325d23fa7cd348d795a27f3b272682536f08aca4cdd62cde79293.lock +0 -0
hub/.locks/models--vikhyatk--moondream2/5145e0895f2fe7f1ccb3eb9da69ec74ec9c680db.lock +0 -0
hub/.locks/models--vikhyatk--moondream2/619b6765140cdfaa9b9d20619cae17643a28265f.lock +0 -0
hub/.locks/models--vikhyatk--moondream2/6ac7b4364eba1fdd1d3981e4669aed01a2b0cec4.lock +0 -0
hub/.locks/models--vikhyatk--moondream2/7debb4784a7d53328d4d021fc46314bec4af3833.lock +0 -0
hub/.locks/models--vikhyatk--moondream2/84ef7fb594b5c0979e48bdeddb60a0adef33df0b.lock +0 -0
hub/.locks/models--vikhyatk--moondream2/923ea295017e96fb15774a11a903f99adff3bd4b.lock +0 -0
hub/.locks/models--vikhyatk--moondream2/98dd65a59581dac66a3601da9aadd1534f019006.lock +0 -0
hub/.locks/models--vikhyatk--moondream2/a4878a2253d32f2dcd950cde16ebedffb9644ae6.lock +0 -0
hub/.locks/models--vikhyatk--moondream2/ae1ab764382e24c65d906c16fba36650b634426a.lock +0 -0
hub/.locks/models--vikhyatk--moondream2/b93162eb8252d2d937a69f17971c76b8be87aedd.lock +0 -0
hub/.locks/models--vikhyatk--moondream2/c1148447551675ea739c440ee3e247df9f354d8f.lock +0 -0
hub/models--sentence-transformers--all-MiniLM-L6-v2/.no_exist/fa97f6e7cb1a59073dff9e6b13e2715cf7475ac9/adapter_config.json +0 -0
hub/models--sentence-transformers--all-MiniLM-L6-v2/.no_exist/fa97f6e7cb1a59073dff9e6b13e2715cf7475ac9/added_tokens.json +0 -0
hub/models--sentence-transformers--all-MiniLM-L6-v2/blobs/53aa51172d142c89d9012cce15ae4d6cc0ca6895895114379cacb4fab128d9db +3 -0
hub/models--sentence-transformers--all-MiniLM-L6-v2/blobs/59d594003bf59880a884c574bf88ef7555bb0202 +4 -0
hub/models--sentence-transformers--all-MiniLM-L6-v2/blobs/72b987fd805cfa2b58c4c8c952b274a11bfd5a00 +24 -0
hub/models--sentence-transformers--all-MiniLM-L6-v2/blobs/8cfec92309f5626a223304af2423e332f6d31887 +177 -0
hub/models--sentence-transformers--all-MiniLM-L6-v2/blobs/952a9b81c0bfd99800fabf352f69c7ccd46c5e43 +20 -0
hub/models--sentence-transformers--all-MiniLM-L6-v2/blobs/c79f2b6a0cea6f4b564fed1938984bace9d30ff0 +1 -0
hub/models--sentence-transformers--all-MiniLM-L6-v2/blobs/cb202bfe2e3c98645018a6d12f182a434c9d3e02 +0 -0
hub/models--sentence-transformers--all-MiniLM-L6-v2/blobs/d1514c3162bbe87b343f565fadc62e6c06f04f03 +7 -0
hub/models--sentence-transformers--all-MiniLM-L6-v2/blobs/e7b0375001f109a6b8873d756ad4f7bbb15fbaa5 +1 -0
hub/models--sentence-transformers--all-MiniLM-L6-v2/blobs/fb140275c155a9c7c5a3b3e0e77a9e839594a938 +0 -0
hub/models--sentence-transformers--all-MiniLM-L6-v2/blobs/fd1b291129c607e5d49799f87cb219b27f98acdf +7 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+hub/models--sentence-transformers--all-MiniLM-L6-v2/blobs/53aa51172d142c89d9012cce15ae4d6cc0ca6895895114379cacb4fab128d9db filter=lfs diff=lfs merge=lfs -text
+hub/models--sentence-transformers--paraphrase-multilingual-MiniLM-L12-v2/blobs/eaa086f0ffee582aeb45b36e34cdd1fe2d6de2bef61f8a559a1bbc9bd955917b filter=lfs diff=lfs merge=lfs -text
+hub/models--vikhyatk--moondream2/blobs/4bf7aed8ba4325d23fa7cd348d795a27f3b272682536f08aca4cdd62cde79293 filter=lfs diff=lfs merge=lfs -text

hub/.locks/models--sentence-transformers--all-MiniLM-L6-v2/53aa51172d142c89d9012cce15ae4d6cc0ca6895895114379cacb4fab128d9db.lock ADDED Viewed

File without changes

hub/.locks/models--sentence-transformers--all-MiniLM-L6-v2/59d594003bf59880a884c574bf88ef7555bb0202.lock ADDED Viewed

File without changes

hub/.locks/models--sentence-transformers--all-MiniLM-L6-v2/72b987fd805cfa2b58c4c8c952b274a11bfd5a00.lock ADDED Viewed

File without changes

hub/.locks/models--sentence-transformers--all-MiniLM-L6-v2/8cfec92309f5626a223304af2423e332f6d31887.lock ADDED Viewed

File without changes

hub/.locks/models--sentence-transformers--all-MiniLM-L6-v2/952a9b81c0bfd99800fabf352f69c7ccd46c5e43.lock ADDED Viewed

File without changes

hub/.locks/models--sentence-transformers--all-MiniLM-L6-v2/c79f2b6a0cea6f4b564fed1938984bace9d30ff0.lock ADDED Viewed

File without changes

hub/.locks/models--sentence-transformers--all-MiniLM-L6-v2/cb202bfe2e3c98645018a6d12f182a434c9d3e02.lock ADDED Viewed

File without changes

hub/.locks/models--sentence-transformers--all-MiniLM-L6-v2/d1514c3162bbe87b343f565fadc62e6c06f04f03.lock ADDED Viewed

File without changes

hub/.locks/models--sentence-transformers--all-MiniLM-L6-v2/e7b0375001f109a6b8873d756ad4f7bbb15fbaa5.lock ADDED Viewed

File without changes

hub/.locks/models--sentence-transformers--all-MiniLM-L6-v2/fb140275c155a9c7c5a3b3e0e77a9e839594a938.lock ADDED Viewed

File without changes

hub/.locks/models--sentence-transformers--all-MiniLM-L6-v2/fd1b291129c607e5d49799f87cb219b27f98acdf.lock ADDED Viewed

File without changes

hub/.locks/models--sentence-transformers--paraphrase-multilingual-MiniLM-L12-v2/065b011b76fe98894d8975acfa28f028085fa35b.lock ADDED Viewed

File without changes

hub/.locks/models--sentence-transformers--paraphrase-multilingual-MiniLM-L12-v2/0da3507018a1a1c625ff93179ff60bdb9202cc6c.lock ADDED Viewed

File without changes

hub/.locks/models--sentence-transformers--paraphrase-multilingual-MiniLM-L12-v2/2c3387be76557bd40970cec13153b3bbf80407865484b209e655e5e4729076b8.lock ADDED Viewed

File without changes

hub/.locks/models--sentence-transformers--paraphrase-multilingual-MiniLM-L12-v2/2ea7ad0e45a9d1d1591782ba7e29a703d0758831.lock ADDED Viewed

File without changes

hub/.locks/models--sentence-transformers--paraphrase-multilingual-MiniLM-L12-v2/5fd10429389515d3e5cccdeda08cae5fea1ae82e.lock ADDED Viewed

File without changes

hub/.locks/models--sentence-transformers--paraphrase-multilingual-MiniLM-L12-v2/b974b349cb2d419ada11181750a733ff82f291ad.lock ADDED Viewed

File without changes

hub/.locks/models--sentence-transformers--paraphrase-multilingual-MiniLM-L12-v2/c06d5b49495f044e6380e68a60538be17a6bd5d1.lock ADDED Viewed

File without changes

hub/.locks/models--sentence-transformers--paraphrase-multilingual-MiniLM-L12-v2/d1514c3162bbe87b343f565fadc62e6c06f04f03.lock ADDED Viewed

File without changes

hub/.locks/models--sentence-transformers--paraphrase-multilingual-MiniLM-L12-v2/eaa086f0ffee582aeb45b36e34cdd1fe2d6de2bef61f8a559a1bbc9bd955917b.lock ADDED Viewed

File without changes

hub/.locks/models--sentence-transformers--paraphrase-multilingual-MiniLM-L12-v2/f7640f94e81bb7f4f04daf1668850b38763a13d9.lock ADDED Viewed

File without changes

hub/.locks/models--vikhyatk--moondream2/0204ed10c186a4c7c68f55dff8f26087a45898d6.lock ADDED Viewed

File without changes

hub/.locks/models--vikhyatk--moondream2/226b0752cac7789c48f0cb3ec53eda48b7be36cc.lock ADDED Viewed

File without changes

hub/.locks/models--vikhyatk--moondream2/4b1f9051605c296344c271b6d21c1e2e412a99e8.lock ADDED Viewed

File without changes

hub/.locks/models--vikhyatk--moondream2/4bf7aed8ba4325d23fa7cd348d795a27f3b272682536f08aca4cdd62cde79293.lock ADDED Viewed

File without changes

hub/.locks/models--vikhyatk--moondream2/5145e0895f2fe7f1ccb3eb9da69ec74ec9c680db.lock ADDED Viewed

File without changes

hub/.locks/models--vikhyatk--moondream2/619b6765140cdfaa9b9d20619cae17643a28265f.lock ADDED Viewed

File without changes

hub/.locks/models--vikhyatk--moondream2/6ac7b4364eba1fdd1d3981e4669aed01a2b0cec4.lock ADDED Viewed

File without changes

hub/.locks/models--vikhyatk--moondream2/7debb4784a7d53328d4d021fc46314bec4af3833.lock ADDED Viewed

File without changes

hub/.locks/models--vikhyatk--moondream2/84ef7fb594b5c0979e48bdeddb60a0adef33df0b.lock ADDED Viewed

File without changes

hub/.locks/models--vikhyatk--moondream2/923ea295017e96fb15774a11a903f99adff3bd4b.lock ADDED Viewed

File without changes

hub/.locks/models--vikhyatk--moondream2/98dd65a59581dac66a3601da9aadd1534f019006.lock ADDED Viewed

File without changes

hub/.locks/models--vikhyatk--moondream2/a4878a2253d32f2dcd950cde16ebedffb9644ae6.lock ADDED Viewed

File without changes

hub/.locks/models--vikhyatk--moondream2/ae1ab764382e24c65d906c16fba36650b634426a.lock ADDED Viewed

File without changes

hub/.locks/models--vikhyatk--moondream2/b93162eb8252d2d937a69f17971c76b8be87aedd.lock ADDED Viewed

File without changes

hub/.locks/models--vikhyatk--moondream2/c1148447551675ea739c440ee3e247df9f354d8f.lock ADDED Viewed

File without changes

hub/models--sentence-transformers--all-MiniLM-L6-v2/.no_exist/fa97f6e7cb1a59073dff9e6b13e2715cf7475ac9/adapter_config.json ADDED Viewed

File without changes

hub/models--sentence-transformers--all-MiniLM-L6-v2/.no_exist/fa97f6e7cb1a59073dff9e6b13e2715cf7475ac9/added_tokens.json ADDED Viewed

File without changes

hub/models--sentence-transformers--all-MiniLM-L6-v2/blobs/53aa51172d142c89d9012cce15ae4d6cc0ca6895895114379cacb4fab128d9db ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:53aa51172d142c89d9012cce15ae4d6cc0ca6895895114379cacb4fab128d9db
+size 90868376

hub/models--sentence-transformers--all-MiniLM-L6-v2/blobs/59d594003bf59880a884c574bf88ef7555bb0202 ADDED Viewed

	@@ -0,0 +1,4 @@

+{
+  "max_seq_length": 256,
+  "do_lower_case": false
+}

hub/models--sentence-transformers--all-MiniLM-L6-v2/blobs/72b987fd805cfa2b58c4c8c952b274a11bfd5a00 ADDED Viewed

	@@ -0,0 +1,24 @@

+{
+  "_name_or_path": "nreimers/MiniLM-L6-H384-uncased",
+  "architectures": [
+    "BertModel"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "gradient_checkpointing": false,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 384,
+  "initializer_range": 0.02,
+  "intermediate_size": 1536,
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "bert",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 6,
+  "pad_token_id": 0,
+  "position_embedding_type": "absolute",
+  "transformers_version": "4.8.2",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 30522
+}

hub/models--sentence-transformers--all-MiniLM-L6-v2/blobs/8cfec92309f5626a223304af2423e332f6d31887 ADDED Viewed

	@@ -0,0 +1,177 @@

+---
+language: en
+license: apache-2.0
+library_name: sentence-transformers
+tags:
+- sentence-transformers
+- feature-extraction
+- sentence-similarity
+- transformers
+datasets:
+- s2orc
+- flax-sentence-embeddings/stackexchange_xml
+- ms_marco
+- gooaq
+- yahoo_answers_topics
+- code_search_net
+- search_qa
+- eli5
+- snli
+- multi_nli
+- wikihow
+- natural_questions
+- trivia_qa
+- embedding-data/sentence-compression
+- embedding-data/flickr30k-captions
+- embedding-data/altlex
+- embedding-data/simple-wiki
+- embedding-data/QQP
+- embedding-data/SPECTER
+- embedding-data/PAQ_pairs
+- embedding-data/WikiAnswers
+pipeline_tag: sentence-similarity
+---
+# all-MiniLM-L6-v2
+This is a [sentence-transformers](https://www.SBERT.net) model: It maps sentences & paragraphs to a 384 dimensional dense vector space and can be used for tasks like clustering or semantic search.
+## Usage (Sentence-Transformers)
+Using this model becomes easy when you have [sentence-transformers](https://www.SBERT.net) installed:
+```
+pip install -U sentence-transformers
+```
+Then you can use the model like this:
+```python
+from sentence_transformers import SentenceTransformer
+sentences = ["This is an example sentence", "Each sentence is converted"]
+model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
+embeddings = model.encode(sentences)
+print(embeddings)
+```
+## Usage (HuggingFace Transformers)
+Without [sentence-transformers](https://www.SBERT.net), you can use the model like this: First, you pass your input through the transformer model, then you have to apply the right pooling operation on top of the contextualized word embeddings.
+```python
+from transformers import AutoTokenizer, AutoModel
+import torch
+import torch.nn.functional as F
+#Mean Pooling - Take attention mask into account for correct averaging
+def mean_pooling(model_output, attention_mask):
+    token_embeddings = model_output[0] #First element of model_output contains all token embeddings
+    input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
+    return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(input_mask_expanded.sum(1), min=1e-9)
+# Sentences we want sentence embeddings for
+sentences = ['This is an example sentence', 'Each sentence is converted']
+# Load model from HuggingFace Hub
+tokenizer = AutoTokenizer.from_pretrained('sentence-transformers/all-MiniLM-L6-v2')
+model = AutoModel.from_pretrained('sentence-transformers/all-MiniLM-L6-v2')
+# Tokenize sentences
+encoded_input = tokenizer(sentences, padding=True, truncation=True, return_tensors='pt')
+# Compute token embeddings
+with torch.no_grad():
+    model_output = model(**encoded_input)
+# Perform pooling
+sentence_embeddings = mean_pooling(model_output, encoded_input['attention_mask'])
+# Normalize embeddings
+sentence_embeddings = F.normalize(sentence_embeddings, p=2, dim=1)
+print("Sentence embeddings:")
+print(sentence_embeddings)
+```
+## Evaluation Results
+For an automated evaluation of this model, see the *Sentence Embeddings Benchmark*: [https://seb.sbert.net](https://seb.sbert.net?model_name=sentence-transformers/all-MiniLM-L6-v2)
+------
+## Background
+The project aims to train sentence embedding models on very large sentence level datasets using a self-supervised
+contrastive learning objective. We used the pretrained [`nreimers/MiniLM-L6-H384-uncased`](https://huggingface.co/nreimers/MiniLM-L6-H384-uncased) model and fine-tuned it on a
+1B sentence pairs dataset. We use a contrastive learning objective: given a sentence from the pair, the model should predict which of a set of randomly sampled other sentences was actually paired with it in our dataset.
+We developed this model during the
+[Community week using JAX/Flax for NLP & CV](https://discuss.huggingface.co/t/open-to-the-community-community-week-using-jax-flax-for-nlp-cv/7104),
+organized by Hugging Face. We developed this model as part of the project:
+[Train the Best Sentence Embedding Model Ever with 1B Training Pairs](https://discuss.huggingface.co/t/train-the-best-sentence-embedding-model-ever-with-1b-training-pairs/7354). We benefited from efficient hardware infrastructure to run the project: 7 TPUs v3-8, as well as intervention from Google's Flax, JAX, and Cloud team members about efficient deep learning frameworks.
+## Intended uses
+Our model is intended to be used as a sentence and short paragraph encoder. Given an input text, it outputs a vector which captures
+the semantic information. The sentence vector may be used for information retrieval, clustering or sentence similarity tasks.
+By default, input text longer than 256 word pieces is truncated.
+## Training procedure
+### Pre-training
+We use the pretrained [`nreimers/MiniLM-L6-H384-uncased`](https://huggingface.co/nreimers/MiniLM-L6-H384-uncased) model. Please refer to the model card for more detailed information about the pre-training procedure.
+### Fine-tuning
+We fine-tune the model using a contrastive objective. Formally, we compute the cosine similarity for each possible sentence pair in the batch.
+We then apply the cross entropy loss by comparing with true pairs.
+#### Hyper parameters
+We trained our model on a TPU v3-8. We trained the model for 100k steps using a batch size of 1024 (128 per TPU core).
+We use a learning rate warm up of 500. The sequence length was limited to 128 tokens. We used the AdamW optimizer with
+a 2e-5 learning rate. The full training script is accessible in this current repository: `train_script.py`.
+#### Training data
+We use the concatenation of multiple datasets to fine-tune our model. The total number of sentence pairs is above 1 billion.
+We sampled each dataset with a weighted probability whose configuration is detailed in the `data_config.json` file.
+| Dataset                                                  | Paper                                    | Number of training tuples  |
+|--------------------------------------------------------|:----------------------------------------:|:--------------------------:|
+| [Reddit comments (2015-2018)](https://github.com/PolyAI-LDN/conversational-datasets/tree/master/reddit) | [paper](https://arxiv.org/abs/1904.06472) | 726,484,430 |
+| [S2ORC](https://github.com/allenai/s2orc) Citation pairs (Abstracts) | [paper](https://aclanthology.org/2020.acl-main.447/) | 116,288,806 |
+| [WikiAnswers](https://github.com/afader/oqa#wikianswers-corpus) Duplicate question pairs | [paper](https://doi.org/10.1145/2623330.2623677) | 77,427,422 |
+| [PAQ](https://github.com/facebookresearch/PAQ) (Question, Answer) pairs | [paper](https://arxiv.org/abs/2102.07033) | 64,371,441 |
+| [S2ORC](https://github.com/allenai/s2orc) Citation pairs (Titles) | [paper](https://aclanthology.org/2020.acl-main.447/) | 52,603,982 |
+| [S2ORC](https://github.com/allenai/s2orc) (Title, Abstract) | [paper](https://aclanthology.org/2020.acl-main.447/) | 41,769,185 |
+| [Stack Exchange](https://huggingface.co/datasets/flax-sentence-embeddings/stackexchange_xml) (Title, Body) pairs  | - | 25,316,456 |
+| [Stack Exchange](https://huggingface.co/datasets/flax-sentence-embeddings/stackexchange_xml) (Title+Body, Answer) pairs  | - | 21,396,559 |
+| [Stack Exchange](https://huggingface.co/datasets/flax-sentence-embeddings/stackexchange_xml) (Title, Answer) pairs  | - | 21,396,559 |
+| [MS MARCO](https://microsoft.github.io/msmarco/) triplets | [paper](https://doi.org/10.1145/3404835.3462804) | 9,144,553 |
+| [GOOAQ: Open Question Answering with Diverse Answer Types](https://github.com/allenai/gooaq) | [paper](https://arxiv.org/pdf/2104.08727.pdf) | 3,012,496 |
+| [Yahoo Answers](https://www.kaggle.com/soumikrakshit/yahoo-answers-dataset) (Title, Answer) | [paper](https://proceedings.neurips.cc/paper/2015/hash/250cf8b51c773f3f8dc8b4be867a9a02-Abstract.html) | 1,198,260 |
+| [Code Search](https://huggingface.co/datasets/code_search_net) | - | 1,151,414 |
+| [COCO](https://cocodataset.org/#home) Image captions | [paper](https://link.springer.com/chapter/10.1007%2F978-3-319-10602-1_48) | 828,395|
+| [SPECTER](https://github.com/allenai/specter) citation triplets | [paper](https://doi.org/10.18653/v1/2020.acl-main.207) | 684,100 |
+| [Yahoo Answers](https://www.kaggle.com/soumikrakshit/yahoo-answers-dataset) (Question, Answer) | [paper](https://proceedings.neurips.cc/paper/2015/hash/250cf8b51c773f3f8dc8b4be867a9a02-Abstract.html) | 681,164 |
+| [Yahoo Answers](https://www.kaggle.com/soumikrakshit/yahoo-answers-dataset) (Title, Question) | [paper](https://proceedings.neurips.cc/paper/2015/hash/250cf8b51c773f3f8dc8b4be867a9a02-Abstract.html) | 659,896 |
+| [SearchQA](https://huggingface.co/datasets/search_qa) | [paper](https://arxiv.org/abs/1704.05179) | 582,261 |
+| [Eli5](https://huggingface.co/datasets/eli5) | [paper](https://doi.org/10.18653/v1/p19-1346) | 325,475 |
+| [Flickr 30k](https://shannon.cs.illinois.edu/DenotationGraph/) | [paper](https://transacl.org/ojs/index.php/tacl/article/view/229/33) | 317,695 |
+| [Stack Exchange](https://huggingface.co/datasets/flax-sentence-embeddings/stackexchange_xml) Duplicate questions (titles) | | 304,525 |
+| AllNLI ([SNLI](https://nlp.stanford.edu/projects/snli/) and [MultiNLI](https://cims.nyu.edu/~sbowman/multinli/)) | [paper SNLI](https://doi.org/10.18653/v1/d15-1075), [paper MultiNLI](https://doi.org/10.18653/v1/n18-1101) | 277,230 |
+| [Stack Exchange](https://huggingface.co/datasets/flax-sentence-embeddings/stackexchange_xml) Duplicate questions (bodies) | | 250,519 |
+| [Stack Exchange](https://huggingface.co/datasets/flax-sentence-embeddings/stackexchange_xml) Duplicate questions (titles+bodies) | | 250,460 |
+| [Sentence Compression](https://github.com/google-research-datasets/sentence-compression) | [paper](https://www.aclweb.org/anthology/D13-1155/) | 180,000 |
+| [Wikihow](https://github.com/pvl/wikihow_pairs_dataset) | [paper](https://arxiv.org/abs/1810.09305) | 128,542 |
+| [Altlex](https://github.com/chridey/altlex/) | [paper](https://aclanthology.org/P16-1135.pdf) | 112,696 |
+| [Quora Question Triplets](https://quoradata.quora.com/First-Quora-Dataset-Release-Question-Pairs) | - | 103,663 |
+| [Simple Wikipedia](https://cs.pomona.edu/~dkauchak/simplification/) | [paper](https://www.aclweb.org/anthology/P11-2117/) | 102,225 |
+| [Natural Questions (NQ)](https://ai.google.com/research/NaturalQuestions) | [paper](https://transacl.org/ojs/index.php/tacl/article/view/1455) | 100,231 |
+| [SQuAD2.0](https://rajpurkar.github.io/SQuAD-explorer/) | [paper](https://aclanthology.org/P18-2124.pdf) | 87,599 |
+| [TriviaQA](https://huggingface.co/datasets/trivia_qa) | - | 73,346 |
+| **Total** | | **1,170,060,424** |

hub/models--sentence-transformers--all-MiniLM-L6-v2/blobs/952a9b81c0bfd99800fabf352f69c7ccd46c5e43 ADDED Viewed

	@@ -0,0 +1,20 @@

+[
+  {
+    "idx": 0,
+    "name": "0",
+    "path": "",
+    "type": "sentence_transformers.models.Transformer"
+  },
+  {
+    "idx": 1,
+    "name": "1",
+    "path": "1_Pooling",
+    "type": "sentence_transformers.models.Pooling"
+  },
+  {
+    "idx": 2,
+    "name": "2",
+    "path": "2_Normalize",
+    "type": "sentence_transformers.models.Normalize"
+  }
+]

hub/models--sentence-transformers--all-MiniLM-L6-v2/blobs/c79f2b6a0cea6f4b564fed1938984bace9d30ff0 ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"do_lower_case": true, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "name_or_path": "nreimers/MiniLM-L6-H384-uncased", "do_basic_tokenize": true, "never_split": null, "tokenizer_class": "BertTokenizer", "model_max_length": 512}

hub/models--sentence-transformers--all-MiniLM-L6-v2/blobs/cb202bfe2e3c98645018a6d12f182a434c9d3e02 ADDED Viewed

The diff for this file is too large to render. See raw diff

hub/models--sentence-transformers--all-MiniLM-L6-v2/blobs/d1514c3162bbe87b343f565fadc62e6c06f04f03 ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "word_embedding_dimension": 384,
+  "pooling_mode_cls_token": false,
+  "pooling_mode_mean_tokens": true,
+  "pooling_mode_max_tokens": false,
+  "pooling_mode_mean_sqrt_len_tokens": false
+}

hub/models--sentence-transformers--all-MiniLM-L6-v2/blobs/e7b0375001f109a6b8873d756ad4f7bbb15fbaa5 ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}

hub/models--sentence-transformers--all-MiniLM-L6-v2/blobs/fb140275c155a9c7c5a3b3e0e77a9e839594a938 ADDED Viewed

The diff for this file is too large to render. See raw diff

hub/models--sentence-transformers--all-MiniLM-L6-v2/blobs/fd1b291129c607e5d49799f87cb219b27f98acdf ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "__version__": {
+    "sentence_transformers": "2.0.0",
+    "transformers": "4.6.1",
+    "pytorch": "1.8.1"
+  }
+}