Training in progress, epoch 1, checkpoint
- last-checkpoint/1_Pooling/config.json +10 -0
- last-checkpoint/README.md +634 -0
- last-checkpoint/added_tokens.json +3 -0
- last-checkpoint/config.json +35 -0
- last-checkpoint/config_sentence_transformers.json +10 -0
- last-checkpoint/modules.json +14 -0
- last-checkpoint/optimizer.pt +3 -0
- last-checkpoint/pytorch_model.bin +3 -0
- last-checkpoint/rng_state.pth +3 -0
- last-checkpoint/scheduler.pt +3 -0
- last-checkpoint/sentence_bert_config.json +4 -0
- last-checkpoint/special_tokens_map.json +15 -0
- last-checkpoint/spm.model +3 -0
- last-checkpoint/tokenizer.json +0 -0
- last-checkpoint/tokenizer_config.json +58 -0
- last-checkpoint/trainer_state.json +125 -0
- last-checkpoint/training_args.bin +3 -0
last-checkpoint/1_Pooling/config.json
ADDED
@@ -0,0 +1,10 @@
{
  "word_embedding_dimension": 768,
  "pooling_mode_cls_token": false,
  "pooling_mode_mean_tokens": true,
  "pooling_mode_max_tokens": false,
  "pooling_mode_mean_sqrt_len_tokens": false,
  "pooling_mode_weightedmean_tokens": false,
  "pooling_mode_lasttoken": false,
  "include_prompt": true
}
last-checkpoint/README.md
ADDED
@@ -0,0 +1,634 @@
---
language:
- en
library_name: sentence-transformers
tags:
- sentence-transformers
- sentence-similarity
- feature-extraction
- generated_from_trainer
- dataset_size:96781
- loss:MultipleNegativesRankingLoss
- loss:AnglELoss
- loss:GISTEmbedLoss
- loss:OnlineContrastiveLoss
- loss:MultipleNegativesSymmetricRankingLoss
base_model: microsoft/deberta-v3-small
datasets:
- sentence-transformers/all-nli
- sentence-transformers/stsb
- tals/vitaminc
- nyu-mll/glue
- allenai/scitail
- sentence-transformers/xsum
- sentence-transformers/sentence-compression
widget:
- source_sentence: What dual titles did Frederick William hold?
  sentences:
  - The impact was increased by chronic overfishing, and by eutrophication that gave the entire ecosystem a short-term boost, causing the Mnemiopsis population to increase even faster than normal – and above all by the absence of efficient predators on these introduced ctenophores.
  - The "European Council" (rather than the Council, made up of different government Ministers) is composed of the Prime Ministers or executive Presidents of the member states.
  - Nearly 50,000 Huguenots established themselves in Germany, 20,000 of whom were welcomed in Brandenburg-Prussia, where they were granted special privileges (Edict of Potsdam) and churches in which to worship (such as the Church of St. Peter and St. Paul, Angermünde) by Frederick William, Elector of Brandenburg and Duke of Prussia.
- source_sentence: the Great Internet Mersenne Prime Search, what was the prize for finding a prime with at least 10 million digits?
  sentences:
  - Since September 2004, the official home of the Scottish Parliament has been a new Scottish Parliament Building, in the Holyrood area of Edinburgh.
  - The roughly half-mile stretch of Kearney Boulevard between Fresno Street and Thorne Ave was at one time the preferred neighborhood for Fresno's elite African-American families.
  - In 2009, the Great Internet Mersenne Prime Search project was awarded a US$100,000 prize for first discovering a prime with at least 10 million digits.
- source_sentence: A woman is tugging on a white sheet and laughing
  sentences:
  - Fruit characters decorate this child's bib
  - The person is amused.
  - Farmers preparing to feed their animals.
- source_sentence: A hispanic fruit market with many different fruits and vegetables in view on a city street with a man passing the store dressed in dark pants and a hoodie.
  sentences:
  - A fruit market and a man
  - The guys have guns.
  - Two people are playing an instruments on stage.
- source_sentence: Two Asian children, a boy and a girl, the girl looking squarely at the camera and the boy making a face.
  sentences:
  - A woman is outside.
  - there are children near the camera
  - A boy is playing on an inflatable ride.
pipeline_tag: sentence-similarity
---

# SentenceTransformer based on microsoft/deberta-v3-small

This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [microsoft/deberta-v3-small](https://huggingface.co/microsoft/deberta-v3-small) on the [nli-pairs](https://huggingface.co/datasets/sentence-transformers/all-nli), [sts-label](https://huggingface.co/datasets/sentence-transformers/stsb), [vitaminc-pairs](https://huggingface.co/datasets/tals/vitaminc), [qnli-contrastive](https://huggingface.co/datasets/nyu-mll/glue), [scitail-pairs-qa](https://huggingface.co/datasets/allenai/scitail), [scitail-pairs-pos](https://huggingface.co/datasets/allenai/scitail), [xsum-pairs](https://huggingface.co/datasets/sentence-transformers/xsum) and [compression-pairs](https://huggingface.co/datasets/sentence-transformers/sentence-compression) datasets. It maps sentences & paragraphs to a 768-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.

## Model Details

### Model Description
- **Model Type:** Sentence Transformer
- **Base model:** [microsoft/deberta-v3-small](https://huggingface.co/microsoft/deberta-v3-small) <!-- at revision a36c739020e01763fe789b4b85e2df55d6180012 -->
- **Maximum Sequence Length:** 512 tokens
- **Output Dimensionality:** 768 dimensions
- **Similarity Function:** Cosine Similarity
- **Training Datasets:**
    - [nli-pairs](https://huggingface.co/datasets/sentence-transformers/all-nli)
    - [sts-label](https://huggingface.co/datasets/sentence-transformers/stsb)
    - [vitaminc-pairs](https://huggingface.co/datasets/tals/vitaminc)
    - [qnli-contrastive](https://huggingface.co/datasets/nyu-mll/glue)
    - [scitail-pairs-qa](https://huggingface.co/datasets/allenai/scitail)
    - [scitail-pairs-pos](https://huggingface.co/datasets/allenai/scitail)
    - [xsum-pairs](https://huggingface.co/datasets/sentence-transformers/xsum)
    - [compression-pairs](https://huggingface.co/datasets/sentence-transformers/sentence-compression)
- **Language:** en
<!-- - **License:** Unknown -->

### Model Sources

- **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
- **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
- **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)

### Full Model Architecture

```
SentenceTransformer(
  (0): Transformer({'max_seq_length': 512, 'do_lower_case': False}) with Transformer model: DebertaV2Model
  (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
)
```

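The same two-module stack can be assembled by hand with the `sentence_transformers.models` API. A minimal sketch, assuming only the base checkpoint and the pooling settings shown above:

```python
from sentence_transformers import SentenceTransformer, models

# Module 0: the DeBERTa-v3-small backbone, truncating inputs at 512 tokens
transformer = models.Transformer("microsoft/deberta-v3-small", max_seq_length=512)

# Module 1: mean pooling over token embeddings, matching 1_Pooling/config.json
pooling = models.Pooling(
    transformer.get_word_embedding_dimension(),  # 768
    pooling_mode="mean",
)

model = SentenceTransformer(modules=[transformer, pooling])
```
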
## Usage

### Direct Usage (Sentence Transformers)

First install the Sentence Transformers library:

```bash
pip install -U sentence-transformers
```

Then you can load this model and run inference.
```python
from sentence_transformers import SentenceTransformer

# Download from the 🤗 Hub
model = SentenceTransformer("bobox/DeBERTaV3-small-GeneralSentenceTransformer-checkpoints-tmp")
# Run inference
sentences = [
    'Two Asian children, a boy and a girl, the girl looking squarely at the camera and the boy making a face.',
    'there are children near the camera',
    'A boy is playing on an inflatable ride.',
]
embeddings = model.encode(sentences)
print(embeddings.shape)
# [3, 768]

# Get the similarity scores for the embeddings
similarities = model.similarity(embeddings, embeddings)
print(similarities.shape)
# [3, 3]
```

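Beyond the pairwise similarity shown above, the card's summary also mentions paraphrase mining; `sentence_transformers.util.paraphrase_mining` performs the all-pairs search over a corpus. A minimal sketch with a toy corpus (the sentences are illustrative placeholders, not training data):

```python
from sentence_transformers import SentenceTransformer
from sentence_transformers.util import paraphrase_mining

model = SentenceTransformer("bobox/DeBERTaV3-small-GeneralSentenceTransformer-checkpoints-tmp")

corpus = [
    "A man is eating food.",
    "A man is eating a piece of bread.",
    "The girl is carrying a baby.",
]
# Each hit is [cosine_score, i, j] for a candidate paraphrase pair, highest score first
for score, i, j in paraphrase_mining(model, corpus):
    print(f"{score:.3f}  {corpus[i]!r} <-> {corpus[j]!r}")
```
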
<!--
### Direct Usage (Transformers)

<details><summary>Click to see the direct usage in Transformers</summary>

</details>
-->

<!--
### Downstream Usage (Sentence Transformers)

You can finetune this model on your own dataset.

<details><summary>Click to expand</summary>

</details>
-->

<!--
### Out-of-Scope Use

*List how the model may foreseeably be misused and address what users ought not to do with the model.*
-->

<!--
## Bias, Risks and Limitations

*What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
-->

<!--
### Recommendations

*What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
-->

## Training Details

### Training Datasets

#### nli-pairs

* Dataset: [nli-pairs](https://huggingface.co/datasets/sentence-transformers/all-nli) at [d482672](https://huggingface.co/datasets/sentence-transformers/all-nli/tree/d482672c8e74ce18da116f430137434ba2e52fab)
* Size: 7,500 training samples
* Columns: <code>sentence1</code> and <code>sentence2</code>
* Approximate statistics based on the first 1000 samples:
  |         | sentence1                                                                          | sentence2                                                                         |
  |:--------|:-----------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------|
  | type    | string                                                                             | string                                                                            |
  | details | <ul><li>min: 5 tokens</li><li>mean: 16.62 tokens</li><li>max: 62 tokens</li></ul> | <ul><li>min: 4 tokens</li><li>mean: 9.46 tokens</li><li>max: 29 tokens</li></ul> |
* Samples:
  | sentence1                                                                   | sentence2                                        |
  |:-----------------------------------------------------------------------------|:---------------------------------------------------|
  | <code>A person on a horse jumps over a broken down airplane.</code>        | <code>A person is outdoors, on a horse.</code>   |
  | <code>Children smiling and waving at camera</code>                         | <code>There are children present</code>          |
  | <code>A boy is jumping on skateboard in the middle of a red bridge.</code> | <code>The boy does a skateboarding trick.</code> |
* Loss: [<code>MultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativesrankingloss) with these parameters:
  ```json
  {
      "scale": 20.0,
      "similarity_fct": "cos_sim"
  }
  ```

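The parameter block above maps one-to-one onto the loss constructor. A minimal sketch of instantiating the same configuration, with the base checkpoint standing in for the model under training:

```python
from sentence_transformers import SentenceTransformer, losses, util

model = SentenceTransformer("microsoft/deberta-v3-small")

# scale=20.0 with cosine similarity, exactly as in the JSON block above
nli_loss = losses.MultipleNegativesRankingLoss(model, scale=20.0, similarity_fct=util.cos_sim)
```
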
#### sts-label

* Dataset: [sts-label](https://huggingface.co/datasets/sentence-transformers/stsb) at [ab7a5ac](https://huggingface.co/datasets/sentence-transformers/stsb/tree/ab7a5ac0e35aa22088bdcf23e7fd99b220e53308)
* Size: 5,749 training samples
* Columns: <code>sentence1</code>, <code>sentence2</code>, and <code>score</code>
* Approximate statistics based on the first 1000 samples:
  |         | sentence1                                                                         | sentence2                                                                         | score                                                           |
  |:--------|:-----------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------|:------------------------------------------------------------------|
  | type    | string                                                                            | string                                                                            | float                                                           |
  | details | <ul><li>min: 6 tokens</li><li>mean: 9.81 tokens</li><li>max: 27 tokens</li></ul> | <ul><li>min: 5 tokens</li><li>mean: 9.74 tokens</li><li>max: 25 tokens</li></ul> | <ul><li>min: 0.0</li><li>mean: 0.54</li><li>max: 1.0</li></ul> |
* Samples:
  | sentence1                                                   | sentence2                                                              | score             |
  |:--------------------------------------------------------------|:--------------------------------------------------------------------------|:---------------------|
  | <code>A plane is taking off.</code>                        | <code>An air plane is taking off.</code>                              | <code>1.0</code>  |
  | <code>A man is playing a large flute.</code>               | <code>A man is playing a flute.</code>                                | <code>0.76</code> |
  | <code>A man is spreading shreded cheese on a pizza.</code> | <code>A man is spreading shredded cheese on an uncooked pizza.</code> | <code>0.76</code> |
* Loss: [<code>AnglELoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#angleloss) with these parameters:
  ```json
  {
      "scale": 20.0,
      "similarity_fct": "pairwise_angle_sim"
  }
  ```

#### vitaminc-pairs

* Dataset: [vitaminc-pairs](https://huggingface.co/datasets/tals/vitaminc) at [be6febb](https://huggingface.co/datasets/tals/vitaminc/tree/be6febb761b0b2807687e61e0b5282e459df2fa0)
* Size: 3,695 training samples
* Columns: <code>label</code>, <code>sentence1</code>, and <code>sentence2</code>
* Approximate statistics based on the first 1000 samples:
  |         | label                        | sentence1                                                                          | sentence2                                                                           |
  |:--------|:--------------------------------|:-----------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------|
  | type    | int                          | string                                                                             | string                                                                              |
  | details | <ul><li>1: 100.00%</li></ul> | <ul><li>min: 6 tokens</li><li>mean: 16.21 tokens</li><li>max: 67 tokens</li></ul> | <ul><li>min: 8 tokens</li><li>mean: 37.22 tokens</li><li>max: 224 tokens</li></ul> |
* Samples:
  | label          | sentence1                                                          | sentence2                                                                                                                                                                                                                                                                                                                                                      |
  |:------------------|:----------------------------------------------------------------------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
  | <code>1</code> | <code>Ginn filled out the lineup in 2015 .</code>                 | <code>In 2015 , Ginn filled out the line up with adding new members Tyler Smith on bass , and Brandon Pertzborn on drums.</code>                                                                                                                                                                                                                              |
  | <code>1</code> | <code>Brent Grimes is a free agent .</code>                       | <code>Brent Omar Grimes ( born July 19 , 1983 ) is an American football cornerback who is currently a free agent .</code>                                                                                                                                                                                                                                     |
  | <code>1</code> | <code>The Symphony by Erich Korngold is in F-sharp Major .</code> | <code>`` Classical music critic Mark Swed of the Los Angeles Times pointed out that many classical composers used material from previous composers saying that `` '' John Williams all but lifted the core idea of his Star Wars soundtrack score from the Scherzo of Erich Korngold 's Symphony in F-sharp Major , written 25 years earlier . '' '' ''</code> |
* Loss: [<code>GISTEmbedLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#gistembedloss) with these parameters:
  ```json
  {'guide': SentenceTransformer(
    (0): Transformer({'max_seq_length': 512, 'do_lower_case': True}) with Transformer model: BertModel
    (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': True, 'pooling_mode_mean_tokens': False, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
    (2): Normalize()
  ), 'temperature': 0.05}
  ```

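`GISTEmbedLoss` uses a frozen guide encoder to veto in-batch negatives that the guide already considers near-duplicates of the positive. The dump above records the guide's architecture (a 384-dimensional BERT with CLS pooling) but not its checkpoint name, so the guide below is a hypothetical stand-in; a rough sketch:

```python
from sentence_transformers import SentenceTransformer, losses

model = SentenceTransformer("microsoft/deberta-v3-small")

# Hypothetical stand-in: the actual 384-dim guide checkpoint is not named in the card
guide = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

# The guide scores in-batch candidates and masks likely false negatives
gist_loss = losses.GISTEmbedLoss(model, guide, temperature=0.05)
```
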
#### qnli-contrastive

* Dataset: [qnli-contrastive](https://huggingface.co/datasets/nyu-mll/glue) at [bcdcba7](https://huggingface.co/datasets/nyu-mll/glue/tree/bcdcba79d07bc864c1c254ccfcedcce55bcc9a8c)
* Size: 7,500 training samples
* Columns: <code>sentence1</code>, <code>sentence2</code>, and <code>label</code>
* Approximate statistics based on the first 1000 samples:
  |         | sentence1                                                                          | sentence2                                                                           | label                        |
  |:--------|:-----------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------|:--------------------------------|
  | type    | string                                                                             | string                                                                              | int                          |
  | details | <ul><li>min: 6 tokens</li><li>mean: 13.84 tokens</li><li>max: 30 tokens</li></ul> | <ul><li>min: 7 tokens</li><li>mean: 34.77 tokens</li><li>max: 166 tokens</li></ul> | <ul><li>0: 100.00%</li></ul> |
* Samples:
  | sentence1                                                                                   | sentence2                                                                                                                                                                                                                                                                                                              | label          |
  |:-----------------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:------------------|
  | <code>The term, the New Haven Approach was what exactly?</code>                            | <code>A theory of international law, which argues for a sociological normative approach in regards to jurisprudence, is named the New Haven Approach, after the city.</code>                                                                                                                                          | <code>0</code> |
  | <code>As of April 2014, how many albums have Jay Z and Beyonce sold together?</code>       | <code>As of April 2014, the couple have sold a combined 300 million records together.</code>                                                                                                                                                                                                                          | <code>0</code> |
  | <code>Most U.S law, the kind of law we live everyday, consists of what kind of law?</code> | <code>In the dual-sovereign system of American federalism (actually tripartite because of the presence of Indian reservations), states are the plenary sovereigns, each with their own constitution, while the federal sovereign possesses only the limited supreme authority enumerated in the Constitution.</code> | <code>0</code> |
* Loss: [<code>OnlineContrastiveLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#onlinecontrastiveloss)

#### scitail-pairs-qa

* Dataset: [scitail-pairs-qa](https://huggingface.co/datasets/allenai/scitail) at [0cc4353](https://huggingface.co/datasets/allenai/scitail/tree/0cc4353235b289165dfde1c7c5d1be983f99ce44)
* Size: 14,987 training samples
* Columns: <code>sentence2</code> and <code>sentence1</code>
* Approximate statistics based on the first 1000 samples:
  |         | sentence2                                                                          | sentence1                                                                         |
  |:--------|:-----------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------|
  | type    | string                                                                             | string                                                                            |
  | details | <ul><li>min: 7 tokens</li><li>mean: 15.73 tokens</li><li>max: 41 tokens</li></ul> | <ul><li>min: 6 tokens</li><li>mean: 14.9 tokens</li><li>max: 33 tokens</li></ul> |
* Samples:
  | sentence2                                                                                                                              | sentence1                                                                                                      |
  |:------------------------------------------------------------------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------------------------------------|
  | <code>The nervous system plays a critical role in the regulation of vascular homeostasis.</code>                                      | <code>What system plays a critical role in the regulation of vascular homeostasis?</code>                     |
  | <code>A moose eats a plant is an example of a living thing that depends on another living thing to survive.</code>                    | <code>Which statement best identifies a living thing that depends on another living thing to survive?</code>  |
  | <code>Single-celled organisms and multicellular organisms have this in common: both have a way to get rid of waste materials.</code>  | <code>Which characteristic do single-celled organisms and multicellular organisms have in common?</code>      |
* Loss: [<code>MultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativesrankingloss) with these parameters:
  ```json
  {
      "scale": 20.0,
      "similarity_fct": "cos_sim"
  }
  ```

#### scitail-pairs-pos

* Dataset: [scitail-pairs-pos](https://huggingface.co/datasets/allenai/scitail) at [0cc4353](https://huggingface.co/datasets/allenai/scitail/tree/0cc4353235b289165dfde1c7c5d1be983f99ce44)
* Size: 8,600 training samples
* Columns: <code>sentence1</code> and <code>sentence2</code>
* Approximate statistics based on the first 1000 samples:
  |         | sentence1                                                                          | sentence2                                                                          |
  |:--------|:-----------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------|
  | type    | string                                                                             | string                                                                             |
  | details | <ul><li>min: 6 tokens</li><li>mean: 23.64 tokens</li><li>max: 69 tokens</li></ul> | <ul><li>min: 7 tokens</li><li>mean: 15.52 tokens</li><li>max: 36 tokens</li></ul> |
* Samples:
  | sentence1                                                                                                                               | sentence2                                                                                                    |
  |:--------------------------------------------------------------------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------------------------------------|
  | <code>population A group of organisms of the same species that occupy a particular geographic area or region.</code>                   | <code>You call a group of organisms of the same species that live in the same area a(n) population.</code>  |
  | <code>A galaxy is a vast island of hundreds of billions of stars (solar systems), nebulas and star clusters.</code>                    | <code>A galaxy is best described as a cluster of billions of stars.</code>                                   |
  | <code>Photosynthesis is an anabolic process by which plants synthesize glucose from the raw products carbon dioxide and water.</code>  | <code>Photosynthesis converts carbon dioxide and water into glucose.</code>                                  |
* Loss: [<code>MultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativesrankingloss) with these parameters:
  ```json
  {
      "scale": 20.0,
      "similarity_fct": "cos_sim"
  }
  ```

#### xsum-pairs

* Dataset: [xsum-pairs](https://huggingface.co/datasets/sentence-transformers/xsum) at [788ddaf](https://huggingface.co/datasets/sentence-transformers/xsum/tree/788ddafe04e539956d56b567bc32a036ee7b9206)
* Size: 3,750 training samples
* Columns: <code>sentence1</code> and <code>sentence2</code>
* Approximate statistics based on the first 1000 samples:
  |         | sentence1                                                                             | sentence2                                                                          |
  |:--------|:----------------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------|
  | type    | string                                                                                | string                                                                             |
  | details | <ul><li>min: 35 tokens</li><li>mean: 353.88 tokens</li><li>max: 512 tokens</li></ul> | <ul><li>min: 8 tokens</li><li>mean: 26.93 tokens</li><li>max: 67 tokens</li></ul> |
* Samples:
  | sentence1 | sentence2 |
  |:--------------|:--------------|
  | <code>About 24 of the rocks, known as Hertfordshire Puddingstone, were removed from the former St Albans Museum grounds.<br>It is thought they were taken from the garden between the 3 and 14 April.<br>Hertfordshire Police said the theft of the rare rock was being treated as a heritage crime.<br>PC Sean Lannon said Hertfordshire Puddingstone was "one of the world's rarest rocks" and part of the county's heritage.<br>"We are doing all we can to ensure that these stones are returned to the museum," he said.<br>The force has appealed for witnesses or anyone who may have been offered the rocks for sale to come forward.<br>Hertfordshire Puddingstone is a naturally occurring conglomerate consisting of rounded flint pebbles bound in silica cement, found mostly within the county.<br>It is thought to have originated from deposits laid down millions of years ago and is called puddingstone because the flints resemble the plums in a Christmas pudding.<br>Most of the rocks taken came from the site of the Seventh Day Adventist Church during the late 1970s.<br>St Albans Museum in Hatfield Road closed earlier this year ahead of its move to a new site in the Town Hall which is due to open next year.</code> | <code>A collection of prehistoric stones thought be about 54 million years old has been stolen from a former museum site, police have said.</code> |
  | <code>Instead, in today's ever more fitness and fashion conscious world, a growing number are willing to pay as much for a new gym outfit as they do for a new formal party dress.<br>This has led to a big increase over recent years in the value of the women's sportswear market.<br>In the US alone, combined sales of such products - from yoga leggings, sports bras and vests, to tracksuits - totalled $15.1bn (£10.3bn) in the 12 months to August 2011, according to research firm NPD Group. It said this was 10% higher than the prior year.<br>Meanwhile, sportswear giant Nike said last October that the rate of sales growth in its female clothing ranges was outpacing that of its products for men.<br>Analysts say that the rise in sales of women's sportswear has been helped by an increased emphasis on the style of the clothing - making them look and feel as good as possible - which in turn has led to an increase in the number of women wearing such items as fashionable leisurewear.<br>And with the market being so valuable, it is not surprising that a growing number of small companies - predominantly led by women - are launching their own ranges of upmarket female sportswear.<br>Katy Biddulph didn't need gym membership when she launched her women's sportswear brand Striders Edge in London back in 2011.<br>Initially running the business from a second floor one-bedroom flat, she would get her exercise by carrying all her deliveries up and down the stairs.<br>The 31-year-old says: "It looked like a fairly big business to the outside world when I was just starting out, but I was receiving all my goods from the manufacturer in Portugal from a truck outside my flat.<br>"I had hundreds of garments landing in the street, and I had to get all the boxes up the stairs by myself. I never slept that first year, but I just knew there was a gap in the market that I could fill.<br>"Now I've got an office that overlooks the London Eye."<br>Ms Biddulph set up the business after previously working for fellow British women's sportswear company Sweaty Betty, where she designed and managed a number of product ranges.<br>Her industry experience and knowledge persuaded a number of private investors to back her venture.<br>Striders Edge's clothes are now stocked by UK retailers Harrods, John Lewis and House of Fraser, and the brand launched in the US in February. It also sells globally via its website.<br>Now with nine members of staff, Ms Biddulph says she wants to hit £2m in sales within the next 12 months.<br>She adds: "You want your customer to feel great and part of something. As a female, you know the standard concerns."<br>But just how do you convince women to spend more than £60 on a t-shirt or a pair of leggings?<br>"It's not as hard as you would think," says Brittany Morris-Asquith, spokesperson and designer for Titika Active Couture, a Canadian brand based in Toronto. "Women are always looking for something different.<br>"They're asking more questions about fabrics, and if they understand the construction that goes into it, they're willing to pay for a better product."<br>Since Titika's founder Eileen Zhang, 32, opened her first shop in Toronto in 2009, Titika has expanded to seven stores across the province of Ontario.<br>And in March of this year it expended its online sales to the US, with plans to ship globally later this year.<br>Ms Morris-Asquith adds: "We provide clothing to women that make them feel good, we encourage them to try on things that they would never think about."<br>Titika also offers free in-store exercise classes to promote a healthy lifestyle - from yoga and kickboxing, to zumba dance workouts. And inspirational slogans affixed above fitting room mirrors urge against body shaming.<br>Catherine Elliott, a professor at the Telfer School of Management at the University of Ottawa, says that businesses such as Striders Edge and Titika share an ethos which is typical for female-led companies.<br>"They tend to have a double bottom line - to create wealth, but also to make positive change for girls and women," says Prof Elliott, who is co-author of a recently published book on the subject called Feminine Capital.<br>"When women are defining the objectives of [a clothing] business, they're going to see it as something that empowers women as opposed to just making them look sexy.<br>"The sports clothing industry is about feeling good about yourself, and wearing clothing that fits and makes you feel comfortable."<br>She adds: "A lot of women have talked about how being in sports and fitness has given them the leadership skills and confidence to be successful in corporate settings and entrepreneurship."<br>At New York-based women's sportswear business Live The Process, founder Robyn Berkley says the aim is for the brand to not just be about clothing, and instead "offer authenticity, honesty, and embrace the idealism of wellness".<br>Its website features editorial content from 32 contributors, offering tips ranging from changing careers to taking care of your skin.<br>Established in 2013, the company's clothing range was an immediate hit, with sales topping £1m in its first year.</code> | <code>The days when women would simply throw on an old T-shirt to do some exercise are long gone.</code> |
  | <code>Bovis Homes wanted to build the new estate on land between Collingtree Park and Collingtree village to the south of Northampton.<br>The plans were rejected by Northampton Borough Council in January.<br>Now a report to the council's planning committee, meeting next week, says Bovis Home intends to appeal.<br>The plans, for the site to the north of the M1, also included community buildings, a site for a primary school and open space.<br>Council officers said in the report that if the appeal was successful it would be important to have in place an agreement with the developer for it to help fund play areas, footpath improvements and a school and community buildings.<br>The original plans were recommended for approval by council officers but rejected by the council after fears were raised about flooding in the area and some homes being sited too near the M1.</code> | <code>Developers who saw their plans for 1,000 homes near a Northamptonshire village rejected by councillors are set to appeal against the decision.</code> |
* Loss: [<code>MultipleNegativesSymmetricRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativessymmetricrankingloss) with these parameters:
  ```json
  {
      "scale": 20.0,
      "similarity_fct": "cos_sim"
  }
  ```

#### compression-pairs

* Dataset: [compression-pairs](https://huggingface.co/datasets/sentence-transformers/sentence-compression) at [605bc91](https://huggingface.co/datasets/sentence-transformers/sentence-compression/tree/605bc91d95631895ba25b6eda51a3cb596976c90)
* Size: 45,000 training samples
* Columns: <code>sentence1</code> and <code>sentence2</code>
* Approximate statistics based on the first 1000 samples:
  |         | sentence1                                                                            | sentence2                                                                          |
  |:--------|:---------------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------|
  | type    | string                                                                               | string                                                                             |
  | details | <ul><li>min: 10 tokens</li><li>mean: 31.78 tokens</li><li>max: 170 tokens</li></ul> | <ul><li>min: 5 tokens</li><li>mean: 10.14 tokens</li><li>max: 29 tokens</li></ul> |
* Samples:
  | sentence1                                                                                                                                                                                                                                          | sentence2                                                                                                    |
  |:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------------------------------------|
  | <code>The USHL completed an expansion draft on Monday as 10 players who were on the rosters of USHL teams during the 2009-10 season were selected by the League's two newest entries, the Muskegon Lumberjacks and Dubuque Fighting Saints.</code> | <code>USHL completes expansion draft</code>                                                                  |
  | <code>NRT LLC, one of the nation's largest residential real estate brokerage companies, announced several executive appointments within its Coldwell Banker Residential Brokerage operations in Southern California.</code>                       | <code>NRT announces executive appointments at its Coldwell Banker operations in Southern California</code>   |
  | <code>A new survey shows 30 percent of Californians use Twitter, and more and more of us are using our smart phones to go online.</code>                                                                                                          | <code>Survey: 30 percent of Californians use Twitter</code>                                                  |
* Loss: [<code>MultipleNegativesSymmetricRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativessymmetricrankingloss) with these parameters:
  ```json
  {
      "scale": 20.0,
      "similarity_fct": "cos_sim"
  }
  ```

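All eight training datasets are combined through the trainer's dict interface: each dataset name keys into a matching loss dict, and the `round_robin` multi-dataset sampler (see the hyperparameters below) draws batches from each source in turn. A minimal two-dataset sketch, assuming the splits and column layouts line up with what each loss expects:

```python
from datasets import load_dataset
from sentence_transformers import SentenceTransformer, SentenceTransformerTrainer, losses

model = SentenceTransformer("microsoft/deberta-v3-small")

# Two of the eight sources, keyed by the dataset names used in this card
train_dataset = {
    "nli-pairs": load_dataset("sentence-transformers/all-nli", "pair", split="train[:7500]"),
    "sts-label": load_dataset("sentence-transformers/stsb", split="train"),
}

# One loss per dataset key
loss = {
    "nli-pairs": losses.MultipleNegativesRankingLoss(model, scale=20.0),
    "sts-label": losses.AnglELoss(model, scale=20.0),
}

trainer = SentenceTransformerTrainer(model=model, train_dataset=train_dataset, loss=loss)
trainer.train()
```
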
### Evaluation Datasets

#### nli-pairs

* Dataset: [nli-pairs](https://huggingface.co/datasets/sentence-transformers/all-nli) at [d482672](https://huggingface.co/datasets/sentence-transformers/all-nli/tree/d482672c8e74ce18da116f430137434ba2e52fab)
* Size: 2,000 evaluation samples
* Columns: <code>sentence1</code> and <code>sentence2</code>
* Approximate statistics based on the first 1000 samples:
  |         | sentence1                                                                          | sentence2                                                                         |
  |:--------|:-----------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------|
  | type    | string                                                                             | string                                                                            |
  | details | <ul><li>min: 5 tokens</li><li>mean: 17.64 tokens</li><li>max: 63 tokens</li></ul> | <ul><li>min: 4 tokens</li><li>mean: 9.67 tokens</li><li>max: 29 tokens</li></ul> |
* Samples:
  | sentence1                                                                                                                                                                        | sentence2                                                    |
  |:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----------------------------------------------------------------|
  | <code>Two women are embracing while holding to go packages.</code>                                                                                                              | <code>Two woman are holding packages.</code>                 |
  | <code>Two young children in blue jerseys, one with the number 9 and one with the number 2 are standing on wooden steps in a bathroom and washing their hands in a sink.</code>  | <code>Two kids in numbered jerseys wash their hands.</code>  |
  | <code>A man selling donuts to a customer during a world exhibition event held in the city of Angeles</code>                                                                     | <code>A man selling donuts to a customer.</code>             |
* Loss: [<code>MultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativesrankingloss) with these parameters:
  ```json
  {
      "scale": 20.0,
      "similarity_fct": "cos_sim"
  }
  ```

#### qnli-contrastive

* Dataset: [qnli-contrastive](https://huggingface.co/datasets/nyu-mll/glue) at [bcdcba7](https://huggingface.co/datasets/nyu-mll/glue/tree/bcdcba79d07bc864c1c254ccfcedcce55bcc9a8c)
* Size: 2,000 evaluation samples
* Columns: <code>sentence1</code>, <code>sentence2</code>, and <code>label</code>
* Approximate statistics based on the first 1000 samples:
  |         | sentence1                                                                          | sentence2                                                                           | label                        |
  |:--------|:-----------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------|:--------------------------------|
  | type    | string                                                                             | string                                                                              | int                          |
  | details | <ul><li>min: 6 tokens</li><li>mean: 14.13 tokens</li><li>max: 36 tokens</li></ul> | <ul><li>min: 4 tokens</li><li>mean: 36.58 tokens</li><li>max: 225 tokens</li></ul> | <ul><li>0: 100.00%</li></ul> |
* Samples:
  | sentence1                                                                  | sentence2                                                                                                                                          | label          |
  |:-------------------------------------------------------------------------------|:--------------------------------------------------------------------------------------------------------------------------------------------------------|:------------------|
  | <code>What came into force after the new constitution was herald?</code>  | <code>As of that day, the new constitution heralding the Second Republic came into force.</code>                                                  | <code>0</code> |
  | <code>What is the first major city in the stream of the Rhine?</code>     | <code>The most important tributaries in this area are the Ill below of Strasbourg, the Neckar in Mannheim and the Main across from Mainz.</code>  | <code>0</code> |
  | <code>What is the minimum required if you want to teach in Canada?</code> | <code>In most provinces a second Bachelor's Degree such as a Bachelor of Education is required to become a qualified teacher.</code>              | <code>0</code> |
* Loss: [<code>OnlineContrastiveLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#onlinecontrastiveloss)

### Training Hyperparameters
#### Non-Default Hyperparameters

- `eval_strategy`: steps
- `per_device_train_batch_size`: 64
- `per_device_eval_batch_size`: 16
- `learning_rate`: 3e-06
- `weight_decay`: 1e-10
- `num_train_epochs`: 5
- `lr_scheduler_type`: cosine
- `warmup_ratio`: 0.33
- `save_safetensors`: False
- `fp16`: True
- `push_to_hub`: True
- `hub_model_id`: bobox/DeBERTaV3-small-GeneralSentenceTransformer-checkpoints-tmp
- `hub_strategy`: checkpoint
- `batch_sampler`: no_duplicates
- `multi_dataset_batch_sampler`: round_robin

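The non-default values above translate directly into a `SentenceTransformerTrainingArguments` object. A sketch assuming a local `checkpoints` output directory (the actual `output_dir` is not recorded in the card):

```python
from sentence_transformers.training_args import (
    BatchSamplers,
    MultiDatasetBatchSamplers,
    SentenceTransformerTrainingArguments,
)

args = SentenceTransformerTrainingArguments(
    output_dir="checkpoints",  # assumed; not recorded in the card
    eval_strategy="steps",
    per_device_train_batch_size=64,
    per_device_eval_batch_size=16,
    learning_rate=3e-6,
    weight_decay=1e-10,
    num_train_epochs=5,
    lr_scheduler_type="cosine",
    warmup_ratio=0.33,
    save_safetensors=False,
    fp16=True,
    push_to_hub=True,
    hub_model_id="bobox/DeBERTaV3-small-GeneralSentenceTransformer-checkpoints-tmp",
    hub_strategy="checkpoint",
    batch_sampler=BatchSamplers.NO_DUPLICATES,
    multi_dataset_batch_sampler=MultiDatasetBatchSamplers.ROUND_ROBIN,
)
```
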
#### All Hyperparameters
<details><summary>Click to expand</summary>

- `overwrite_output_dir`: False
- `do_predict`: False
- `eval_strategy`: steps
- `prediction_loss_only`: True
- `per_device_train_batch_size`: 64
- `per_device_eval_batch_size`: 16
- `per_gpu_train_batch_size`: None
- `per_gpu_eval_batch_size`: None
- `gradient_accumulation_steps`: 1
- `eval_accumulation_steps`: None
- `learning_rate`: 3e-06
- `weight_decay`: 1e-10
- `adam_beta1`: 0.9
- `adam_beta2`: 0.999
- `adam_epsilon`: 1e-08
- `max_grad_norm`: 1.0
- `num_train_epochs`: 5
- `max_steps`: -1
- `lr_scheduler_type`: cosine
- `lr_scheduler_kwargs`: {}
- `warmup_ratio`: 0.33
- `warmup_steps`: 0
- `log_level`: passive
- `log_level_replica`: warning
- `log_on_each_node`: True
- `logging_nan_inf_filter`: True
- `save_safetensors`: False
- `save_on_each_node`: False
- `save_only_model`: False
- `restore_callback_states_from_checkpoint`: False
- `no_cuda`: False
- `use_cpu`: False
- `use_mps_device`: False
- `seed`: 42
- `data_seed`: None
- `jit_mode_eval`: False
- `use_ipex`: False
- `bf16`: False
- `fp16`: True
- `fp16_opt_level`: O1
- `half_precision_backend`: auto
- `bf16_full_eval`: False
- `fp16_full_eval`: False
- `tf32`: None
- `local_rank`: 0
- `ddp_backend`: None
- `tpu_num_cores`: None
- `tpu_metrics_debug`: False
- `debug`: []
- `dataloader_drop_last`: False
- `dataloader_num_workers`: 0
- `dataloader_prefetch_factor`: None
- `past_index`: -1
- `disable_tqdm`: False
- `remove_unused_columns`: True
- `label_names`: None
- `load_best_model_at_end`: False
- `ignore_data_skip`: False
- `fsdp`: []
- `fsdp_min_num_params`: 0
- `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
- `fsdp_transformer_layer_cls_to_wrap`: None
- `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
- `deepspeed`: None
- `label_smoothing_factor`: 0.0
- `optim`: adamw_torch
- `optim_args`: None
- `adafactor`: False
- `group_by_length`: False
- `length_column_name`: length
- `ddp_find_unused_parameters`: None
- `ddp_bucket_cap_mb`: None
- `ddp_broadcast_buffers`: False
- `dataloader_pin_memory`: True
- `dataloader_persistent_workers`: False
- `skip_memory_metrics`: True
- `use_legacy_prediction_loop`: False
- `push_to_hub`: True
- `resume_from_checkpoint`: None
- `hub_model_id`: bobox/DeBERTaV3-small-GeneralSentenceTransformer-checkpoints-tmp
- `hub_strategy`: checkpoint
- `hub_private_repo`: False
- `hub_always_push`: False
- `gradient_checkpointing`: False
- `gradient_checkpointing_kwargs`: None
- `include_inputs_for_metrics`: False
- `eval_do_concat_batches`: True
- `fp16_backend`: auto
- `push_to_hub_model_id`: None
- `push_to_hub_organization`: None
- `mp_parameters`: 
- `auto_find_batch_size`: False
- `full_determinism`: False
- `torchdynamo`: None
- `ray_scope`: last
- `ddp_timeout`: 1800
- `torch_compile`: False
- `torch_compile_backend`: None
- `torch_compile_mode`: None
- `dispatch_batches`: None
- `split_batches`: None
- `include_tokens_per_second`: False
- `include_num_input_tokens_seen`: False
- `neftune_noise_alpha`: None
- `optim_target_modules`: None
- `batch_eval_metrics`: False
- `batch_sampler`: no_duplicates
- `multi_dataset_batch_sampler`: round_robin

</details>

### Training Logs
| Epoch | Step | Training Loss | qnli-contrastive loss | nli-pairs loss |
|:-----:|:----:|:-------------:|:---------------------:|:--------------:|
| None  | 0    | -             | 6.0041                | 4.0946         |
| 0.25  | 116  | 4.9013        | 5.9679                | 4.0430         |
| 0.5   | 232  | 4.6399        | 5.5328                | 3.8479         |
| 0.75  | 348  | 4.4683        | 4.2996                | 3.6937         |
| 1.0   | 464  | 3.8129        | 2.8062                | 3.4773         |


### Framework Versions
- Python: 3.10.12
- Sentence Transformers: 3.0.1
- Transformers: 4.41.2
- PyTorch: 2.3.0+cu121
- Accelerate: 0.31.0
- Datasets: 2.20.0
- Tokenizers: 0.19.1

## Citation

### BibTeX

#### Sentence Transformers
```bibtex
@inproceedings{reimers-2019-sentence-bert,
    title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
    author = "Reimers, Nils and Gurevych, Iryna",
    booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
    month = "11",
    year = "2019",
    publisher = "Association for Computational Linguistics",
    url = "https://arxiv.org/abs/1908.10084",
}
```

#### MultipleNegativesRankingLoss
```bibtex
@misc{henderson2017efficient,
    title={Efficient Natural Language Response Suggestion for Smart Reply},
    author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil},
    year={2017},
    eprint={1705.00652},
    archivePrefix={arXiv},
    primaryClass={cs.CL}
}
```

#### AnglELoss
```bibtex
@misc{li2023angleoptimized,
    title={AnglE-optimized Text Embeddings},
    author={Xianming Li and Jing Li},
    year={2023},
    eprint={2309.12871},
    archivePrefix={arXiv},
    primaryClass={cs.CL}
}
```

#### GISTEmbedLoss
```bibtex
@misc{solatorio2024gistembed,
    title={GISTEmbed: Guided In-sample Selection of Training Negatives for Text Embedding Fine-tuning},
    author={Aivin V. Solatorio},
    year={2024},
    eprint={2402.16829},
    archivePrefix={arXiv},
    primaryClass={cs.LG}
}
```

<!--
## Glossary

*Clearly define terms in order to be accessible across audiences.*
-->

<!--
## Model Card Authors

*Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
-->

<!--
## Model Card Contact

*Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
-->
last-checkpoint/added_tokens.json
ADDED
@@ -0,0 +1,3 @@
{
  "[MASK]": 128000
}
last-checkpoint/config.json
ADDED
@@ -0,0 +1,35 @@
{
  "_name_or_path": "microsoft/deberta-v3-small",
  "architectures": [
    "DebertaV2Model"
  ],
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta-v2",
  "norm_rel_ebd": "layer_norm",
  "num_attention_heads": 12,
  "num_hidden_layers": 6,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": [
    "p2c",
    "c2p"
  ],
  "position_biased_input": false,
  "position_buckets": 256,
  "relative_attention": true,
  "share_att_key": true,
  "torch_dtype": "float32",
  "transformers_version": "4.41.2",
  "type_vocab_size": 0,
  "vocab_size": 128100
}

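This is the stock `microsoft/deberta-v3-small` backbone config: a 6-layer, 768-dimensional DeBERTa-v2 encoder with disentangled (p2c/c2p) relative attention. A small sketch (local checkpoint path assumed) to confirm the key dimensions:

```python
# Sketch: load the backbone config and check that it matches config.json.
from transformers import AutoConfig

cfg = AutoConfig.from_pretrained("last-checkpoint")
print(cfg.model_type, cfg.num_hidden_layers, cfg.hidden_size, cfg.vocab_size)
# expected: deberta-v2 6 768 128100
```
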
last-checkpoint/config_sentence_transformers.json
ADDED
@@ -0,0 +1,10 @@
{
  "__version__": {
    "sentence_transformers": "3.0.1",
    "transformers": "4.41.2",
    "pytorch": "2.3.0+cu121"
  },
  "prompts": {},
  "default_prompt_name": null,
  "similarity_fn_name": null
}

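With `similarity_fn_name` left null, Sentence Transformers 3.x falls back to cosine similarity, and no encode-time prompts are configured. A hedged usage sketch (example sentences are placeholders):

```python
# Sketch: load the checkpoint as a SentenceTransformer and compare two
# sentences with the default (cosine) similarity function.
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("last-checkpoint")
emb = model.encode(["A first example sentence.", "A second example sentence."])
print(model.similarity_fn_name)   # resolves to "cosine" when unset
print(model.similarity(emb, emb)) # 2x2 cosine-similarity matrix
```
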
last-checkpoint/modules.json
ADDED
@@ -0,0 +1,14 @@
[
  {
    "idx": 0,
    "name": "0",
    "path": "",
    "type": "sentence_transformers.models.Transformer"
  },
  {
    "idx": 1,
    "name": "1",
    "path": "1_Pooling",
    "type": "sentence_transformers.models.Pooling"
  }
]

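`modules.json` declares the two-stage pipeline: a Transformer encoder at the repo root followed by the mean-pooling module in `1_Pooling/`. As a sketch of what loading assembles (not an extra step you need), the equivalent model can be built by hand:

```python
# Sketch: rebuild the same Transformer -> mean-pooling pipeline explicitly.
from sentence_transformers import SentenceTransformer, models

word = models.Transformer("microsoft/deberta-v3-small", max_seq_length=512)
pool = models.Pooling(word.get_word_embedding_dimension(), pooling_mode="mean")
model = SentenceTransformer(modules=[word, pool])
print(model)  # shows Transformer followed by Pooling, as in modules.json
```
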
last-checkpoint/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:f1b597b0fc92d0b9e12711e77db2df5632db8e4f99e0202c6efc3f014202be84
size 1130520122

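Large binaries are stored as Git LFS pointers: only the blob's sha256 and byte size live in the repo. (At roughly 1.13 GB, the optimizer state is about twice the fp32 model weights below, consistent with AdamW keeping two moment tensors per parameter.) After `git lfs pull`, a fetched file can be checked against its pointer; the path here is an assumption:

```python
# Sketch: hash a fetched LFS file and compare it with the pointer's oid.
import hashlib

def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        while block := f.read(chunk_size):
            digest.update(block)
    return digest.hexdigest()

print(sha256_of("last-checkpoint/optimizer.pt"))
# expected to match the oid above: f1b597b0fc92d0b9e1...
```
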
last-checkpoint/pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:ff91915fd0ee36e4ba9d9d6ade05a5ea022b50bbe2d87122a050772b8597fe4a
size 565251810

last-checkpoint/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:a13f8f4ac4f71c8ac2411ec94584dffb535fff26a37d3787d5685cd9e588b3b4
size 14180

last-checkpoint/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:43709499a8cea7d53df5052d159d8c3a24f835a2f5af6141080ad335219b234b
size 1064

last-checkpoint/sentence_bert_config.json
ADDED
@@ -0,0 +1,4 @@
{
  "max_seq_length": 512,
  "do_lower_case": false
}

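The 512-token cap here, not the tokenizer's `model_max_length` sentinel, is what truncates inputs at encode time. It can be inspected, and lowered for speed, at runtime; a brief sketch with an assumed local path:

```python
# Sketch: the effective sequence cap comes from sentence_bert_config.json.
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("last-checkpoint")
print(model.max_seq_length)  # 512
model.max_seq_length = 256   # optional: trade context length for throughput
```
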
last-checkpoint/special_tokens_map.json
ADDED
@@ -0,0 +1,15 @@
{
  "bos_token": "[CLS]",
  "cls_token": "[CLS]",
  "eos_token": "[SEP]",
  "mask_token": "[MASK]",
  "pad_token": "[PAD]",
  "sep_token": "[SEP]",
  "unk_token": {
    "content": "[UNK]",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": false
  }
}

last-checkpoint/spm.model
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:c679fbf93643d19aab7ee10c0b99e460bdbc02fedf34b92b05af343b4af586fd
size 2464616

last-checkpoint/tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
last-checkpoint/tokenizer_config.json
ADDED
@@ -0,0 +1,58 @@
{
  "added_tokens_decoder": {
    "0": {
      "content": "[PAD]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "1": {
      "content": "[CLS]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "2": {
      "content": "[SEP]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "3": {
      "content": "[UNK]",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "128000": {
      "content": "[MASK]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    }
  },
  "bos_token": "[CLS]",
  "clean_up_tokenization_spaces": true,
  "cls_token": "[CLS]",
  "do_lower_case": false,
  "eos_token": "[SEP]",
  "mask_token": "[MASK]",
  "model_max_length": 1000000000000000019884624838656,
  "pad_token": "[PAD]",
  "sep_token": "[SEP]",
  "sp_model_kwargs": {},
  "split_by_punct": false,
  "tokenizer_class": "DebertaV2Tokenizer",
  "unk_token": "[UNK]",
  "vocab_type": "spm"
}

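The config pins `DebertaV2Tokenizer` over the SentencePiece vocab, with special tokens mirroring `special_tokens_map.json`; the huge `model_max_length` is the "no limit set" sentinel. A hedged round-trip sketch (local path assumed):

```python
# Sketch: tokenize a sentence and inspect the special-token framing.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("last-checkpoint")
ids = tok("An example sentence.")["input_ids"]
print(tok.convert_ids_to_tokens(ids))  # [CLS] ... [SEP] framing
```
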
last-checkpoint/trainer_state.json
ADDED
@@ -0,0 +1,125 @@
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0043103448275863,
  "eval_steps": 116,
  "global_step": 466,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.25,
      "grad_norm": 34.43177795410156,
      "learning_rate": 4.386422976501306e-07,
      "loss": 4.9013,
      "step": 116
    },
    {
      "epoch": 0.25,
      "eval_nli-pairs_loss": 4.042984962463379,
      "eval_nli-pairs_runtime": 1.346,
      "eval_nli-pairs_samples_per_second": 1485.842,
      "eval_nli-pairs_steps_per_second": 92.865,
      "step": 116
    },
    {
      "epoch": 0.25,
      "eval_qnli-contrastive_loss": 5.967944145202637,
      "eval_qnli-contrastive_runtime": 1.4504,
      "eval_qnli-contrastive_samples_per_second": 1378.915,
      "eval_qnli-contrastive_steps_per_second": 86.182,
      "step": 116
    },
    {
      "epoch": 0.5,
      "grad_norm": 12.268269538879395,
      "learning_rate": 8.929503916449087e-07,
      "loss": 4.6399,
      "step": 232
    },
    {
      "epoch": 0.5,
      "eval_nli-pairs_loss": 3.8479082584381104,
      "eval_nli-pairs_runtime": 1.2981,
      "eval_nli-pairs_samples_per_second": 1540.739,
      "eval_nli-pairs_steps_per_second": 96.296,
      "step": 232
    },
    {
      "epoch": 0.5,
      "eval_qnli-contrastive_loss": 5.532758712768555,
      "eval_qnli-contrastive_runtime": 1.4399,
      "eval_qnli-contrastive_samples_per_second": 1388.943,
      "eval_qnli-contrastive_steps_per_second": 86.809,
      "step": 232
    },
    {
      "epoch": 0.75,
      "grad_norm": 41.44931411743164,
      "learning_rate": 1.343342036553525e-06,
      "loss": 4.4683,
      "step": 348
    },
    {
      "epoch": 0.75,
      "eval_nli-pairs_loss": 3.69368314743042,
      "eval_nli-pairs_runtime": 1.304,
      "eval_nli-pairs_samples_per_second": 1533.712,
      "eval_nli-pairs_steps_per_second": 95.857,
      "step": 348
    },
    {
      "epoch": 0.75,
      "eval_qnli-contrastive_loss": 4.299612998962402,
      "eval_qnli-contrastive_runtime": 1.4461,
      "eval_qnli-contrastive_samples_per_second": 1383.027,
      "eval_qnli-contrastive_steps_per_second": 86.439,
      "step": 348
    },
    {
      "epoch": 1.0,
      "grad_norm": 16.208133697509766,
      "learning_rate": 1.7976501305483032e-06,
      "loss": 3.8129,
      "step": 464
    },
    {
      "epoch": 1.0,
      "eval_nli-pairs_loss": 3.477344036102295,
      "eval_nli-pairs_runtime": 1.2934,
      "eval_nli-pairs_samples_per_second": 1546.283,
      "eval_nli-pairs_steps_per_second": 96.643,
      "step": 464
    },
    {
      "epoch": 1.0,
      "eval_qnli-contrastive_loss": 2.806222915649414,
      "eval_qnli-contrastive_runtime": 1.4307,
      "eval_qnli-contrastive_samples_per_second": 1397.948,
      "eval_qnli-contrastive_steps_per_second": 87.372,
      "step": 464
    }
  ],
  "logging_steps": 116,
  "max_steps": 2320,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 5,
  "save_steps": 1160,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 64,
  "trial_name": null,
  "trial_params": null
}

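The state records one full epoch (466 of 2320 steps over 5 planned epochs), with logging and evaluation every 116 steps and checkpoints every 1160. Both eval losses fall monotonically over the epoch (nli-pairs 4.04 → 3.48; qnli-contrastive 5.97 → 2.81). A small sketch to pull those curves out of the file (path assumed):

```python
# Sketch: print every recorded loss from the trainer state, by step.
import json

with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

for record in state["log_history"]:
    for key, value in record.items():
        if key == "loss" or key.endswith("_loss"):
            print(f"step {record['step']:>4}  {key}: {value:.3f}")
```
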
last-checkpoint/training_args.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:e3927bcd6a0300af031763633963eca0939bd43c5b1e09d98c207a1618aa7358
size 5624

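`training_args.bin` is a pickled `TrainingArguments` object, so it only deserializes where `transformers` is importable, and, like any pickle, should only be loaded from a trusted source. A hedged sketch:

```python
# Sketch: inspect the pickled training arguments (trusted source assumed;
# weights_only=False is required because this is an arbitrary Python object).
import torch

args = torch.load("last-checkpoint/training_args.bin", weights_only=False)
print(args.learning_rate, args.per_device_train_batch_size, args.num_train_epochs)
```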