acayir64 committed
Commit 3fe70ae
1 Parent(s): aa015bf

Upload folder using huggingface_hub

Files changed (47)
  1. .gitattributes +6 -0
  2. 1_Pooling/config.json +10 -0
  3. README.md +17 -0
  4. checkpoint-1564/1_Pooling/config.json +10 -0
  5. checkpoint-1564/README.md +542 -0
  6. checkpoint-1564/config.json +26 -0
  7. checkpoint-1564/config_sentence_transformers.json +10 -0
  8. checkpoint-1564/model.safetensors +3 -0
  9. checkpoint-1564/modules.json +14 -0
  10. checkpoint-1564/optimizer.pt +3 -0
  11. checkpoint-1564/rng_state.pth +3 -0
  12. checkpoint-1564/scheduler.pt +3 -0
  13. checkpoint-1564/sentence_bert_config.json +4 -0
  14. checkpoint-1564/special_tokens_map.json +51 -0
  15. checkpoint-1564/tokenizer.json +3 -0
  16. checkpoint-1564/tokenizer_config.json +64 -0
  17. checkpoint-1564/trainer_state.json +512 -0
  18. checkpoint-1564/training_args.bin +3 -0
  19. checkpoint-1564/unigram.json +3 -0
  20. checkpoint-3128/1_Pooling/config.json +10 -0
  21. checkpoint-3128/README.md +610 -0
  22. checkpoint-3128/config.json +26 -0
  23. checkpoint-3128/config_sentence_transformers.json +10 -0
  24. checkpoint-3128/model.safetensors +3 -0
  25. checkpoint-3128/modules.json +14 -0
  26. checkpoint-3128/optimizer.pt +3 -0
  27. checkpoint-3128/rng_state.pth +3 -0
  28. checkpoint-3128/scheduler.pt +3 -0
  29. checkpoint-3128/sentence_bert_config.json +4 -0
  30. checkpoint-3128/special_tokens_map.json +51 -0
  31. checkpoint-3128/tokenizer.json +3 -0
  32. checkpoint-3128/tokenizer_config.json +64 -0
  33. checkpoint-3128/trainer_state.json +989 -0
  34. checkpoint-3128/training_args.bin +3 -0
  35. checkpoint-3128/unigram.json +3 -0
  36. config.json +26 -0
  37. config_sentence_transformers.json +10 -0
  38. model.safetensors +3 -0
  39. modules.json +14 -0
  40. runs/May29_18-52-15_338a77628651/events.out.tfevents.1717008737.338a77628651.19835.0 +2 -2
  41. sentence_bert_config.json +4 -0
  42. special_tokens_map.json +51 -0
  43. tokenizer.json +3 -0
  44. tokenizer_config.json +64 -0
  45. training_args.bin +3 -0
  46. training_params.json +33 -0
  47. unigram.json +3 -0
.gitattributes CHANGED
@@ -33,3 +33,9 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ checkpoint-1564/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ checkpoint-1564/unigram.json filter=lfs diff=lfs merge=lfs -text
+ checkpoint-3128/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ checkpoint-3128/unigram.json filter=lfs diff=lfs merge=lfs -text
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ unigram.json filter=lfs diff=lfs merge=lfs -text
1_Pooling/config.json ADDED
@@ -0,0 +1,10 @@
+ {
+   "word_embedding_dimension": 384,
+   "pooling_mode_cls_token": false,
+   "pooling_mode_mean_tokens": true,
+   "pooling_mode_max_tokens": false,
+   "pooling_mode_mean_sqrt_len_tokens": false,
+   "pooling_mode_weightedmean_tokens": false,
+   "pooling_mode_lasttoken": false,
+   "include_prompt": true
+ }
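
Only `pooling_mode_mean_tokens` is enabled in this config, so sentence embeddings are the attention-masked mean of the token embeddings. A minimal sketch of that operation (an illustration of the configured behavior, not the sentence-transformers implementation):

```python
import torch

def mean_pool(token_embeddings: torch.Tensor, attention_mask: torch.Tensor) -> torch.Tensor:
    """Masked mean over the sequence axis: (batch, seq, 384) -> (batch, 384)."""
    mask = attention_mask.unsqueeze(-1).float()     # (batch, seq, 1)
    summed = (token_embeddings * mask).sum(dim=1)   # sum over non-padding tokens
    counts = mask.sum(dim=1).clamp(min=1e-9)        # avoid division by zero
    return summed / counts
```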
README.md ADDED
@@ -0,0 +1,17 @@
+
+ ---
+ tags:
+ - autotrain
+ - sentence-transformers
+ widget:
+ - text: "I love AutoTrain"
+ datasets:
+ - ucsahin/TR-Extractive-QA-5K
+ ---
+
+ # Model Trained Using AutoTrain
+
+ - Problem type: Sentence Transformers
+
+ ## Validation Metrics
+ No validation metrics available
checkpoint-1564/1_Pooling/config.json ADDED
@@ -0,0 +1,10 @@
+ {
+   "word_embedding_dimension": 384,
+   "pooling_mode_cls_token": false,
+   "pooling_mode_mean_tokens": true,
+   "pooling_mode_max_tokens": false,
+   "pooling_mode_mean_sqrt_len_tokens": false,
+   "pooling_mode_weightedmean_tokens": false,
+   "pooling_mode_lasttoken": false,
+   "include_prompt": true
+ }
checkpoint-1564/README.md ADDED
@@ -0,0 +1,542 @@
+ ---
+ language:
+ - multilingual
+ - zh
+ - ja
+ - ar
+ - ko
+ - de
+ - fr
+ - es
+ - pt
+ - hi
+ - id
+ - it
+ - tr
+ - ru
+ - bn
+ - ur
+ - mr
+ - ta
+ - vi
+ - fa
+ - pl
+ - uk
+ - nl
+ - sv
+ - he
+ - sw
+ - ps
+ library_name: sentence-transformers
+ tags:
+ - sentence-transformers
+ - sentence-similarity
+ - feature-extraction
+ - dataset_size:10K<n<100K
+ - loss:CoSENTLoss
+ base_model: sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2
+ metrics:
+ - pearson_cosine
+ - spearman_cosine
+ - pearson_manhattan
+ - spearman_manhattan
+ - pearson_euclidean
+ - spearman_euclidean
+ - pearson_dot
+ - spearman_dot
+ - pearson_max
+ - spearman_max
+ widget:
+ - source_sentence: Is that wrong?
+   sentences:
+   - Is that such a terrible thing?
+   - Kennedy korkunç bir savcıydı.
+   - Tom bir davada tanıklık ediyordu.
+ - source_sentence: Orada mıydılar?
+   sentences:
+   - Were they in there?
+   - İlki ikincisini anlamlı kılar.
+   - Alerji tedavisi gelişiyor.
+ - source_sentence: He is not alone
+   sentences:
+   - It is not confusing
+   - The Hawks were humanitarians.
+   - Tom bir davada tanıklık ediyordu.
+ - source_sentence: Yaptığın şey bu.
+   sentences:
+   - Onurlu işler yapıyorsunuz.
+   - Weisberg azınlık adına konuştu.
+   - Robert Ferrigno Kaliforniya'da doğdu.
+ - source_sentence: Ben vatansızım.
+   sentences:
+   - I am stateless.
+   - Kendi tekniğini tercih ediyor.
+   - Mermiler camdan fırladı.
+ pipeline_tag: sentence-similarity
+ model-index:
+ - name: SentenceTransformer based on sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2
+   results:
+   - task:
+       type: semantic-similarity
+       name: Semantic Similarity
+     dataset:
+       name: tr ling
+       type: tr_ling
+     metrics:
+     - type: pearson_cosine
+       value: 0.037604255015168134
+       name: Pearson Cosine
+     - type: spearman_cosine
+       value: 0.04804112988506346
+       name: Spearman Cosine
+     - type: pearson_manhattan
+       value: 0.034740275152181296
+       name: Pearson Manhattan
+     - type: spearman_manhattan
+       value: 0.03769766156967754
+       name: Spearman Manhattan
+     - type: pearson_euclidean
+       value: 0.03698411306484619
+       name: Pearson Euclidean
+     - type: spearman_euclidean
+       value: 0.03903062430281842
+       name: Spearman Euclidean
+     - type: pearson_dot
+       value: 0.0673696846368413
+       name: Pearson Dot
+     - type: spearman_dot
+       value: 0.06818119362900125
+       name: Spearman Dot
+     - type: pearson_max
+       value: 0.0673696846368413
+       name: Pearson Max
+     - type: spearman_max
+       value: 0.06818119362900125
+       name: Spearman Max
+ ---
+
+ # SentenceTransformer based on sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2
+
+ This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2](https://huggingface.co/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2) on the [MoritzLaurer/multilingual-nli-26lang-2mil7](https://huggingface.co/datasets/MoritzLaurer/multilingual-nli-26lang-2mil7) dataset. It maps sentences & paragraphs to a 384-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
+
+ ## Model Details
+
+ ### Model Description
+ - **Model Type:** Sentence Transformer
+ - **Base model:** [sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2](https://huggingface.co/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2) <!-- at revision bf3bf13ab40c3157080a7ab344c831b9ad18b5eb -->
+ - **Maximum Sequence Length:** 128 tokens
+ - **Output Dimensionality:** 384 dimensions
+ - **Similarity Function:** Cosine Similarity
+ - **Training Dataset:**
+   - [MoritzLaurer/multilingual-nli-26lang-2mil7](https://huggingface.co/datasets/MoritzLaurer/multilingual-nli-26lang-2mil7)
+ - **Languages:** multilingual, zh, ja, ar, ko, de, fr, es, pt, hi, id, it, tr, ru, bn, ur, mr, ta, vi, fa, pl, uk, nl, sv, he, sw, ps
+ <!-- - **License:** Unknown -->
+
+ ### Model Sources
+
+ - **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
+ - **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
+ - **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
+
+ ### Full Model Architecture
+
+ ```
+ SentenceTransformer(
+   (0): Transformer({'max_seq_length': 128, 'do_lower_case': False}) with Transformer model: BertModel
+   (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
+ )
+ ```
+
+ ## Usage
+
+ ### Direct Usage (Sentence Transformers)
+
+ First install the Sentence Transformers library:
+
+ ```bash
+ pip install -U sentence-transformers
+ ```
+
+ Then you can load this model and run inference.
+ ```python
+ from sentence_transformers import SentenceTransformer
+
+ # Download from the 🤗 Hub
+ model = SentenceTransformer("sentence_transformers_model_id")
+ # Run inference
+ sentences = [
+     'Ben vatansızım.',
+     'I am stateless.',
+     'Kendi tekniğini tercih ediyor.',
+ ]
+ embeddings = model.encode(sentences)
+ print(embeddings.shape)
+ # [3, 384]
+
+ # Get the similarity scores for the embeddings
+ similarities = model.similarity(embeddings, embeddings)
+ print(similarities.shape)
+ # [3, 3]
+ ```
+
+ <!--
+ ### Direct Usage (Transformers)
+
+ <details><summary>Click to see the direct usage in Transformers</summary>
+
+ </details>
+ -->
+
+ <!--
+ ### Downstream Usage (Sentence Transformers)
+
+ You can finetune this model on your own dataset.
+
+ <details><summary>Click to expand</summary>
+
+ </details>
+ -->
+
+ <!--
+ ### Out-of-Scope Use
+
+ *List how the model may foreseeably be misused and address what users ought not to do with the model.*
+ -->
+
+ ## Evaluation
+
+ ### Metrics
+
+ #### Semantic Similarity
+ * Dataset: `tr_ling`
+ * Evaluated with [<code>EmbeddingSimilarityEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.EmbeddingSimilarityEvaluator)
+
+ | Metric             | Value      |
+ |:-------------------|:-----------|
+ | pearson_cosine     | 0.0376     |
+ | spearman_cosine    | 0.048      |
+ | pearson_manhattan  | 0.0347     |
+ | spearman_manhattan | 0.0377     |
+ | pearson_euclidean  | 0.037      |
+ | spearman_euclidean | 0.039      |
+ | pearson_dot        | 0.0674     |
+ | spearman_dot       | 0.0682     |
+ | pearson_max        | 0.0674     |
+ | **spearman_max**   | **0.0682** |
+
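
A hedged sketch of re-running the evaluator behind this table. The three pairs below are toy stand-ins borrowed from the widget examples above; the real run used the 5,000-pair evaluation split with its gold scores, and the `[0, 1]` normalization shown here is an assumption:

```python
from sentence_transformers import SentenceTransformer
from sentence_transformers.evaluation import EmbeddingSimilarityEvaluator

model = SentenceTransformer("sentence_transformers_model_id")
evaluator = EmbeddingSimilarityEvaluator(
    sentences1=["Ben vatansızım.", "Orada mıydılar?", "He is not alone"],
    sentences2=["I am stateless.", "Were they in there?", "It is not confusing"],
    scores=[1.0, 1.0, 0.5],  # assumed normalized gold similarity scores
    name="tr_ling",
)
print(evaluator(model))  # pearson/spearman for cosine, dot, euclidean, manhattan
```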
+ <!--
+ ## Bias, Risks and Limitations
+
+ *What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
+ -->
+
+ <!--
+ ### Recommendations
+
+ *What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
+ -->
+
+ ## Training Details
+
+ ### Training Dataset
+
+ #### MoritzLaurer/multilingual-nli-26lang-2mil7
+
+ * Dataset: [MoritzLaurer/multilingual-nli-26lang-2mil7](https://huggingface.co/datasets/MoritzLaurer/multilingual-nli-26lang-2mil7) at [510a233](https://huggingface.co/datasets/MoritzLaurer/multilingual-nli-26lang-2mil7/tree/510a233972a0d7ff0f767d82f46e046832c10538)
+ * Size: 25,000 training samples
+ * Columns: <code>premise_original</code>, <code>hypothesis_original</code>, <code>score</code>, <code>sentence1</code>, and <code>sentence2</code>
+ * Approximate statistics based on the first 1000 samples:
+   |         | premise_original | hypothesis_original | score | sentence1 | sentence2 |
+   |:--------|:-----------------|:--------------------|:------|:----------|:----------|
+   | type    | string | string | int | string | string |
+   | details | <ul><li>min: 4 tokens</li><li>mean: 29.3 tokens</li><li>max: 107 tokens</li></ul> | <ul><li>min: 4 tokens</li><li>mean: 15.62 tokens</li><li>max: 40 tokens</li></ul> | <ul><li>0: ~34.50%</li><li>1: ~33.30%</li><li>2: ~32.20%</li></ul> | <ul><li>min: 4 tokens</li><li>mean: 28.28 tokens</li><li>max: 101 tokens</li></ul> | <ul><li>min: 4 tokens</li><li>mean: 15.39 tokens</li><li>max: 38 tokens</li></ul> |
+ * Samples:
+   | premise_original | hypothesis_original | score | sentence1 | sentence2 |
+   |:-----------------|:--------------------|:------|:----------|:----------|
+   | <code>N, the total number of LC50 values used in calculating the CV(%) varied with organism and toxicant because some data were rejected due to water hardness, lack of concentration measurements, and/or because some of the LC50s were not calculable.</code> | <code>Most discarded data was rejected due to water hardness.</code> | <code>1</code> | <code>N, CV'nin hesaplanmasında kullanılan LC50 değerlerinin toplam sayısı (%) organizma ve toksik madde ile çeşitlidir, çünkü bazı veriler su sertliği, konsantrasyon ölçümlerinin eksikliği ve / veya LC50'lerin bazıları hesaplanamaz olduğu için reddedilmiştir.</code> | <code>Atılan verilerin çoğu su sertliği nedeniyle reddedildi.</code> |
+   | <code>As the home of the Venus de Milo and Mona Lisa, the Louvre drew almost unmanageable crowds until President Mitterrand ordered its re-organization in the 1980s.</code> | <code>The Louvre is home of the Venus de Milo and Mona Lisa.</code> | <code>0</code> | <code>Venus de Milo ve Mona Lisa'nın evi olarak Louvre, Başkan Mitterrand'ın 1980'lerde yeniden düzenlenmesini emredene kadar neredeyse yönetilemez kalabalıklar çekti.</code> | <code>Louvre, Venus de Milo ve Mona Lisa'nın evidir.</code> |
+   | <code>A year ago, the wife of the Oxford don noticed that the pattern on Kleenex quilted tissue uncannily resembled the Penrose Arrowed Rhombi tilings pattern, which Sir Roger had invented--and copyrighted--in 1974.</code> | <code>It has been recently found out a similarity between the pattern on the recent Kleenex quilted tissue and the one of the Penrose Arrowed Rhombi tilings.</code> | <code>0</code> | <code>Bir yıl önce Oxford'un karısı, Kleenex kapitone dokudaki desenin 1974'te Sir Roger'ın icat ettiği -ve telif hakkı olan - Penrose Arrowed Rhombi tilings desenine benzediğini fark etti.</code> | <code>Yakın zamanda, son Kleenex kapitone dokudaki desen ile Penrose Arrowed Rhombi döşemelerinden biri arasında bir benzerlik bulunmuştur.</code> |
+ * Loss: [<code>CoSENTLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#cosentloss) with these parameters:
+   ```json
+   {
+       "scale": 20.0,
+       "similarity_fct": "pairwise_cos_sim"
+   }
+   ```
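
With these parameters, CoSENT ranks pairs by cosine similarity: whenever one pair has a higher gold score than another, its predicted cosine similarity should also be higher, with violations scaled by 20. A minimal sketch of the objective based on the published CoSENT formulation (an illustration, not the sentence-transformers source):

```python
import torch

def cosent_loss(cos_sims: torch.Tensor, labels: torch.Tensor, scale: float = 20.0) -> torch.Tensor:
    """cos_sims: predicted cosine similarity per pair; labels: gold score per pair."""
    # diffs[i, j] = scale * (cos_j - cos_i); penalized whenever labels[i] > labels[j],
    # i.e. whenever a higher-scored pair has a lower predicted similarity.
    diffs = scale * (cos_sims[None, :] - cos_sims[:, None])
    mask = labels[:, None] > labels[None, :]
    terms = diffs[mask]
    zero = torch.zeros(1, device=cos_sims.device)  # the "+1" inside the log
    return torch.logsumexp(torch.cat([zero, terms]), dim=0)
```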
+
+ ### Evaluation Dataset
+
+ #### MoritzLaurer/multilingual-nli-26lang-2mil7
+
+ * Dataset: [MoritzLaurer/multilingual-nli-26lang-2mil7](https://huggingface.co/datasets/MoritzLaurer/multilingual-nli-26lang-2mil7) at [510a233](https://huggingface.co/datasets/MoritzLaurer/multilingual-nli-26lang-2mil7/tree/510a233972a0d7ff0f767d82f46e046832c10538)
+ * Size: 5,000 evaluation samples
+ * Columns: <code>premise_original</code>, <code>hypothesis_original</code>, <code>score</code>, <code>sentence1</code>, and <code>sentence2</code>
+ * Approximate statistics based on the first 1000 samples:
+   |         | premise_original | hypothesis_original | score | sentence1 | sentence2 |
+   |:--------|:-----------------|:--------------------|:------|:----------|:----------|
+   | type    | string | string | int | string | string |
+   | details | <ul><li>min: 5 tokens</li><li>mean: 30.3 tokens</li><li>max: 99 tokens</li></ul> | <ul><li>min: 6 tokens</li><li>mean: 15.11 tokens</li><li>max: 56 tokens</li></ul> | <ul><li>0: ~34.50%</li><li>1: ~29.90%</li><li>2: ~35.60%</li></ul> | <ul><li>min: 6 tokens</li><li>mean: 29.94 tokens</li><li>max: 106 tokens</li></ul> | <ul><li>min: 5 tokens</li><li>mean: 15.29 tokens</li><li>max: 52 tokens</li></ul> |
+ * Samples:
+   | premise_original | hypothesis_original | score | sentence1 | sentence2 |
+   |:-----------------|:--------------------|:------|:----------|:----------|
+   | <code>But the racism charge isn't quirky or wacky--it's demagogy.</code> | <code>The accusation of prejudice based on a pedestrian kind of hatred.</code> | <code>0</code> | <code>Ama ırkçılık suçlaması tuhaf ya da tuhaf değil, bu bir demagoji.</code> | <code>Yaya nefretine dayanan önyargı suçlaması.</code> |
+   | <code>Why would Gates allow the publication of such a book with his byline and photo on the dust jacket?</code> | <code>Gates' byline and photo are on the dust jacket</code> | <code>0</code> | <code>Gates neden böyle bir kitabın basılmasına izin versin ki?</code> | <code>Gates'in çizgisi ve fotoğrafı toz ceketin üzerinde.</code> |
+   | <code>I am a nonsmoker and allergic to cigarette smoke.</code> | <code>I do not smoke.</code> | <code>0</code> | <code>Sigara içmeyen biriyim ve sigara dumanına alerjim var.</code> | <code>Sigara içmiyorum.</code> |
+ * Loss: [<code>CoSENTLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#cosentloss) with these parameters:
+   ```json
+   {
+       "scale": 20.0,
+       "similarity_fct": "pairwise_cos_sim"
+   }
+   ```
+
+ ### Training Hyperparameters
+ #### Non-Default Hyperparameters
+
+ - `eval_strategy`: epoch
+ - `per_device_train_batch_size`: 32
+ - `per_device_eval_batch_size`: 64
+ - `learning_rate`: 2e-05
+ - `num_train_epochs`: 5
+ - `warmup_ratio`: 0.1
+ - `fp16`: True
+ - `load_best_model_at_end`: True
+ - `ddp_find_unused_parameters`: False
+
+ #### All Hyperparameters
+ <details><summary>Click to expand</summary>
+
+ - `overwrite_output_dir`: False
+ - `do_predict`: False
+ - `eval_strategy`: epoch
+ - `prediction_loss_only`: True
+ - `per_device_train_batch_size`: 32
+ - `per_device_eval_batch_size`: 64
+ - `per_gpu_train_batch_size`: None
+ - `per_gpu_eval_batch_size`: None
+ - `gradient_accumulation_steps`: 1
+ - `eval_accumulation_steps`: None
+ - `learning_rate`: 2e-05
+ - `weight_decay`: 0.0
+ - `adam_beta1`: 0.9
+ - `adam_beta2`: 0.999
+ - `adam_epsilon`: 1e-08
+ - `max_grad_norm`: 1.0
+ - `num_train_epochs`: 5
+ - `max_steps`: -1
+ - `lr_scheduler_type`: linear
+ - `lr_scheduler_kwargs`: {}
+ - `warmup_ratio`: 0.1
+ - `warmup_steps`: 0
+ - `log_level`: passive
+ - `log_level_replica`: warning
+ - `log_on_each_node`: True
+ - `logging_nan_inf_filter`: True
+ - `save_safetensors`: True
+ - `save_on_each_node`: False
+ - `save_only_model`: False
+ - `restore_callback_states_from_checkpoint`: False
+ - `no_cuda`: False
+ - `use_cpu`: False
+ - `use_mps_device`: False
+ - `seed`: 42
+ - `data_seed`: None
+ - `jit_mode_eval`: False
+ - `use_ipex`: False
+ - `bf16`: False
+ - `fp16`: True
+ - `fp16_opt_level`: O1
+ - `half_precision_backend`: auto
+ - `bf16_full_eval`: False
+ - `fp16_full_eval`: False
+ - `tf32`: None
+ - `local_rank`: 0
+ - `ddp_backend`: None
+ - `tpu_num_cores`: None
+ - `tpu_metrics_debug`: False
+ - `debug`: []
+ - `dataloader_drop_last`: False
+ - `dataloader_num_workers`: 0
+ - `dataloader_prefetch_factor`: None
+ - `past_index`: -1
+ - `disable_tqdm`: False
+ - `remove_unused_columns`: True
+ - `label_names`: None
+ - `load_best_model_at_end`: True
+ - `ignore_data_skip`: False
+ - `fsdp`: []
+ - `fsdp_min_num_params`: 0
+ - `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
+ - `fsdp_transformer_layer_cls_to_wrap`: None
+ - `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
+ - `deepspeed`: None
+ - `label_smoothing_factor`: 0.0
+ - `optim`: adamw_torch
+ - `optim_args`: None
+ - `adafactor`: False
+ - `group_by_length`: False
+ - `length_column_name`: length
+ - `ddp_find_unused_parameters`: False
+ - `ddp_bucket_cap_mb`: None
+ - `ddp_broadcast_buffers`: False
+ - `dataloader_pin_memory`: True
+ - `dataloader_persistent_workers`: False
+ - `skip_memory_metrics`: True
+ - `use_legacy_prediction_loop`: False
+ - `push_to_hub`: False
+ - `resume_from_checkpoint`: None
+ - `hub_model_id`: None
+ - `hub_strategy`: every_save
+ - `hub_private_repo`: False
+ - `hub_always_push`: False
+ - `gradient_checkpointing`: False
+ - `gradient_checkpointing_kwargs`: None
+ - `include_inputs_for_metrics`: False
+ - `eval_do_concat_batches`: True
+ - `fp16_backend`: auto
+ - `push_to_hub_model_id`: None
+ - `push_to_hub_organization`: None
+ - `mp_parameters`:
+ - `auto_find_batch_size`: False
+ - `full_determinism`: False
+ - `torchdynamo`: None
+ - `ray_scope`: last
+ - `ddp_timeout`: 1800
+ - `torch_compile`: False
+ - `torch_compile_backend`: None
+ - `torch_compile_mode`: None
+ - `dispatch_batches`: None
+ - `split_batches`: None
+ - `include_tokens_per_second`: False
+ - `include_num_input_tokens_seen`: False
+ - `neftune_noise_alpha`: None
+ - `optim_target_modules`: None
+ - `batch_eval_metrics`: False
+ - `batch_sampler`: batch_sampler
+ - `multi_dataset_batch_sampler`: proportional
+
+ </details>
+
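
Taken together, the non-default values above correspond to a trainer configuration along these lines (a sketch using the sentence-transformers v3 arguments API; `output_dir` is inferred from the checkpoint paths recorded in trainer_state.json, everything else mirrors the list above):

```python
from sentence_transformers import SentenceTransformerTrainingArguments

args = SentenceTransformerTrainingArguments(
    output_dir="turkish-embedding-model",  # assumed from trainer_state.json
    eval_strategy="epoch",
    per_device_train_batch_size=32,
    per_device_eval_batch_size=64,
    learning_rate=2e-5,
    num_train_epochs=5,
    warmup_ratio=0.1,
    fp16=True,
    load_best_model_at_end=True,
    ddp_find_unused_parameters=False,
)
```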
+ ### Training Logs
+ | Epoch  | Step | Training Loss | loss   | tr_ling_spearman_max |
+ |:------:|:----:|:-------------:|:------:|:--------------------:|
+ | 0.0320 | 25   | 17.17         | -      | -                    |
+ | 0.0639 | 50   | 16.4932       | -      | -                    |
+ | 0.0959 | 75   | 16.5976       | -      | -                    |
+ | 0.1279 | 100  | 15.6991       | -      | -                    |
+ | 0.1598 | 125  | 14.876        | -      | -                    |
+ | 0.1918 | 150  | 14.4828       | -      | -                    |
+ | 0.2238 | 175  | 12.7061       | -      | -                    |
+ | 0.2558 | 200  | 10.8687       | -      | -                    |
+ | 0.2877 | 225  | 8.3797        | -      | -                    |
+ | 0.3197 | 250  | 6.2029        | -      | -                    |
+ | 0.3517 | 275  | 5.8228        | -      | -                    |
+ | 0.3836 | 300  | 5.811         | -      | -                    |
+ | 0.4156 | 325  | 5.8079        | -      | -                    |
+ | 0.4476 | 350  | 5.8077        | -      | -                    |
+ | 0.4795 | 375  | 5.8035        | -      | -                    |
+ | 0.5115 | 400  | 5.8072        | -      | -                    |
+ | 0.5435 | 425  | 5.8033        | -      | -                    |
+ | 0.5754 | 450  | 5.8086        | -      | -                    |
+ | 0.6074 | 475  | 5.81          | -      | -                    |
+ | 0.6394 | 500  | 5.7949        | -      | -                    |
+ | 0.6714 | 525  | 5.8079        | -      | -                    |
+ | 0.7033 | 550  | 5.8057        | -      | -                    |
+ | 0.7353 | 575  | 5.8097        | -      | -                    |
+ | 0.7673 | 600  | 5.7986        | -      | -                    |
+ | 0.7992 | 625  | 5.8051        | -      | -                    |
+ | 0.8312 | 650  | 5.8041        | -      | -                    |
+ | 0.8632 | 675  | 5.7907        | -      | -                    |
+ | 0.8951 | 700  | 5.7991        | -      | -                    |
+ | 0.9271 | 725  | 5.8035        | -      | -                    |
+ | 0.9591 | 750  | 5.7945        | -      | -                    |
+ | 0.9910 | 775  | 5.8077        | -      | -                    |
+ | 1.0    | 782  | -             | 5.8024 | 0.0330               |
+ | 1.0230 | 800  | 5.6703        | -      | -                    |
+ | 1.0550 | 825  | 5.8052        | -      | -                    |
+ | 1.0870 | 850  | 5.7936        | -      | -                    |
+ | 1.1189 | 875  | 5.7924        | -      | -                    |
+ | 1.1509 | 900  | 5.7806        | -      | -                    |
+ | 1.1829 | 925  | 5.7835        | -      | -                    |
+ | 1.2148 | 950  | 5.7619        | -      | -                    |
+ | 1.2468 | 975  | 5.8038        | -      | -                    |
+ | 1.2788 | 1000 | 5.779         | -      | -                    |
+ | 1.3107 | 1025 | 5.7904        | -      | -                    |
+ | 1.3427 | 1050 | 5.7696        | -      | -                    |
+ | 1.3747 | 1075 | 5.7919        | -      | -                    |
+ | 1.4066 | 1100 | 5.7785        | -      | -                    |
+ | 1.4386 | 1125 | 5.7862        | -      | -                    |
+ | 1.4706 | 1150 | 5.7703        | -      | -                    |
+ | 1.5026 | 1175 | 5.773         | -      | -                    |
+ | 1.5345 | 1200 | 5.7627        | -      | -                    |
+ | 1.5665 | 1225 | 5.7596        | -      | -                    |
+ | 1.5985 | 1250 | 5.7882        | -      | -                    |
+ | 1.6304 | 1275 | 5.7828        | -      | -                    |
+ | 1.6624 | 1300 | 5.771         | -      | -                    |
+ | 1.6944 | 1325 | 5.788         | -      | -                    |
+ | 1.7263 | 1350 | 5.7719        | -      | -                    |
+ | 1.7583 | 1375 | 5.7846        | -      | -                    |
+ | 1.7903 | 1400 | 5.7838        | -      | -                    |
+ | 1.8223 | 1425 | 5.7912        | -      | -                    |
+ | 1.8542 | 1450 | 5.7686        | -      | -                    |
+ | 1.8862 | 1475 | 5.7938        | -      | -                    |
+ | 1.9182 | 1500 | 5.7847        | -      | -                    |
+ | 1.9501 | 1525 | 5.7952        | -      | -                    |
+ | 1.9821 | 1550 | 5.7528        | -      | -                    |
+ | 2.0    | 1564 | -             | 5.7933 | 0.0682               |
+
+
+ ### Framework Versions
+ - Python: 3.10.12
+ - Sentence Transformers: 3.0.0
+ - Transformers: 4.41.0
+ - PyTorch: 2.3.0+cu121
+ - Accelerate: 0.30.1
+ - Datasets: 2.19.1
+ - Tokenizers: 0.19.1
+
+ ## Citation
+
+ ### BibTeX
+
+ #### Sentence Transformers
+ ```bibtex
+ @inproceedings{reimers-2019-sentence-bert,
+     title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
+     author = "Reimers, Nils and Gurevych, Iryna",
+     booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
+     month = "11",
+     year = "2019",
+     publisher = "Association for Computational Linguistics",
+     url = "https://arxiv.org/abs/1908.10084",
+ }
+ ```
+
+ #### CoSENTLoss
+ ```bibtex
+ @online{kexuefm-8847,
+     title={CoSENT: A more efficient sentence vector scheme than Sentence-BERT},
+     author={Su Jianlin},
+     year={2022},
+     month={Jan},
+     url={https://kexue.fm/archives/8847},
+ }
+ ```
+
+ <!--
+ ## Glossary
+
+ *Clearly define terms in order to be accessible across audiences.*
+ -->
+
+ <!--
+ ## Model Card Authors
+
+ *Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
+ -->
+
+ <!--
+ ## Model Card Contact
+
+ *Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
+ -->
checkpoint-1564/config.json ADDED
@@ -0,0 +1,26 @@
+ {
+   "_name_or_path": "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
+   "architectures": [
+     "BertModel"
+   ],
+   "attention_probs_dropout_prob": 0.1,
+   "classifier_dropout": null,
+   "gradient_checkpointing": false,
+   "hidden_act": "gelu",
+   "hidden_dropout_prob": 0.1,
+   "hidden_size": 384,
+   "initializer_range": 0.02,
+   "intermediate_size": 1536,
+   "layer_norm_eps": 1e-12,
+   "max_position_embeddings": 512,
+   "model_type": "bert",
+   "num_attention_heads": 12,
+   "num_hidden_layers": 12,
+   "pad_token_id": 0,
+   "position_embedding_type": "absolute",
+   "torch_dtype": "float32",
+   "transformers_version": "4.41.0",
+   "type_vocab_size": 2,
+   "use_cache": true,
+   "vocab_size": 250037
+ }
checkpoint-1564/config_sentence_transformers.json ADDED
@@ -0,0 +1,10 @@
+ {
+   "__version__": {
+     "sentence_transformers": "2.0.0",
+     "transformers": "4.7.0",
+     "pytorch": "1.9.0+cu102"
+   },
+   "prompts": {},
+   "default_prompt_name": null,
+   "similarity_fn_name": null
+ }
checkpoint-1564/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7a3b103c4a050ec7c20c41f4f0bdcfee82642ecfe0cd58c6268c2bfddce5abbd
+ size 470637416
checkpoint-1564/modules.json ADDED
@@ -0,0 +1,14 @@
+ [
+   {
+     "idx": 0,
+     "name": "0",
+     "path": "",
+     "type": "sentence_transformers.models.Transformer"
+   },
+   {
+     "idx": 1,
+     "name": "1",
+     "path": "1_Pooling",
+     "type": "sentence_transformers.models.Pooling"
+   }
+ ]
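
This modules.json chains a Transformer encoder (module 0, at the checkpoint root) with the pooling module in 1_Pooling/. A hedged sketch of assembling the same two-module stack by hand, with the base model id taken from config.json and `max_seq_length` from sentence_bert_config.json:

```python
from sentence_transformers import SentenceTransformer, models

# Module 0: the Transformer encoder (a BertModel with max_seq_length 128).
transformer = models.Transformer(
    "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
    max_seq_length=128,
)
# Module 1: mean pooling over token embeddings (see 1_Pooling/config.json).
pooling = models.Pooling(
    transformer.get_word_embedding_dimension(),  # 384
    pooling_mode="mean",
)
model = SentenceTransformer(modules=[transformer, pooling])
```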
checkpoint-1564/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a4fd1f2073e2af85a9380b42823863bdcd018438654cbfa5b3aa98b918223855
+ size 940212218
checkpoint-1564/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:87c6bc4d1d198c376a51fede7dca1cc0874e21a3307b8ae3875e98db55be87e6
+ size 14244
checkpoint-1564/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:fcc21c2650bb4344f4670f8f3de62ba81c0b68e93028800e195846665d9176d9
+ size 1064
checkpoint-1564/sentence_bert_config.json ADDED
@@ -0,0 +1,4 @@
+ {
+   "max_seq_length": 128,
+   "do_lower_case": false
+ }
checkpoint-1564/special_tokens_map.json ADDED
@@ -0,0 +1,51 @@
+ {
+   "bos_token": {
+     "content": "<s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "cls_token": {
+     "content": "<s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "eos_token": {
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "mask_token": {
+     "content": "<mask>",
+     "lstrip": true,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": {
+     "content": "<pad>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "sep_token": {
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "unk_token": {
+     "content": "<unk>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
checkpoint-1564/tokenizer.json ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:cad551d5600a84242d0973327029452a1e3672ba6313c2a3c3d69c4310e12719
+ size 17082987
checkpoint-1564/tokenizer_config.json ADDED
@@ -0,0 +1,64 @@
+ {
+   "added_tokens_decoder": {
+     "0": {
+       "content": "<s>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "1": {
+       "content": "<pad>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "2": {
+       "content": "</s>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "3": {
+       "content": "<unk>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "250001": {
+       "content": "<mask>",
+       "lstrip": true,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "bos_token": "<s>",
+   "clean_up_tokenization_spaces": true,
+   "cls_token": "<s>",
+   "do_lower_case": true,
+   "eos_token": "</s>",
+   "mask_token": "<mask>",
+   "max_length": 128,
+   "model_max_length": 128,
+   "pad_to_multiple_of": null,
+   "pad_token": "<pad>",
+   "pad_token_type_id": 0,
+   "padding_side": "right",
+   "sep_token": "</s>",
+   "stride": 0,
+   "strip_accents": null,
+   "tokenize_chinese_chars": true,
+   "tokenizer_class": "BertTokenizer",
+   "truncation_side": "right",
+   "truncation_strategy": "longest_first",
+   "unk_token": "<unk>"
+ }
checkpoint-1564/trainer_state.json ADDED
@@ -0,0 +1,512 @@
+ {
+   "best_metric": 5.793323516845703,
+   "best_model_checkpoint": "turkish-embedding-model/checkpoint-1564",
+   "epoch": 2.0,
+   "eval_steps": 500,
+   "global_step": 1564,
+   "is_hyper_param_search": false,
+   "is_local_process_zero": true,
+   "is_world_process_zero": true,
+   "log_history": [
+     {
+       "epoch": 0.0319693094629156,
+       "grad_norm": 70.72708129882812,
+       "learning_rate": 1.1253196930946293e-06,
+       "loss": 17.17,
+       "step": 25
+     },
+     {
+       "epoch": 0.0639386189258312,
+       "grad_norm": 81.68770599365234,
+       "learning_rate": 2.4040920716112534e-06,
+       "loss": 16.4932,
+       "step": 50
+     },
+     {
+       "epoch": 0.0959079283887468,
+       "grad_norm": 109.91338348388672,
+       "learning_rate": 3.6828644501278778e-06,
+       "loss": 16.5976,
+       "step": 75
+     },
+     {
+       "epoch": 0.1278772378516624,
+       "grad_norm": 73.892578125,
+       "learning_rate": 4.961636828644502e-06,
+       "loss": 15.6991,
+       "step": 100
+     },
+     {
+       "epoch": 0.159846547314578,
+       "grad_norm": 79.35150909423828,
+       "learning_rate": 6.240409207161126e-06,
+       "loss": 14.876,
+       "step": 125
+     },
+     {
+       "epoch": 0.1918158567774936,
+       "grad_norm": 83.0904541015625,
+       "learning_rate": 7.5191815856777495e-06,
+       "loss": 14.4828,
+       "step": 150
+     },
+     {
+       "epoch": 0.2237851662404092,
+       "grad_norm": 76.82855987548828,
+       "learning_rate": 8.797953964194374e-06,
+       "loss": 12.7061,
+       "step": 175
+     },
+     {
+       "epoch": 0.2557544757033248,
+       "grad_norm": 51.30181121826172,
+       "learning_rate": 1.0076726342710998e-05,
+       "loss": 10.8687,
+       "step": 200
+     },
+     {
+       "epoch": 0.2877237851662404,
+       "grad_norm": 18.70808219909668,
+       "learning_rate": 1.1355498721227622e-05,
+       "loss": 8.3797,
+       "step": 225
+     },
+     {
+       "epoch": 0.319693094629156,
+       "grad_norm": 1.3039417266845703,
+       "learning_rate": 1.2634271099744246e-05,
+       "loss": 6.2029,
+       "step": 250
+     },
+     {
+       "epoch": 0.3516624040920716,
+       "grad_norm": 0.2324853092432022,
+       "learning_rate": 1.391304347826087e-05,
+       "loss": 5.8228,
+       "step": 275
+     },
+     {
+       "epoch": 0.3836317135549872,
+       "grad_norm": 0.1757364720106125,
+       "learning_rate": 1.5191815856777494e-05,
+       "loss": 5.811,
+       "step": 300
+     },
+     {
+       "epoch": 0.4156010230179028,
+       "grad_norm": 0.1788654774427414,
+       "learning_rate": 1.647058823529412e-05,
+       "loss": 5.8079,
+       "step": 325
+     },
+     {
+       "epoch": 0.4475703324808184,
+       "grad_norm": 0.12862567603588104,
+       "learning_rate": 1.7749360613810744e-05,
+       "loss": 5.8077,
+       "step": 350
+     },
+     {
+       "epoch": 0.479539641943734,
+       "grad_norm": 0.14497514069080353,
+       "learning_rate": 1.9028132992327367e-05,
+       "loss": 5.8035,
+       "step": 375
+     },
+     {
+       "epoch": 0.5115089514066496,
+       "grad_norm": 0.1350390762090683,
+       "learning_rate": 1.996589940323956e-05,
+       "loss": 5.8072,
+       "step": 400
+     },
+     {
+       "epoch": 0.5434782608695652,
+       "grad_norm": 0.1435602754354477,
+       "learning_rate": 1.9823813583404378e-05,
+       "loss": 5.8033,
+       "step": 425
+     },
+     {
+       "epoch": 0.5754475703324808,
+       "grad_norm": 0.11389254033565521,
+       "learning_rate": 1.96817277635692e-05,
+       "loss": 5.8086,
+       "step": 450
+     },
+     {
+       "epoch": 0.6074168797953964,
+       "grad_norm": 0.15821650624275208,
+       "learning_rate": 1.9539641943734017e-05,
+       "loss": 5.81,
+       "step": 475
+     },
+     {
+       "epoch": 0.639386189258312,
+       "grad_norm": 0.1179889366030693,
+       "learning_rate": 1.9397556123898838e-05,
+       "loss": 5.7949,
+       "step": 500
+     },
+     {
+       "epoch": 0.6713554987212276,
+       "grad_norm": 0.10912967473268509,
+       "learning_rate": 1.9255470304063656e-05,
+       "loss": 5.8079,
+       "step": 525
+     },
+     {
+       "epoch": 0.7033248081841432,
+       "grad_norm": 0.11702870577573776,
+       "learning_rate": 1.9113384484228477e-05,
+       "loss": 5.8057,
+       "step": 550
+     },
+     {
+       "epoch": 0.7352941176470589,
+       "grad_norm": 0.13132448494434357,
+       "learning_rate": 1.8971298664393295e-05,
+       "loss": 5.8097,
+       "step": 575
+     },
+     {
+       "epoch": 0.7672634271099744,
+       "grad_norm": 0.15833145380020142,
+       "learning_rate": 1.8829212844558116e-05,
+       "loss": 5.7986,
+       "step": 600
+     },
+     {
+       "epoch": 0.7992327365728901,
+       "grad_norm": 0.11651863902807236,
+       "learning_rate": 1.8687127024722937e-05,
+       "loss": 5.8051,
+       "step": 625
+     },
+     {
+       "epoch": 0.8312020460358056,
+       "grad_norm": 0.5393890142440796,
+       "learning_rate": 1.854504120488775e-05,
+       "loss": 5.8041,
+       "step": 650
+     },
+     {
+       "epoch": 0.8631713554987213,
+       "grad_norm": 0.6457561254501343,
+       "learning_rate": 1.8402955385052572e-05,
+       "loss": 5.7907,
+       "step": 675
+     },
+     {
+       "epoch": 0.8951406649616368,
+       "grad_norm": 0.5643135905265808,
+       "learning_rate": 1.8260869565217393e-05,
+       "loss": 5.7991,
+       "step": 700
+     },
+     {
+       "epoch": 0.9271099744245525,
+       "grad_norm": 3.214787721633911,
+       "learning_rate": 1.811878374538221e-05,
+       "loss": 5.8035,
+       "step": 725
+     },
+     {
+       "epoch": 0.959079283887468,
+       "grad_norm": 2.781162977218628,
+       "learning_rate": 1.7976697925547032e-05,
+       "loss": 5.7945,
+       "step": 750
+     },
+     {
+       "epoch": 0.9910485933503836,
+       "grad_norm": 0.38559335470199585,
+       "learning_rate": 1.783461210571185e-05,
+       "loss": 5.8077,
+       "step": 775
+     },
+     {
+       "epoch": 1.0,
+       "eval_loss": 5.8023600578308105,
+       "eval_runtime": 18.0632,
+       "eval_samples_per_second": 276.805,
+       "eval_steps_per_second": 4.374,
+       "eval_tr_ling_pearson_cosine": 0.017751548525136808,
+       "eval_tr_ling_pearson_dot": 0.025703597820631346,
+       "eval_tr_ling_pearson_euclidean": 0.02195284877201089,
+       "eval_tr_ling_pearson_manhattan": 0.02083376479528459,
+       "eval_tr_ling_pearson_max": 0.025703597820631346,
+       "eval_tr_ling_spearman_cosine": 0.027108099994157316,
+       "eval_tr_ling_spearman_dot": 0.03304394653738539,
+       "eval_tr_ling_spearman_euclidean": 0.025485959636772793,
+       "eval_tr_ling_spearman_manhattan": 0.024466610177699702,
+       "eval_tr_ling_spearman_max": 0.03304394653738539,
+       "step": 782
+     },
+     {
+       "epoch": 1.0230179028132993,
+       "grad_norm": 0.3645063638687134,
+       "learning_rate": 1.769252628587667e-05,
+       "loss": 5.6703,
+       "step": 800
+     },
+     {
+       "epoch": 1.054987212276215,
+       "grad_norm": 0.9638137817382812,
+       "learning_rate": 1.7550440466041488e-05,
+       "loss": 5.8052,
+       "step": 825
+     },
+     {
+       "epoch": 1.0869565217391304,
+       "grad_norm": 2.114203691482544,
+       "learning_rate": 1.740835464620631e-05,
+       "loss": 5.7936,
+       "step": 850
+     },
+     {
+       "epoch": 1.118925831202046,
+       "grad_norm": 1.8992066383361816,
+       "learning_rate": 1.7266268826371127e-05,
+       "loss": 5.7924,
+       "step": 875
+     },
+     {
+       "epoch": 1.1508951406649617,
+       "grad_norm": 2.8299577236175537,
+       "learning_rate": 1.7124183006535948e-05,
+       "loss": 5.7806,
+       "step": 900
+     },
+     {
+       "epoch": 1.1828644501278773,
+       "grad_norm": 1.956953525543213,
+       "learning_rate": 1.698209718670077e-05,
+       "loss": 5.7835,
+       "step": 925
+     },
+     {
+       "epoch": 1.2148337595907928,
+       "grad_norm": 2.658413887023926,
+       "learning_rate": 1.6840011366865587e-05,
+       "loss": 5.7619,
+       "step": 950
+     },
+     {
+       "epoch": 1.2468030690537084,
+       "grad_norm": 1.2760388851165771,
+       "learning_rate": 1.6697925547030408e-05,
+       "loss": 5.8038,
+       "step": 975
+     },
+     {
+       "epoch": 1.278772378516624,
+       "grad_norm": 1.7434897422790527,
+       "learning_rate": 1.6555839727195226e-05,
+       "loss": 5.779,
+       "step": 1000
+     },
+     {
+       "epoch": 1.3107416879795397,
+       "grad_norm": 1.3532071113586426,
+       "learning_rate": 1.6413753907360047e-05,
+       "loss": 5.7904,
+       "step": 1025
+     },
+     {
+       "epoch": 1.3427109974424551,
+       "grad_norm": 3.7385997772216797,
+       "learning_rate": 1.6271668087524864e-05,
+       "loss": 5.7696,
+       "step": 1050
+     },
+     {
+       "epoch": 1.3746803069053708,
+       "grad_norm": 0.9061102867126465,
+       "learning_rate": 1.6129582267689685e-05,
+       "loss": 5.7919,
+       "step": 1075
+     },
+     {
+       "epoch": 1.4066496163682864,
+       "grad_norm": 2.7104809284210205,
+       "learning_rate": 1.5987496447854503e-05,
+       "loss": 5.7785,
+       "step": 1100
+     },
+     {
+       "epoch": 1.438618925831202,
+       "grad_norm": 1.7147830724716187,
+       "learning_rate": 1.5845410628019324e-05,
+       "loss": 5.7862,
+       "step": 1125
+     },
+     {
+       "epoch": 1.4705882352941178,
+       "grad_norm": 2.525214672088623,
+       "learning_rate": 1.5703324808184145e-05,
+       "loss": 5.7703,
+       "step": 1150
+     },
+     {
+       "epoch": 1.5025575447570332,
+       "grad_norm": 1.7794997692108154,
+       "learning_rate": 1.5561238988348963e-05,
+       "loss": 5.773,
+       "step": 1175
+     },
+     {
+       "epoch": 1.5345268542199488,
+       "grad_norm": 4.901644229888916,
+       "learning_rate": 1.5419153168513784e-05,
+       "loss": 5.7627,
+       "step": 1200
+     },
+     {
+       "epoch": 1.5664961636828645,
+       "grad_norm": 3.360812187194824,
+       "learning_rate": 1.52770673486786e-05,
+       "loss": 5.7596,
+       "step": 1225
+     },
+     {
+       "epoch": 1.59846547314578,
+       "grad_norm": 1.2768888473510742,
+       "learning_rate": 1.5134981528843423e-05,
+       "loss": 5.7882,
+       "step": 1250
+     },
+     {
+       "epoch": 1.6304347826086958,
+       "grad_norm": 2.206226348876953,
+       "learning_rate": 1.4992895709008242e-05,
+       "loss": 5.7828,
+       "step": 1275
+     },
+     {
+       "epoch": 1.6624040920716112,
+       "grad_norm": 1.4602406024932861,
+       "learning_rate": 1.4850809889173061e-05,
+       "loss": 5.771,
+       "step": 1300
+     },
+     {
+       "epoch": 1.6943734015345269,
+       "grad_norm": 1.1597537994384766,
+       "learning_rate": 1.4708724069337881e-05,
+       "loss": 5.788,
+       "step": 1325
+     },
+     {
+       "epoch": 1.7263427109974425,
+       "grad_norm": 3.7494003772735596,
+       "learning_rate": 1.45666382495027e-05,
+       "loss": 5.7719,
+       "step": 1350
+     },
+     {
+       "epoch": 1.758312020460358,
+       "grad_norm": 1.6271498203277588,
+       "learning_rate": 1.442455242966752e-05,
+       "loss": 5.7846,
+       "step": 1375
+     },
+     {
+       "epoch": 1.7902813299232738,
+       "grad_norm": 2.0469117164611816,
+       "learning_rate": 1.4282466609832339e-05,
+       "loss": 5.7838,
+       "step": 1400
+     },
+     {
+       "epoch": 1.8222506393861893,
+       "grad_norm": 2.533921003341675,
+       "learning_rate": 1.4140380789997158e-05,
+       "loss": 5.7912,
+       "step": 1425
+     },
+     {
+       "epoch": 1.854219948849105,
+       "grad_norm": 3.291757583618164,
+       "learning_rate": 1.3998294970161978e-05,
+       "loss": 5.7686,
+       "step": 1450
+     },
+     {
+       "epoch": 1.8861892583120206,
+       "grad_norm": 3.0181350708007812,
+       "learning_rate": 1.3856209150326799e-05,
+       "loss": 5.7938,
+       "step": 1475
+     },
+     {
+       "epoch": 1.918158567774936,
+       "grad_norm": 2.553502321243286,
+       "learning_rate": 1.3714123330491618e-05,
+       "loss": 5.7847,
+       "step": 1500
+     },
+     {
+       "epoch": 1.9501278772378516,
+       "grad_norm": 1.8034719228744507,
+       "learning_rate": 1.3572037510656438e-05,
+       "loss": 5.7952,
+       "step": 1525
+     },
+     {
+       "epoch": 1.9820971867007673,
+       "grad_norm": 3.7138864994049072,
+       "learning_rate": 1.3429951690821257e-05,
+       "loss": 5.7528,
+       "step": 1550
+     },
+     {
+       "epoch": 2.0,
+       "eval_loss": 5.793323516845703,
+       "eval_runtime": 18.2796,
+       "eval_samples_per_second": 273.528,
+       "eval_steps_per_second": 4.322,
+       "eval_tr_ling_pearson_cosine": 0.037604255015168134,
+       "eval_tr_ling_pearson_dot": 0.0673696846368413,
+       "eval_tr_ling_pearson_euclidean": 0.03698411306484619,
+       "eval_tr_ling_pearson_manhattan": 0.034740275152181296,
+       "eval_tr_ling_pearson_max": 0.0673696846368413,
+       "eval_tr_ling_spearman_cosine": 0.04804112988506346,
+       "eval_tr_ling_spearman_dot": 0.06818119362900125,
+       "eval_tr_ling_spearman_euclidean": 0.03903062430281842,
+       "eval_tr_ling_spearman_manhattan": 0.03769766156967754,
+       "eval_tr_ling_spearman_max": 0.06818119362900125,
+       "step": 1564
+     }
+   ],
+   "logging_steps": 25,
+   "max_steps": 3910,
+   "num_input_tokens_seen": 0,
+   "num_train_epochs": 5,
+   "save_steps": 500,
+   "stateful_callbacks": {
+     "EarlyStoppingCallback": {
+       "args": {
+         "early_stopping_patience": 5,
+         "early_stopping_threshold": 0.01
+       },
+       "attributes": {
+         "early_stopping_patience_counter": 0
+       }
+     },
+     "TrainerControl": {
+       "args": {
+         "should_epoch_stop": false,
+         "should_evaluate": false,
+         "should_log": false,
+         "should_save": true,
+         "should_training_stop": false
+       },
+       "attributes": {}
+     }
+   },
+   "total_flos": 0.0,
+   "train_batch_size": 32,
+   "trial_name": null,
+   "trial_params": null
+ }
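
The `stateful_callbacks` entry above records an early-stopping policy. For reference, it corresponds to this standard transformers callback (a sketch of the recorded configuration only; the actual trainer wiring is not part of this commit):

```python
from transformers import EarlyStoppingCallback

# Matches stateful_callbacks.EarlyStoppingCallback: stop training after 5
# evaluations without at least a 0.01 improvement in the tracked metric.
early_stopping = EarlyStoppingCallback(
    early_stopping_patience=5,
    early_stopping_threshold=0.01,
)
```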
checkpoint-1564/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d44b0bec8869c08bdad0d597184d7a293a0e13eb770d6f4384456cbbe4fe5aa4
+ size 5368
checkpoint-1564/unigram.json ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:da145b5e7700ae40f16691ec32a0b1fdc1ee3298db22a31ea55f57a966c4a65d
+ size 14763260
checkpoint-3128/1_Pooling/config.json ADDED
@@ -0,0 +1,10 @@
+ {
+   "word_embedding_dimension": 384,
+   "pooling_mode_cls_token": false,
+   "pooling_mode_mean_tokens": true,
+   "pooling_mode_max_tokens": false,
+   "pooling_mode_mean_sqrt_len_tokens": false,
+   "pooling_mode_weightedmean_tokens": false,
+   "pooling_mode_lasttoken": false,
+   "include_prompt": true
+ }
checkpoint-3128/README.md ADDED
@@ -0,0 +1,610 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ language:
3
+ - multilingual
4
+ - zh
5
+ - ja
6
+ - ar
7
+ - ko
8
+ - de
9
+ - fr
10
+ - es
11
+ - pt
12
+ - hi
13
+ - id
14
+ - it
15
+ - tr
16
+ - ru
17
+ - bn
18
+ - ur
19
+ - mr
20
+ - ta
21
+ - vi
22
+ - fa
23
+ - pl
24
+ - uk
25
+ - nl
26
+ - sv
27
+ - he
28
+ - sw
29
+ - ps
30
+ library_name: sentence-transformers
31
+ tags:
32
+ - sentence-transformers
33
+ - sentence-similarity
34
+ - feature-extraction
35
+ - dataset_size:10K<n<100K
36
+ - loss:CoSENTLoss
37
+ base_model: sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2
38
+ metrics:
39
+ - pearson_cosine
40
+ - spearman_cosine
41
+ - pearson_manhattan
42
+ - spearman_manhattan
43
+ - pearson_euclidean
44
+ - spearman_euclidean
45
+ - pearson_dot
46
+ - spearman_dot
47
+ - pearson_max
48
+ - spearman_max
49
+ widget:
50
+ - source_sentence: Bottomless Mug
51
+ sentences:
52
+ - You are always safe.
53
+ - That trend isn't very known yet
54
+ - Eleanor Clift göreve koşuyor.
55
+ - source_sentence: Tripp has a job.
56
+ sentences:
57
+ - They are having money problems.
58
+ - Malignite aniden ortaya çıkar.
59
+ - Mezarlar derin ormanlarda saklandı.
60
+ - source_sentence: There are rules
61
+ sentences:
62
+ - There are more villians than heros.
63
+ - The directions should be read.
64
+ - Mezarlar derin ormanlarda saklandı.
65
+ - source_sentence: K is a musician.
66
+ sentences:
67
+ - Klimt draws hotdogs.
68
+ - Ed Wood hiç mahkemeye çıkmadı.
69
+ - Çeçen Rusya yönetimi ele geçirdi.
70
+ - source_sentence: We moved closer.
71
+ sentences:
72
+ - Clinton is unaware of the process.
73
+ - Nesil deneyimleri anlamsızdır.
74
+ - Hormonların etkileri vardır.
75
+ pipeline_tag: sentence-similarity
76
+ model-index:
77
+ - name: SentenceTransformer based on sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2
78
+ results:
79
+ - task:
80
+ type: semantic-similarity
81
+ name: Semantic Similarity
82
+ dataset:
83
+ name: tr ling
84
+ type: tr_ling
85
+ metrics:
86
+ - type: pearson_cosine
87
+ value: 0.058743115070889876
88
+ name: Pearson Cosine
89
+ - type: spearman_cosine
90
+ value: 0.059526247945378225
91
+ name: Spearman Cosine
92
+ - type: pearson_manhattan
93
+ value: 0.04582145815494953
94
+ name: Pearson Manhattan
95
+ - type: spearman_manhattan
96
+ value: 0.04331287037397966
97
+ name: Spearman Manhattan
98
+ - type: pearson_euclidean
99
+ value: 0.04709170917685587
100
+ name: Pearson Euclidean
101
+ - type: spearman_euclidean
102
+ value: 0.04407504959649961
103
+ name: Spearman Euclidean
104
+ - type: pearson_dot
105
+ value: 0.08477622619519222
106
+ name: Pearson Dot
107
+ - type: spearman_dot
108
+ value: 0.08243745050110735
109
+ name: Spearman Dot
110
+ - type: pearson_max
111
+ value: 0.08477622619519222
112
+ name: Pearson Max
113
+ - type: spearman_max
114
+ value: 0.08243745050110735
115
+ name: Spearman Max
116
+ ---
117
+
118
+ # SentenceTransformer based on sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2
119
+
120
+ This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2](https://huggingface.co/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2) on the [MoritzLaurer/multilingual-nli-26lang-2mil7](https://huggingface.co/datasets/MoritzLaurer/multilingual-nli-26lang-2mil7) dataset. It maps sentences & paragraphs to a 384-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
121
+
122
+ ## Model Details
123
+
124
+ ### Model Description
125
+ - **Model Type:** Sentence Transformer
126
+ - **Base model:** [sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2](https://huggingface.co/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2) <!-- at revision bf3bf13ab40c3157080a7ab344c831b9ad18b5eb -->
127
+ - **Maximum Sequence Length:** 128 tokens
128
+ - **Output Dimensionality:** 384 tokens
129
+ - **Similarity Function:** Cosine Similarity
130
+ - **Training Dataset:**
131
+ - [MoritzLaurer/multilingual-nli-26lang-2mil7](https://huggingface.co/datasets/MoritzLaurer/multilingual-nli-26lang-2mil7)
132
+ - **Languages:** multilingual, zh, ja, ar, ko, de, fr, es, pt, hi, id, it, tr, ru, bn, ur, mr, ta, vi, fa, pl, uk, nl, sv, he, sw, ps
133
+ <!-- - **License:** Unknown -->
134
+
135
+ ### Model Sources
136
+
137
+ - **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
138
+ - **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
139
+ - **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
140
+
141
+ ### Full Model Architecture
142
+
143
+ ```
144
+ SentenceTransformer(
145
+ (0): Transformer({'max_seq_length': 128, 'do_lower_case': False}) with Transformer model: BertModel
146
+ (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
147
+ )
148
+ ```
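+ 
+ For intuition, the Pooling module above mean-pools token embeddings while ignoring padding. A minimal sketch of that computation (an illustration, not the library's exact code):
+ 
+ ```python
+ import torch
+ 
+ def mean_pool(token_embeddings: torch.Tensor, attention_mask: torch.Tensor) -> torch.Tensor:
+     # token_embeddings: (batch, seq_len, 384); attention_mask: (batch, seq_len)
+     mask = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
+     # Sum embeddings over real tokens only, then divide by the real-token count
+     return (token_embeddings * mask).sum(dim=1) / mask.sum(dim=1).clamp(min=1e-9)
+ ```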
149
+
150
+ ## Usage
151
+
152
+ ### Direct Usage (Sentence Transformers)
153
+
154
+ First install the Sentence Transformers library:
155
+
156
+ ```bash
157
+ pip install -U sentence-transformers
158
+ ```
159
+
160
+ Then you can load this model and run inference.
161
+ ```python
162
+ from sentence_transformers import SentenceTransformer
163
+
164
+ # Download from the 🤗 Hub
165
+ model = SentenceTransformer("sentence_transformers_model_id")  # placeholder: replace with this repo's model id
166
+ # Run inference
167
+ sentences = [
168
+ 'We moved closer.',
169
+ 'Clinton is unaware of the process.',
170
+ 'Nesil deneyimleri anlamsızdır.',
171
+ ]
172
+ embeddings = model.encode(sentences)
173
+ print(embeddings.shape)
174
+ # (3, 384)
175
+
176
+ # Get the similarity scores for the embeddings
177
+ similarities = model.similarity(embeddings, embeddings)
178
+ print(similarities.shape)
179
+ # torch.Size([3, 3])
180
+ ```
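+ 
+ Note that `model.similarity` falls back to cosine similarity here, since `similarity_fn_name` is left unset (`null`) in this repo's `config_sentence_transformers.json`.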
181
+
182
+ <!--
183
+ ### Direct Usage (Transformers)
184
+
185
+ <details><summary>Click to see the direct usage in Transformers</summary>
186
+
187
+ </details>
188
+ -->
189
+
190
+ <!--
191
+ ### Downstream Usage (Sentence Transformers)
192
+
193
+ You can finetune this model on your own dataset.
194
+
195
+ <details><summary>Click to expand</summary>
196
+
197
+ </details>
198
+ -->
199
+
200
+ <!--
201
+ ### Out-of-Scope Use
202
+
203
+ *List how the model may foreseeably be misused and address what users ought not to do with the model.*
204
+ -->
205
+
206
+ ## Evaluation
207
+
208
+ ### Metrics
209
+
210
+ #### Semantic Similarity
211
+ * Dataset: `tr_ling`
212
+ * Evaluated with [<code>EmbeddingSimilarityEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.EmbeddingSimilarityEvaluator)
213
+
214
+ | Metric | Value |
215
+ |:-------------------|:-----------|
216
+ | pearson_cosine | 0.0587 |
217
+ | spearman_cosine | 0.0595 |
218
+ | pearson_manhattan | 0.0458 |
219
+ | spearman_manhattan | 0.0433 |
220
+ | pearson_euclidean | 0.0471 |
221
+ | spearman_euclidean | 0.0441 |
222
+ | pearson_dot | 0.0848 |
223
+ | spearman_dot | 0.0824 |
224
+ | pearson_max | 0.0848 |
225
+ | **spearman_max** | **0.0824** |
226
+
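+ These figures come from running an `EmbeddingSimilarityEvaluator` on the Turkish evaluation split. A minimal sketch of such an evaluation, reusing the `model` loaded in the usage section (the sentence pair and gold score below are placeholders):
+ 
+ ```python
+ from sentence_transformers.evaluation import EmbeddingSimilarityEvaluator
+ 
+ evaluator = EmbeddingSimilarityEvaluator(
+     sentences1=["Sigara içmeyen biriyim ve sigara dumanına alerjim var."],
+     sentences2=["Sigara içmiyorum."],
+     scores=[1.0],  # gold similarity scores in [0, 1]
+     name="tr_ling",
+ )
+ results = evaluator(model)  # dict of Pearson/Spearman correlations
+ ```
+ 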
227
+ <!--
228
+ ## Bias, Risks and Limitations
229
+
230
+ *What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
231
+ -->
232
+
233
+ <!--
234
+ ### Recommendations
235
+
236
+ *What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
237
+ -->
238
+
239
+ ## Training Details
240
+
241
+ ### Training Dataset
242
+
243
+ #### MoritzLaurer/multilingual-nli-26lang-2mil7
244
+
245
+ * Dataset: [MoritzLaurer/multilingual-nli-26lang-2mil7](https://huggingface.co/datasets/MoritzLaurer/multilingual-nli-26lang-2mil7) at [510a233](https://huggingface.co/datasets/MoritzLaurer/multilingual-nli-26lang-2mil7/tree/510a233972a0d7ff0f767d82f46e046832c10538)
246
+ * Size: 25,000 training samples
247
+ * Columns: <code>premise_original</code>, <code>hypothesis_original</code>, <code>score</code>, <code>sentence1</code>, and <code>sentence2</code>
248
+ * Approximate statistics based on the first 1000 samples:
249
+ | | premise_original | hypothesis_original | score | sentence1 | sentence2 |
250
+ |:--------|:----------------------------------------------------------------------------------|:----------------------------------------------------------------------------------|:-------------------------------------------------------------------|:-----------------------------------------------------------------------------------|:----------------------------------------------------------------------------------|
251
+ | type | string | string | int | string | string |
252
+ | details | <ul><li>min: 4 tokens</li><li>mean: 29.3 tokens</li><li>max: 107 tokens</li></ul> | <ul><li>min: 4 tokens</li><li>mean: 15.62 tokens</li><li>max: 40 tokens</li></ul> | <ul><li>0: ~34.50%</li><li>1: ~33.30%</li><li>2: ~32.20%</li></ul> | <ul><li>min: 4 tokens</li><li>mean: 28.28 tokens</li><li>max: 101 tokens</li></ul> | <ul><li>min: 4 tokens</li><li>mean: 15.39 tokens</li><li>max: 38 tokens</li></ul> |
253
+ * Samples:
254
+ | premise_original | hypothesis_original | score | sentence1 | sentence2 |
255
+ |:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------|:---------------|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------------------------------------------|
256
+ | <code>N, the total number of LC50 values used in calculating the CV(%) varied with organism and toxicant because some data were rejected due to water hardness, lack of concentration measurements, and/or because some of the LC50s were not calculable.</code> | <code>Most discarded data was rejected due to water hardness.</code> | <code>1</code> | <code>N, CV'nin hesaplanmasında kullanılan LC50 değerlerinin toplam sayısı (%) organizma ve toksik madde ile çeşitlidir, çünkü bazı veriler su sertliği, konsantrasyon ölçümlerinin eksikliği ve / veya LC50'lerin bazıları hesaplanamaz olduğu için reddedilmiştir.</code> | <code>Atılan verilerin çoğu su sertliği nedeniyle reddedildi.</code> |
257
+ | <code>As the home of the Venus de Milo and Mona Lisa, the Louvre drew almost unmanageable crowds until President Mitterrand ordered its re-organization in the 1980s.</code> | <code>The Louvre is home of the Venus de Milo and Mona Lisa.</code> | <code>0</code> | <code>Venus de Milo ve Mona Lisa'nın evi olarak Louvre, Başkan Mitterrand'ın 1980'lerde yeniden düzenlenmesini emredene kadar neredeyse yönetilemez kalabalıklar çekti.</code> | <code>Louvre, Venus de Milo ve Mona Lisa'nın evidir.</code> |
258
+ | <code>A year ago, the wife of the Oxford don noticed that the pattern on Kleenex quilted tissue uncannily resembled the Penrose Arrowed Rhombi tilings pattern, which Sir Roger had invented--and copyrighted--in 1974.</code> | <code>It has been recently found out a similarity between the pattern on the recent Kleenex quilted tissue and the one of the Penrose Arrowed Rhombi tilings.</code> | <code>0</code> | <code>Bir yıl önce Oxford'un karısı, Kleenex kapitone dokudaki desenin 1974'te Sir Roger'ın icat ettiği -ve telif hakkı olan - Penrose Arrowed Rhombi tilings desenine benzediğini fark etti.</code> | <code>Yakın zamanda, son Kleenex kapitone dokudaki desen ile Penrose Arrowed Rhombi döşemelerinden biri arasında bir benzerlik bulunmuştur.</code> |
259
+ * Loss: [<code>CoSENTLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#cosentloss) with these parameters:
260
+ ```json
261
+ {
262
+ "scale": 20.0,
263
+ "similarity_fct": "pairwise_cos_sim"
264
+ }
265
+ ```
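+ 
+ Instantiating this loss is a one-liner; a hedged sketch (both the scale of 20.0 and the pairwise cosine similarity are the library defaults):
+ 
+ ```python
+ from sentence_transformers import SentenceTransformer, losses
+ 
+ model = SentenceTransformer("sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2")
+ train_loss = losses.CoSENTLoss(model=model, scale=20.0)  # similarity_fct defaults to pairwise_cos_sim
+ ```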
266
+
267
+ ### Evaluation Dataset
268
+
269
+ #### MoritzLaurer/multilingual-nli-26lang-2mil7
270
+
271
+ * Dataset: [MoritzLaurer/multilingual-nli-26lang-2mil7](https://huggingface.co/datasets/MoritzLaurer/multilingual-nli-26lang-2mil7) at [510a233](https://huggingface.co/datasets/MoritzLaurer/multilingual-nli-26lang-2mil7/tree/510a233972a0d7ff0f767d82f46e046832c10538)
272
+ * Size: 5,000 evaluation samples
273
+ * Columns: <code>premise_original</code>, <code>hypothesis_original</code>, <code>score</code>, <code>sentence1</code>, and <code>sentence2</code>
274
+ * Approximate statistics based on the first 1000 samples:
275
+ | | premise_original | hypothesis_original | score | sentence1 | sentence2 |
276
+ |:--------|:---------------------------------------------------------------------------------|:----------------------------------------------------------------------------------|:-------------------------------------------------------------------|:-----------------------------------------------------------------------------------|:----------------------------------------------------------------------------------|
277
+ | type | string | string | int | string | string |
278
+ | details | <ul><li>min: 5 tokens</li><li>mean: 30.3 tokens</li><li>max: 99 tokens</li></ul> | <ul><li>min: 6 tokens</li><li>mean: 15.11 tokens</li><li>max: 56 tokens</li></ul> | <ul><li>0: ~34.50%</li><li>1: ~29.90%</li><li>2: ~35.60%</li></ul> | <ul><li>min: 6 tokens</li><li>mean: 29.94 tokens</li><li>max: 106 tokens</li></ul> | <ul><li>min: 5 tokens</li><li>mean: 15.29 tokens</li><li>max: 52 tokens</li></ul> |
279
+ * Samples:
280
+ | premise_original | hypothesis_original | score | sentence1 | sentence2 |
281
+ |:----------------------------------------------------------------------------------------------------------------|:-------------------------------------------------------------------------------|:---------------|:------------------------------------------------------------------------------|:-----------------------------------------------------------------|
282
+ | <code>But the racism charge isn't quirky or wacky--it's demagogy.</code> | <code>The accusation of prejudice based on a pedestrian kind of hatred.</code> | <code>0</code> | <code>Ama ırkçılık suçlaması tuhaf ya da tuhaf değil, bu bir demagoji.</code> | <code>Yaya nefretine dayanan önyargı suçlaması.</code> |
283
+ | <code>Why would Gates allow the publication of such a book with his byline and photo on the dust jacket?</code> | <code>Gates' byline and photo are on the dust jacket</code> | <code>0</code> | <code>Gates neden böyle bir kitabın basılmasına izin versin ki?</code> | <code>Gates'in çizgisi ve fotoğrafı toz ceketin üzerinde.</code> |
284
+ | <code>I am a nonsmoker and allergic to cigarette smoke.</code> | <code>I do not smoke.</code> | <code>0</code> | <code>Sigara içmeyen biriyim ve sigara dumanına alerjim var.</code> | <code>Sigara içmiyorum.</code> |
285
+ * Loss: [<code>CoSENTLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#cosentloss) with these parameters:
286
+ ```json
287
+ {
288
+ "scale": 20.0,
289
+ "similarity_fct": "pairwise_cos_sim"
290
+ }
291
+ ```
292
+
293
+ ### Training Hyperparameters
294
+ #### Non-Default Hyperparameters
295
+
296
+ - `eval_strategy`: epoch
297
+ - `per_device_train_batch_size`: 32
298
+ - `per_device_eval_batch_size`: 64
299
+ - `learning_rate`: 2e-05
300
+ - `num_train_epochs`: 5
301
+ - `warmup_ratio`: 0.1
302
+ - `fp16`: True
303
+ - `load_best_model_at_end`: True
304
+ - `ddp_find_unused_parameters`: False
305
+
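+ The non-default values above map onto `SentenceTransformerTrainingArguments` roughly as follows (a sketch; `output_dir` is taken from the checkpoint paths in this repo):
+ 
+ ```python
+ from sentence_transformers import SentenceTransformerTrainingArguments
+ 
+ args = SentenceTransformerTrainingArguments(
+     output_dir="turkish-embedding-model",
+     eval_strategy="epoch",
+     per_device_train_batch_size=32,
+     per_device_eval_batch_size=64,
+     learning_rate=2e-5,
+     num_train_epochs=5,
+     warmup_ratio=0.1,
+     fp16=True,
+     load_best_model_at_end=True,
+     ddp_find_unused_parameters=False,
+ )
+ ```
+ 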
306
+ #### All Hyperparameters
307
+ <details><summary>Click to expand</summary>
308
+
309
+ - `overwrite_output_dir`: False
310
+ - `do_predict`: False
311
+ - `eval_strategy`: epoch
312
+ - `prediction_loss_only`: True
313
+ - `per_device_train_batch_size`: 32
314
+ - `per_device_eval_batch_size`: 64
315
+ - `per_gpu_train_batch_size`: None
316
+ - `per_gpu_eval_batch_size`: None
317
+ - `gradient_accumulation_steps`: 1
318
+ - `eval_accumulation_steps`: None
319
+ - `learning_rate`: 2e-05
320
+ - `weight_decay`: 0.0
321
+ - `adam_beta1`: 0.9
322
+ - `adam_beta2`: 0.999
323
+ - `adam_epsilon`: 1e-08
324
+ - `max_grad_norm`: 1.0
325
+ - `num_train_epochs`: 5
326
+ - `max_steps`: -1
327
+ - `lr_scheduler_type`: linear
328
+ - `lr_scheduler_kwargs`: {}
329
+ - `warmup_ratio`: 0.1
330
+ - `warmup_steps`: 0
331
+ - `log_level`: passive
332
+ - `log_level_replica`: warning
333
+ - `log_on_each_node`: True
334
+ - `logging_nan_inf_filter`: True
335
+ - `save_safetensors`: True
336
+ - `save_on_each_node`: False
337
+ - `save_only_model`: False
338
+ - `restore_callback_states_from_checkpoint`: False
339
+ - `no_cuda`: False
340
+ - `use_cpu`: False
341
+ - `use_mps_device`: False
342
+ - `seed`: 42
343
+ - `data_seed`: None
344
+ - `jit_mode_eval`: False
345
+ - `use_ipex`: False
346
+ - `bf16`: False
347
+ - `fp16`: True
348
+ - `fp16_opt_level`: O1
349
+ - `half_precision_backend`: auto
350
+ - `bf16_full_eval`: False
351
+ - `fp16_full_eval`: False
352
+ - `tf32`: None
353
+ - `local_rank`: 0
354
+ - `ddp_backend`: None
355
+ - `tpu_num_cores`: None
356
+ - `tpu_metrics_debug`: False
357
+ - `debug`: []
358
+ - `dataloader_drop_last`: False
359
+ - `dataloader_num_workers`: 0
360
+ - `dataloader_prefetch_factor`: None
361
+ - `past_index`: -1
362
+ - `disable_tqdm`: False
363
+ - `remove_unused_columns`: True
364
+ - `label_names`: None
365
+ - `load_best_model_at_end`: True
366
+ - `ignore_data_skip`: False
367
+ - `fsdp`: []
368
+ - `fsdp_min_num_params`: 0
369
+ - `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
370
+ - `fsdp_transformer_layer_cls_to_wrap`: None
371
+ - `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
372
+ - `deepspeed`: None
373
+ - `label_smoothing_factor`: 0.0
374
+ - `optim`: adamw_torch
375
+ - `optim_args`: None
376
+ - `adafactor`: False
377
+ - `group_by_length`: False
378
+ - `length_column_name`: length
379
+ - `ddp_find_unused_parameters`: False
380
+ - `ddp_bucket_cap_mb`: None
381
+ - `ddp_broadcast_buffers`: False
382
+ - `dataloader_pin_memory`: True
383
+ - `dataloader_persistent_workers`: False
384
+ - `skip_memory_metrics`: True
385
+ - `use_legacy_prediction_loop`: False
386
+ - `push_to_hub`: False
387
+ - `resume_from_checkpoint`: None
388
+ - `hub_model_id`: None
389
+ - `hub_strategy`: every_save
390
+ - `hub_private_repo`: False
391
+ - `hub_always_push`: False
392
+ - `gradient_checkpointing`: False
393
+ - `gradient_checkpointing_kwargs`: None
394
+ - `include_inputs_for_metrics`: False
395
+ - `eval_do_concat_batches`: True
396
+ - `fp16_backend`: auto
397
+ - `push_to_hub_model_id`: None
398
+ - `push_to_hub_organization`: None
399
+ - `mp_parameters`:
400
+ - `auto_find_batch_size`: False
401
+ - `full_determinism`: False
402
+ - `torchdynamo`: None
403
+ - `ray_scope`: last
404
+ - `ddp_timeout`: 1800
405
+ - `torch_compile`: False
406
+ - `torch_compile_backend`: None
407
+ - `torch_compile_mode`: None
408
+ - `dispatch_batches`: None
409
+ - `split_batches`: None
410
+ - `include_tokens_per_second`: False
411
+ - `include_num_input_tokens_seen`: False
412
+ - `neftune_noise_alpha`: None
413
+ - `optim_target_modules`: None
414
+ - `batch_eval_metrics`: False
415
+ - `batch_sampler`: batch_sampler
416
+ - `multi_dataset_batch_sampler`: proportional
417
+
418
+ </details>
419
+
420
+ ### Training Logs
421
+ <details><summary>Click to expand</summary>
422
+
423
+ | Epoch | Step | Training Loss | loss | tr_ling_spearman_max |
424
+ |:------:|:----:|:-------------:|:------:|:--------------------:|
425
+ | 0.0320 | 25 | 17.17 | - | - |
426
+ | 0.0639 | 50 | 16.4932 | - | - |
427
+ | 0.0959 | 75 | 16.5976 | - | - |
428
+ | 0.1279 | 100 | 15.6991 | - | - |
429
+ | 0.1598 | 125 | 14.876 | - | - |
430
+ | 0.1918 | 150 | 14.4828 | - | - |
431
+ | 0.2238 | 175 | 12.7061 | - | - |
432
+ | 0.2558 | 200 | 10.8687 | - | - |
433
+ | 0.2877 | 225 | 8.3797 | - | - |
434
+ | 0.3197 | 250 | 6.2029 | - | - |
435
+ | 0.3517 | 275 | 5.8228 | - | - |
436
+ | 0.3836 | 300 | 5.811 | - | - |
437
+ | 0.4156 | 325 | 5.8079 | - | - |
438
+ | 0.4476 | 350 | 5.8077 | - | - |
439
+ | 0.4795 | 375 | 5.8035 | - | - |
440
+ | 0.5115 | 400 | 5.8072 | - | - |
441
+ | 0.5435 | 425 | 5.8033 | - | - |
442
+ | 0.5754 | 450 | 5.8086 | - | - |
443
+ | 0.6074 | 475 | 5.81 | - | - |
444
+ | 0.6394 | 500 | 5.7949 | - | - |
445
+ | 0.6714 | 525 | 5.8079 | - | - |
446
+ | 0.7033 | 550 | 5.8057 | - | - |
447
+ | 0.7353 | 575 | 5.8097 | - | - |
448
+ | 0.7673 | 600 | 5.7986 | - | - |
449
+ | 0.7992 | 625 | 5.8051 | - | - |
450
+ | 0.8312 | 650 | 5.8041 | - | - |
451
+ | 0.8632 | 675 | 5.7907 | - | - |
452
+ | 0.8951 | 700 | 5.7991 | - | - |
453
+ | 0.9271 | 725 | 5.8035 | - | - |
454
+ | 0.9591 | 750 | 5.7945 | - | - |
455
+ | 0.9910 | 775 | 5.8077 | - | - |
456
+ | 1.0 | 782 | - | 5.8024 | 0.0330 |
457
+ | 1.0230 | 800 | 5.6703 | - | - |
458
+ | 1.0550 | 825 | 5.8052 | - | - |
459
+ | 1.0870 | 850 | 5.7936 | - | - |
460
+ | 1.1189 | 875 | 5.7924 | - | - |
461
+ | 1.1509 | 900 | 5.7806 | - | - |
462
+ | 1.1829 | 925 | 5.7835 | - | - |
463
+ | 1.2148 | 950 | 5.7619 | - | - |
464
+ | 1.2468 | 975 | 5.8038 | - | - |
465
+ | 1.2788 | 1000 | 5.779 | - | - |
466
+ | 1.3107 | 1025 | 5.7904 | - | - |
467
+ | 1.3427 | 1050 | 5.7696 | - | - |
468
+ | 1.3747 | 1075 | 5.7919 | - | - |
469
+ | 1.4066 | 1100 | 5.7785 | - | - |
470
+ | 1.4386 | 1125 | 5.7862 | - | - |
471
+ | 1.4706 | 1150 | 5.7703 | - | - |
472
+ | 1.5026 | 1175 | 5.773 | - | - |
473
+ | 1.5345 | 1200 | 5.7627 | - | - |
474
+ | 1.5665 | 1225 | 5.7596 | - | - |
475
+ | 1.5985 | 1250 | 5.7882 | - | - |
476
+ | 1.6304 | 1275 | 5.7828 | - | - |
477
+ | 1.6624 | 1300 | 5.771 | - | - |
478
+ | 1.6944 | 1325 | 5.788 | - | - |
479
+ | 1.7263 | 1350 | 5.7719 | - | - |
480
+ | 1.7583 | 1375 | 5.7846 | - | - |
481
+ | 1.7903 | 1400 | 5.7838 | - | - |
482
+ | 1.8223 | 1425 | 5.7912 | - | - |
483
+ | 1.8542 | 1450 | 5.7686 | - | - |
484
+ | 1.8862 | 1475 | 5.7938 | - | - |
485
+ | 1.9182 | 1500 | 5.7847 | - | - |
486
+ | 1.9501 | 1525 | 5.7952 | - | - |
487
+ | 1.9821 | 1550 | 5.7528 | - | - |
488
+ | 2.0 | 1564 | - | 5.7933 | 0.0682 |
489
+ | 2.0141 | 1575 | 5.65 | - | - |
490
+ | 2.0460 | 1600 | 5.7537 | - | - |
491
+ | 2.0780 | 1625 | 5.7098 | - | - |
492
+ | 2.1100 | 1650 | 5.7149 | - | - |
493
+ | 2.1419 | 1675 | 5.7585 | - | - |
494
+ | 2.1739 | 1700 | 5.7277 | - | - |
495
+ | 2.2059 | 1725 | 5.7482 | - | - |
496
+ | 2.2379 | 1750 | 5.7115 | - | - |
497
+ | 2.2698 | 1775 | 5.6895 | - | - |
498
+ | 2.3018 | 1800 | 5.7389 | - | - |
499
+ | 2.3338 | 1825 | 5.7161 | - | - |
500
+ | 2.3657 | 1850 | 5.7123 | - | - |
501
+ | 2.3977 | 1875 | 5.7322 | - | - |
502
+ | 2.4297 | 1900 | 5.7421 | - | - |
503
+ | 2.4616 | 1925 | 5.7615 | - | - |
504
+ | 2.4936 | 1950 | 5.7493 | - | - |
505
+ | 2.5256 | 1975 | 5.7298 | - | - |
506
+ | 2.5575 | 2000 | 5.7529 | - | - |
507
+ | 2.5895 | 2025 | 5.7318 | - | - |
508
+ | 2.6215 | 2050 | 5.7036 | - | - |
509
+ | 2.6535 | 2075 | 5.7158 | - | - |
510
+ | 2.6854 | 2100 | 5.7209 | - | - |
511
+ | 2.7174 | 2125 | 5.738 | - | - |
512
+ | 2.7494 | 2150 | 5.7337 | - | - |
513
+ | 2.7813 | 2175 | 5.713 | - | - |
514
+ | 2.8133 | 2200 | 5.7257 | - | - |
515
+ | 2.8453 | 2225 | 5.6958 | - | - |
516
+ | 2.8772 | 2250 | 5.7053 | - | - |
517
+ | 2.9092 | 2275 | 5.7246 | - | - |
518
+ | 2.9412 | 2300 | 5.7291 | - | - |
519
+ | 2.9731 | 2325 | 5.7139 | - | - |
520
+ | 3.0 | 2346 | - | 5.8510 | 0.0837 |
521
+ | 3.0051 | 2350 | 5.5715 | - | - |
522
+ | 3.0371 | 2375 | 5.6558 | - | - |
523
+ | 3.0691 | 2400 | 5.6441 | - | - |
524
+ | 3.1010 | 2425 | 5.6569 | - | - |
525
+ | 3.1330 | 2450 | 5.669 | - | - |
526
+ | 3.1650 | 2475 | 5.6361 | - | - |
527
+ | 3.1969 | 2500 | 5.6524 | - | - |
528
+ | 3.2289 | 2525 | 5.6773 | - | - |
529
+ | 3.2609 | 2550 | 5.6552 | - | - |
530
+ | 3.2928 | 2575 | 5.6807 | - | - |
531
+ | 3.3248 | 2600 | 5.6638 | - | - |
532
+ | 3.3568 | 2625 | 5.6582 | - | - |
533
+ | 3.3887 | 2650 | 5.658 | - | - |
534
+ | 3.4207 | 2675 | 5.6626 | - | - |
535
+ | 3.4527 | 2700 | 5.6802 | - | - |
536
+ | 3.4847 | 2725 | 5.6377 | - | - |
537
+ | 3.5166 | 2750 | 5.6752 | - | - |
538
+ | 3.5486 | 2775 | 5.6573 | - | - |
539
+ | 3.5806 | 2800 | 5.6963 | - | - |
540
+ | 3.6125 | 2825 | 5.7007 | - | - |
541
+ | 3.6445 | 2850 | 5.6746 | - | - |
542
+ | 3.6765 | 2875 | 5.6312 | - | - |
543
+ | 3.7084 | 2900 | 5.5596 | - | - |
544
+ | 3.7404 | 2925 | 5.7003 | - | - |
545
+ | 3.7724 | 2950 | 5.6739 | - | - |
546
+ | 3.8043 | 2975 | 5.655 | - | - |
547
+ | 3.8363 | 3000 | 5.6787 | - | - |
548
+ | 3.8683 | 3025 | 5.643 | - | - |
549
+ | 3.9003 | 3050 | 5.6412 | - | - |
550
+ | 3.9322 | 3075 | 5.758 | - | - |
551
+ | 3.9642 | 3100 | 5.6769 | - | - |
552
+ | 3.9962 | 3125 | 5.7206 | - | - |
553
+ | 4.0 | 3128 | - | 5.9125 | 0.0824 |
554
+
555
+ </details>
556
+
557
+ ### Framework Versions
558
+ - Python: 3.10.12
559
+ - Sentence Transformers: 3.0.0
560
+ - Transformers: 4.41.0
561
+ - PyTorch: 2.3.0+cu121
562
+ - Accelerate: 0.30.1
563
+ - Datasets: 2.19.1
564
+ - Tokenizers: 0.19.1
565
+
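+ To approximate this environment, pinning the versions above should work (an untested suggestion):
+ 
+ ```bash
+ pip install sentence-transformers==3.0.0 transformers==4.41.0 accelerate==0.30.1 datasets==2.19.1 tokenizers==0.19.1
+ ```
+ 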
566
+ ## Citation
567
+
568
+ ### BibTeX
569
+
570
+ #### Sentence Transformers
571
+ ```bibtex
572
+ @inproceedings{reimers-2019-sentence-bert,
573
+ title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
574
+ author = "Reimers, Nils and Gurevych, Iryna",
575
+ booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
576
+ month = "11",
577
+ year = "2019",
578
+ publisher = "Association for Computational Linguistics",
579
+ url = "https://arxiv.org/abs/1908.10084",
580
+ }
581
+ ```
582
+
583
+ #### CoSENTLoss
584
+ ```bibtex
585
+ @online{kexuefm-8847,
586
+ title={CoSENT: A more efficient sentence vector scheme than Sentence-BERT},
587
+ author={Su Jianlin},
588
+ year={2022},
589
+ month={Jan},
590
+ url={https://kexue.fm/archives/8847},
591
+ }
592
+ ```
593
+
594
+ <!--
595
+ ## Glossary
596
+
597
+ *Clearly define terms in order to be accessible across audiences.*
598
+ -->
599
+
600
+ <!--
601
+ ## Model Card Authors
602
+
603
+ *Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
604
+ -->
605
+
606
+ <!--
607
+ ## Model Card Contact
608
+
609
+ *Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
610
+ -->
checkpoint-3128/config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
3
+ "architectures": [
4
+ "BertModel"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "classifier_dropout": null,
8
+ "gradient_checkpointing": false,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 384,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 1536,
14
+ "layer_norm_eps": 1e-12,
15
+ "max_position_embeddings": 512,
16
+ "model_type": "bert",
17
+ "num_attention_heads": 12,
18
+ "num_hidden_layers": 12,
19
+ "pad_token_id": 0,
20
+ "position_embedding_type": "absolute",
21
+ "torch_dtype": "float32",
22
+ "transformers_version": "4.41.0",
23
+ "type_vocab_size": 2,
24
+ "use_cache": true,
25
+ "vocab_size": 250037
26
+ }
checkpoint-3128/config_sentence_transformers.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "__version__": {
3
+ "sentence_transformers": "2.0.0",
4
+ "transformers": "4.7.0",
5
+ "pytorch": "1.9.0+cu102"
6
+ },
7
+ "prompts": {},
8
+ "default_prompt_name": null,
9
+ "similarity_fn_name": null
10
+ }
checkpoint-3128/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:19f15c426e0a8e3ede12b4f3bb95161ed5a4c5a51bcb20ab519919596e330412
3
+ size 470637416
checkpoint-3128/modules.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "idx": 0,
4
+ "name": "0",
5
+ "path": "",
6
+ "type": "sentence_transformers.models.Transformer"
7
+ },
8
+ {
9
+ "idx": 1,
10
+ "name": "1",
11
+ "path": "1_Pooling",
12
+ "type": "sentence_transformers.models.Pooling"
13
+ }
14
+ ]
checkpoint-3128/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5a405964bc0e82a7e1469514b996b71e65018ff76733fce47d9f2788f24fc5ab
3
+ size 940212218
checkpoint-3128/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:46c53b1054e154626ecf95cb41fffe8108cccf21bc4843652c954100009a36a3
3
+ size 14180
checkpoint-3128/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d72a55fbec173081120a6cdf969c078f1cc9ef4f2e0dc0f64b604e716642a81e
3
+ size 1064
checkpoint-3128/sentence_bert_config.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "max_seq_length": 128,
3
+ "do_lower_case": false
4
+ }
checkpoint-3128/special_tokens_map.json ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "cls_token": {
10
+ "content": "<s>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "eos_token": {
17
+ "content": "</s>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "mask_token": {
24
+ "content": "<mask>",
25
+ "lstrip": true,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "pad_token": {
31
+ "content": "<pad>",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ },
37
+ "sep_token": {
38
+ "content": "</s>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false
43
+ },
44
+ "unk_token": {
45
+ "content": "<unk>",
46
+ "lstrip": false,
47
+ "normalized": false,
48
+ "rstrip": false,
49
+ "single_word": false
50
+ }
51
+ }
checkpoint-3128/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cad551d5600a84242d0973327029452a1e3672ba6313c2a3c3d69c4310e12719
3
+ size 17082987
checkpoint-3128/tokenizer_config.json ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "<s>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "<pad>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "</s>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "<unk>",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "250001": {
36
+ "content": "<mask>",
37
+ "lstrip": true,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "bos_token": "<s>",
45
+ "clean_up_tokenization_spaces": true,
46
+ "cls_token": "<s>",
47
+ "do_lower_case": true,
48
+ "eos_token": "</s>",
49
+ "mask_token": "<mask>",
50
+ "max_length": 128,
51
+ "model_max_length": 128,
52
+ "pad_to_multiple_of": null,
53
+ "pad_token": "<pad>",
54
+ "pad_token_type_id": 0,
55
+ "padding_side": "right",
56
+ "sep_token": "</s>",
57
+ "stride": 0,
58
+ "strip_accents": null,
59
+ "tokenize_chinese_chars": true,
60
+ "tokenizer_class": "BertTokenizer",
61
+ "truncation_side": "right",
62
+ "truncation_strategy": "longest_first",
63
+ "unk_token": "<unk>"
64
+ }
checkpoint-3128/trainer_state.json ADDED
@@ -0,0 +1,989 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 5.793323516845703,
3
+ "best_model_checkpoint": "turkish-embedding-model/checkpoint-1564",
4
+ "epoch": 4.0,
5
+ "eval_steps": 500,
6
+ "global_step": 3128,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.0319693094629156,
13
+ "grad_norm": 70.72708129882812,
14
+ "learning_rate": 1.1253196930946293e-06,
15
+ "loss": 17.17,
16
+ "step": 25
17
+ },
18
+ {
19
+ "epoch": 0.0639386189258312,
20
+ "grad_norm": 81.68770599365234,
21
+ "learning_rate": 2.4040920716112534e-06,
22
+ "loss": 16.4932,
23
+ "step": 50
24
+ },
25
+ {
26
+ "epoch": 0.0959079283887468,
27
+ "grad_norm": 109.91338348388672,
28
+ "learning_rate": 3.6828644501278778e-06,
29
+ "loss": 16.5976,
30
+ "step": 75
31
+ },
32
+ {
33
+ "epoch": 0.1278772378516624,
34
+ "grad_norm": 73.892578125,
35
+ "learning_rate": 4.961636828644502e-06,
36
+ "loss": 15.6991,
37
+ "step": 100
38
+ },
39
+ {
40
+ "epoch": 0.159846547314578,
41
+ "grad_norm": 79.35150909423828,
42
+ "learning_rate": 6.240409207161126e-06,
43
+ "loss": 14.876,
44
+ "step": 125
45
+ },
46
+ {
47
+ "epoch": 0.1918158567774936,
48
+ "grad_norm": 83.0904541015625,
49
+ "learning_rate": 7.5191815856777495e-06,
50
+ "loss": 14.4828,
51
+ "step": 150
52
+ },
53
+ {
54
+ "epoch": 0.2237851662404092,
55
+ "grad_norm": 76.82855987548828,
56
+ "learning_rate": 8.797953964194374e-06,
57
+ "loss": 12.7061,
58
+ "step": 175
59
+ },
60
+ {
61
+ "epoch": 0.2557544757033248,
62
+ "grad_norm": 51.30181121826172,
63
+ "learning_rate": 1.0076726342710998e-05,
64
+ "loss": 10.8687,
65
+ "step": 200
66
+ },
67
+ {
68
+ "epoch": 0.2877237851662404,
69
+ "grad_norm": 18.70808219909668,
70
+ "learning_rate": 1.1355498721227622e-05,
71
+ "loss": 8.3797,
72
+ "step": 225
73
+ },
74
+ {
75
+ "epoch": 0.319693094629156,
76
+ "grad_norm": 1.3039417266845703,
77
+ "learning_rate": 1.2634271099744246e-05,
78
+ "loss": 6.2029,
79
+ "step": 250
80
+ },
81
+ {
82
+ "epoch": 0.3516624040920716,
83
+ "grad_norm": 0.2324853092432022,
84
+ "learning_rate": 1.391304347826087e-05,
85
+ "loss": 5.8228,
86
+ "step": 275
87
+ },
88
+ {
89
+ "epoch": 0.3836317135549872,
90
+ "grad_norm": 0.1757364720106125,
91
+ "learning_rate": 1.5191815856777494e-05,
92
+ "loss": 5.811,
93
+ "step": 300
94
+ },
95
+ {
96
+ "epoch": 0.4156010230179028,
97
+ "grad_norm": 0.1788654774427414,
98
+ "learning_rate": 1.647058823529412e-05,
99
+ "loss": 5.8079,
100
+ "step": 325
101
+ },
102
+ {
103
+ "epoch": 0.4475703324808184,
104
+ "grad_norm": 0.12862567603588104,
105
+ "learning_rate": 1.7749360613810744e-05,
106
+ "loss": 5.8077,
107
+ "step": 350
108
+ },
109
+ {
110
+ "epoch": 0.479539641943734,
111
+ "grad_norm": 0.14497514069080353,
112
+ "learning_rate": 1.9028132992327367e-05,
113
+ "loss": 5.8035,
114
+ "step": 375
115
+ },
116
+ {
117
+ "epoch": 0.5115089514066496,
118
+ "grad_norm": 0.1350390762090683,
119
+ "learning_rate": 1.996589940323956e-05,
120
+ "loss": 5.8072,
121
+ "step": 400
122
+ },
123
+ {
124
+ "epoch": 0.5434782608695652,
125
+ "grad_norm": 0.1435602754354477,
126
+ "learning_rate": 1.9823813583404378e-05,
127
+ "loss": 5.8033,
128
+ "step": 425
129
+ },
130
+ {
131
+ "epoch": 0.5754475703324808,
132
+ "grad_norm": 0.11389254033565521,
133
+ "learning_rate": 1.96817277635692e-05,
134
+ "loss": 5.8086,
135
+ "step": 450
136
+ },
137
+ {
138
+ "epoch": 0.6074168797953964,
139
+ "grad_norm": 0.15821650624275208,
140
+ "learning_rate": 1.9539641943734017e-05,
141
+ "loss": 5.81,
142
+ "step": 475
143
+ },
144
+ {
145
+ "epoch": 0.639386189258312,
146
+ "grad_norm": 0.1179889366030693,
147
+ "learning_rate": 1.9397556123898838e-05,
148
+ "loss": 5.7949,
149
+ "step": 500
150
+ },
151
+ {
152
+ "epoch": 0.6713554987212276,
153
+ "grad_norm": 0.10912967473268509,
154
+ "learning_rate": 1.9255470304063656e-05,
155
+ "loss": 5.8079,
156
+ "step": 525
157
+ },
158
+ {
159
+ "epoch": 0.7033248081841432,
160
+ "grad_norm": 0.11702870577573776,
161
+ "learning_rate": 1.9113384484228477e-05,
162
+ "loss": 5.8057,
163
+ "step": 550
164
+ },
165
+ {
166
+ "epoch": 0.7352941176470589,
167
+ "grad_norm": 0.13132448494434357,
168
+ "learning_rate": 1.8971298664393295e-05,
169
+ "loss": 5.8097,
170
+ "step": 575
171
+ },
172
+ {
173
+ "epoch": 0.7672634271099744,
174
+ "grad_norm": 0.15833145380020142,
175
+ "learning_rate": 1.8829212844558116e-05,
176
+ "loss": 5.7986,
177
+ "step": 600
178
+ },
179
+ {
180
+ "epoch": 0.7992327365728901,
181
+ "grad_norm": 0.11651863902807236,
182
+ "learning_rate": 1.8687127024722937e-05,
183
+ "loss": 5.8051,
184
+ "step": 625
185
+ },
186
+ {
187
+ "epoch": 0.8312020460358056,
188
+ "grad_norm": 0.5393890142440796,
189
+ "learning_rate": 1.854504120488775e-05,
190
+ "loss": 5.8041,
191
+ "step": 650
192
+ },
193
+ {
194
+ "epoch": 0.8631713554987213,
195
+ "grad_norm": 0.6457561254501343,
196
+ "learning_rate": 1.8402955385052572e-05,
197
+ "loss": 5.7907,
198
+ "step": 675
199
+ },
200
+ {
201
+ "epoch": 0.8951406649616368,
202
+ "grad_norm": 0.5643135905265808,
203
+ "learning_rate": 1.8260869565217393e-05,
204
+ "loss": 5.7991,
205
+ "step": 700
206
+ },
207
+ {
208
+ "epoch": 0.9271099744245525,
209
+ "grad_norm": 3.214787721633911,
210
+ "learning_rate": 1.811878374538221e-05,
211
+ "loss": 5.8035,
212
+ "step": 725
213
+ },
214
+ {
215
+ "epoch": 0.959079283887468,
216
+ "grad_norm": 2.781162977218628,
217
+ "learning_rate": 1.7976697925547032e-05,
218
+ "loss": 5.7945,
219
+ "step": 750
220
+ },
221
+ {
222
+ "epoch": 0.9910485933503836,
223
+ "grad_norm": 0.38559335470199585,
224
+ "learning_rate": 1.783461210571185e-05,
225
+ "loss": 5.8077,
226
+ "step": 775
227
+ },
228
+ {
229
+ "epoch": 1.0,
230
+ "eval_loss": 5.8023600578308105,
231
+ "eval_runtime": 18.0632,
232
+ "eval_samples_per_second": 276.805,
233
+ "eval_steps_per_second": 4.374,
234
+ "eval_tr_ling_pearson_cosine": 0.017751548525136808,
235
+ "eval_tr_ling_pearson_dot": 0.025703597820631346,
236
+ "eval_tr_ling_pearson_euclidean": 0.02195284877201089,
237
+ "eval_tr_ling_pearson_manhattan": 0.02083376479528459,
238
+ "eval_tr_ling_pearson_max": 0.025703597820631346,
239
+ "eval_tr_ling_spearman_cosine": 0.027108099994157316,
240
+ "eval_tr_ling_spearman_dot": 0.03304394653738539,
241
+ "eval_tr_ling_spearman_euclidean": 0.025485959636772793,
242
+ "eval_tr_ling_spearman_manhattan": 0.024466610177699702,
243
+ "eval_tr_ling_spearman_max": 0.03304394653738539,
244
+ "step": 782
245
+ },
246
+ {
247
+ "epoch": 1.0230179028132993,
248
+ "grad_norm": 0.3645063638687134,
249
+ "learning_rate": 1.769252628587667e-05,
250
+ "loss": 5.6703,
251
+ "step": 800
252
+ },
253
+ {
254
+ "epoch": 1.054987212276215,
255
+ "grad_norm": 0.9638137817382812,
256
+ "learning_rate": 1.7550440466041488e-05,
257
+ "loss": 5.8052,
258
+ "step": 825
259
+ },
260
+ {
261
+ "epoch": 1.0869565217391304,
262
+ "grad_norm": 2.114203691482544,
263
+ "learning_rate": 1.740835464620631e-05,
264
+ "loss": 5.7936,
265
+ "step": 850
266
+ },
267
+ {
268
+ "epoch": 1.118925831202046,
269
+ "grad_norm": 1.8992066383361816,
270
+ "learning_rate": 1.7266268826371127e-05,
271
+ "loss": 5.7924,
272
+ "step": 875
273
+ },
274
+ {
275
+ "epoch": 1.1508951406649617,
276
+ "grad_norm": 2.8299577236175537,
277
+ "learning_rate": 1.7124183006535948e-05,
278
+ "loss": 5.7806,
279
+ "step": 900
280
+ },
281
+ {
282
+ "epoch": 1.1828644501278773,
283
+ "grad_norm": 1.956953525543213,
284
+ "learning_rate": 1.698209718670077e-05,
285
+ "loss": 5.7835,
286
+ "step": 925
287
+ },
288
+ {
289
+ "epoch": 1.2148337595907928,
290
+ "grad_norm": 2.658413887023926,
291
+ "learning_rate": 1.6840011366865587e-05,
292
+ "loss": 5.7619,
293
+ "step": 950
294
+ },
295
+ {
296
+ "epoch": 1.2468030690537084,
297
+ "grad_norm": 1.2760388851165771,
298
+ "learning_rate": 1.6697925547030408e-05,
299
+ "loss": 5.8038,
300
+ "step": 975
301
+ },
302
+ {
303
+ "epoch": 1.278772378516624,
304
+ "grad_norm": 1.7434897422790527,
305
+ "learning_rate": 1.6555839727195226e-05,
306
+ "loss": 5.779,
307
+ "step": 1000
308
+ },
309
+ {
310
+ "epoch": 1.3107416879795397,
311
+ "grad_norm": 1.3532071113586426,
312
+ "learning_rate": 1.6413753907360047e-05,
313
+ "loss": 5.7904,
314
+ "step": 1025
315
+ },
316
+ {
317
+ "epoch": 1.3427109974424551,
318
+ "grad_norm": 3.7385997772216797,
319
+ "learning_rate": 1.6271668087524864e-05,
320
+ "loss": 5.7696,
321
+ "step": 1050
322
+ },
323
+ {
324
+ "epoch": 1.3746803069053708,
325
+ "grad_norm": 0.9061102867126465,
326
+ "learning_rate": 1.6129582267689685e-05,
327
+ "loss": 5.7919,
328
+ "step": 1075
329
+ },
330
+ {
331
+ "epoch": 1.4066496163682864,
332
+ "grad_norm": 2.7104809284210205,
333
+ "learning_rate": 1.5987496447854503e-05,
334
+ "loss": 5.7785,
335
+ "step": 1100
336
+ },
337
+ {
338
+ "epoch": 1.438618925831202,
339
+ "grad_norm": 1.7147830724716187,
340
+ "learning_rate": 1.5845410628019324e-05,
341
+ "loss": 5.7862,
342
+ "step": 1125
343
+ },
344
+ {
345
+ "epoch": 1.4705882352941178,
346
+ "grad_norm": 2.525214672088623,
347
+ "learning_rate": 1.5703324808184145e-05,
348
+ "loss": 5.7703,
349
+ "step": 1150
350
+ },
351
+ {
352
+ "epoch": 1.5025575447570332,
353
+ "grad_norm": 1.7794997692108154,
354
+ "learning_rate": 1.5561238988348963e-05,
355
+ "loss": 5.773,
356
+ "step": 1175
357
+ },
358
+ {
359
+ "epoch": 1.5345268542199488,
360
+ "grad_norm": 4.901644229888916,
361
+ "learning_rate": 1.5419153168513784e-05,
362
+ "loss": 5.7627,
363
+ "step": 1200
364
+ },
365
+ {
366
+ "epoch": 1.5664961636828645,
367
+ "grad_norm": 3.360812187194824,
368
+ "learning_rate": 1.52770673486786e-05,
369
+ "loss": 5.7596,
370
+ "step": 1225
371
+ },
372
+ {
373
+ "epoch": 1.59846547314578,
374
+ "grad_norm": 1.2768888473510742,
375
+ "learning_rate": 1.5134981528843423e-05,
376
+ "loss": 5.7882,
377
+ "step": 1250
378
+ },
379
+ {
380
+ "epoch": 1.6304347826086958,
381
+ "grad_norm": 2.206226348876953,
382
+ "learning_rate": 1.4992895709008242e-05,
383
+ "loss": 5.7828,
384
+ "step": 1275
385
+ },
386
+ {
387
+ "epoch": 1.6624040920716112,
388
+ "grad_norm": 1.4602406024932861,
389
+ "learning_rate": 1.4850809889173061e-05,
390
+ "loss": 5.771,
391
+ "step": 1300
392
+ },
393
+ {
394
+ "epoch": 1.6943734015345269,
395
+ "grad_norm": 1.1597537994384766,
396
+ "learning_rate": 1.4708724069337881e-05,
397
+ "loss": 5.788,
398
+ "step": 1325
399
+ },
400
+ {
401
+ "epoch": 1.7263427109974425,
402
+ "grad_norm": 3.7494003772735596,
403
+ "learning_rate": 1.45666382495027e-05,
404
+ "loss": 5.7719,
405
+ "step": 1350
406
+ },
407
+ {
408
+ "epoch": 1.758312020460358,
409
+ "grad_norm": 1.6271498203277588,
410
+ "learning_rate": 1.442455242966752e-05,
411
+ "loss": 5.7846,
412
+ "step": 1375
413
+ },
414
+ {
415
+ "epoch": 1.7902813299232738,
416
+ "grad_norm": 2.0469117164611816,
417
+ "learning_rate": 1.4282466609832339e-05,
418
+ "loss": 5.7838,
419
+ "step": 1400
420
+ },
421
+ {
422
+ "epoch": 1.8222506393861893,
423
+ "grad_norm": 2.533921003341675,
424
+ "learning_rate": 1.4140380789997158e-05,
425
+ "loss": 5.7912,
426
+ "step": 1425
427
+ },
428
+ {
429
+ "epoch": 1.854219948849105,
430
+ "grad_norm": 3.291757583618164,
431
+ "learning_rate": 1.3998294970161978e-05,
432
+ "loss": 5.7686,
433
+ "step": 1450
434
+ },
435
+ {
436
+ "epoch": 1.8861892583120206,
437
+ "grad_norm": 3.0181350708007812,
438
+ "learning_rate": 1.3856209150326799e-05,
439
+ "loss": 5.7938,
440
+ "step": 1475
441
+ },
442
+ {
443
+ "epoch": 1.918158567774936,
444
+ "grad_norm": 2.553502321243286,
445
+ "learning_rate": 1.3714123330491618e-05,
446
+ "loss": 5.7847,
447
+ "step": 1500
448
+ },
449
+ {
450
+ "epoch": 1.9501278772378516,
451
+ "grad_norm": 1.8034719228744507,
452
+ "learning_rate": 1.3572037510656438e-05,
453
+ "loss": 5.7952,
454
+ "step": 1525
455
+ },
456
+ {
457
+ "epoch": 1.9820971867007673,
458
+ "grad_norm": 3.7138864994049072,
459
+ "learning_rate": 1.3429951690821257e-05,
460
+ "loss": 5.7528,
461
+ "step": 1550
462
+ },
463
+ {
464
+ "epoch": 2.0,
465
+ "eval_loss": 5.793323516845703,
466
+ "eval_runtime": 18.2796,
467
+ "eval_samples_per_second": 273.528,
468
+ "eval_steps_per_second": 4.322,
469
+ "eval_tr_ling_pearson_cosine": 0.037604255015168134,
470
+ "eval_tr_ling_pearson_dot": 0.0673696846368413,
471
+ "eval_tr_ling_pearson_euclidean": 0.03698411306484619,
472
+ "eval_tr_ling_pearson_manhattan": 0.034740275152181296,
473
+ "eval_tr_ling_pearson_max": 0.0673696846368413,
474
+ "eval_tr_ling_spearman_cosine": 0.04804112988506346,
475
+ "eval_tr_ling_spearman_dot": 0.06818119362900125,
476
+ "eval_tr_ling_spearman_euclidean": 0.03903062430281842,
477
+ "eval_tr_ling_spearman_manhattan": 0.03769766156967754,
478
+ "eval_tr_ling_spearman_max": 0.06818119362900125,
479
+ "step": 1564
480
+ },
481
+ {
482
+ "epoch": 2.0140664961636827,
483
+ "grad_norm": 2.8085248470306396,
484
+ "learning_rate": 1.3287865870986076e-05,
485
+ "loss": 5.65,
486
+ "step": 1575
487
+ },
488
+ {
489
+ "epoch": 2.0460358056265986,
490
+ "grad_norm": 3.3792033195495605,
491
+ "learning_rate": 1.3145780051150896e-05,
492
+ "loss": 5.7537,
493
+ "step": 1600
494
+ },
495
+ {
496
+ "epoch": 2.078005115089514,
497
+ "grad_norm": 3.44346022605896,
498
+ "learning_rate": 1.3003694231315715e-05,
499
+ "loss": 5.7098,
500
+ "step": 1625
501
+ },
502
+ {
503
+ "epoch": 2.10997442455243,
504
+ "grad_norm": 5.481964588165283,
505
+ "learning_rate": 1.2861608411480534e-05,
506
+ "loss": 5.7149,
507
+ "step": 1650
508
+ },
509
+ {
510
+ "epoch": 2.1419437340153453,
511
+ "grad_norm": 2.9816033840179443,
512
+ "learning_rate": 1.2719522591645354e-05,
513
+ "loss": 5.7585,
514
+ "step": 1675
515
+ },
516
+ {
517
+ "epoch": 2.1739130434782608,
518
+ "grad_norm": 3.2157652378082275,
519
+ "learning_rate": 1.2577436771810175e-05,
520
+ "loss": 5.7277,
521
+ "step": 1700
522
+ },
523
+ {
524
+ "epoch": 2.2058823529411766,
525
+ "grad_norm": 2.92006516456604,
526
+ "learning_rate": 1.2435350951974994e-05,
527
+ "loss": 5.7482,
528
+ "step": 1725
529
+ },
530
+ {
531
+ "epoch": 2.237851662404092,
532
+ "grad_norm": 3.7664051055908203,
533
+ "learning_rate": 1.2293265132139814e-05,
534
+ "loss": 5.7115,
535
+ "step": 1750
536
+ },
537
+ {
538
+ "epoch": 2.2698209718670075,
539
+ "grad_norm": 5.3445353507995605,
540
+ "learning_rate": 1.2151179312304633e-05,
541
+ "loss": 5.6895,
542
+ "step": 1775
543
+ },
544
+ {
545
+ "epoch": 2.3017902813299234,
546
+ "grad_norm": 4.100110054016113,
547
+ "learning_rate": 1.2009093492469452e-05,
548
+ "loss": 5.7389,
549
+ "step": 1800
550
+ },
551
+ {
552
+ "epoch": 2.333759590792839,
553
+ "grad_norm": 5.986413478851318,
554
+ "learning_rate": 1.1867007672634272e-05,
555
+ "loss": 5.7161,
556
+ "step": 1825
557
+ },
558
+ {
559
+ "epoch": 2.3657289002557547,
560
+ "grad_norm": 4.717130661010742,
561
+ "learning_rate": 1.1724921852799091e-05,
562
+ "loss": 5.7123,
563
+ "step": 1850
564
+ },
565
+ {
566
+ "epoch": 2.39769820971867,
567
+ "grad_norm": 2.833897352218628,
568
+ "learning_rate": 1.158283603296391e-05,
569
+ "loss": 5.7322,
570
+ "step": 1875
571
+ },
572
+ {
573
+ "epoch": 2.4296675191815855,
574
+ "grad_norm": 3.9461288452148438,
575
+ "learning_rate": 1.144075021312873e-05,
576
+ "loss": 5.7421,
577
+ "step": 1900
578
+ },
579
+ {
580
+ "epoch": 2.4616368286445014,
581
+ "grad_norm": 5.360823154449463,
582
+ "learning_rate": 1.1298664393293551e-05,
583
+ "loss": 5.7615,
584
+ "step": 1925
585
+ },
586
+ {
587
+ "epoch": 2.493606138107417,
588
+ "grad_norm": 3.290187120437622,
589
+ "learning_rate": 1.115657857345837e-05,
590
+ "loss": 5.7493,
591
+ "step": 1950
592
+ },
593
+ {
594
+ "epoch": 2.5255754475703327,
595
+ "grad_norm": 2.8723881244659424,
596
+ "learning_rate": 1.101449275362319e-05,
597
+ "loss": 5.7298,
598
+ "step": 1975
599
+ },
600
+ {
601
+ "epoch": 2.557544757033248,
602
+ "grad_norm": 12.763352394104004,
603
+ "learning_rate": 1.0872406933788009e-05,
604
+ "loss": 5.7529,
605
+ "step": 2000
606
+ },
607
+ {
608
+ "epoch": 2.5895140664961636,
609
+ "grad_norm": 3.423097610473633,
610
+ "learning_rate": 1.0730321113952828e-05,
611
+ "loss": 5.7318,
612
+ "step": 2025
613
+ },
614
+ {
615
+ "epoch": 2.6214833759590794,
616
+ "grad_norm": 3.546499252319336,
617
+ "learning_rate": 1.0588235294117648e-05,
618
+ "loss": 5.7036,
619
+ "step": 2050
620
+ },
621
+ {
622
+ "epoch": 2.653452685421995,
623
+ "grad_norm": 4.731326103210449,
624
+ "learning_rate": 1.0446149474282467e-05,
625
+ "loss": 5.7158,
626
+ "step": 2075
627
+ },
628
+ {
629
+ "epoch": 2.6854219948849103,
630
+ "grad_norm": 5.279483318328857,
631
+ "learning_rate": 1.0304063654447287e-05,
632
+ "loss": 5.7209,
633
+ "step": 2100
634
+ },
635
+ {
636
+ "epoch": 2.717391304347826,
637
+ "grad_norm": 5.814947605133057,
638
+ "learning_rate": 1.0161977834612106e-05,
639
+ "loss": 5.738,
640
+ "step": 2125
641
+ },
642
+ {
643
+ "epoch": 2.7493606138107416,
644
+ "grad_norm": 4.115816116333008,
645
+ "learning_rate": 1.0019892014776927e-05,
646
+ "loss": 5.7337,
647
+ "step": 2150
648
+ },
649
+ {
650
+ "epoch": 2.781329923273657,
651
+ "grad_norm": 4.176394462585449,
652
+ "learning_rate": 9.877806194941746e-06,
653
+ "loss": 5.713,
654
+ "step": 2175
655
+ },
656
+ {
657
+ "epoch": 2.813299232736573,
658
+ "grad_norm": 3.36919903755188,
659
+ "learning_rate": 9.735720375106566e-06,
660
+ "loss": 5.7257,
661
+ "step": 2200
662
+ },
663
+ {
664
+ "epoch": 2.8452685421994883,
665
+ "grad_norm": 4.4527482986450195,
666
+ "learning_rate": 9.593634555271385e-06,
667
+ "loss": 5.6958,
668
+ "step": 2225
669
+ },
670
+ {
671
+ "epoch": 2.877237851662404,
672
+ "grad_norm": 7.66256856918335,
673
+ "learning_rate": 9.451548735436205e-06,
674
+ "loss": 5.7053,
675
+ "step": 2250
676
+ },
677
+ {
678
+ "epoch": 2.9092071611253196,
679
+ "grad_norm": 11.90414810180664,
680
+ "learning_rate": 9.309462915601024e-06,
681
+ "loss": 5.7246,
682
+ "step": 2275
683
+ },
684
+ {
685
+ "epoch": 2.9411764705882355,
686
+ "grad_norm": 3.27648663520813,
687
+ "learning_rate": 9.167377095765843e-06,
688
+ "loss": 5.7291,
689
+ "step": 2300
690
+ },
691
+ {
692
+ "epoch": 2.973145780051151,
693
+ "grad_norm": 5.769582271575928,
694
+ "learning_rate": 9.025291275930663e-06,
695
+ "loss": 5.7139,
696
+ "step": 2325
697
+ },
698
+ {
699
+ "epoch": 3.0,
700
+ "eval_loss": 5.851009845733643,
701
+ "eval_runtime": 18.263,
702
+ "eval_samples_per_second": 273.777,
703
+ "eval_steps_per_second": 4.326,
704
+ "eval_tr_ling_pearson_cosine": 0.06129823646086187,
705
+ "eval_tr_ling_pearson_dot": 0.08667935948713909,
706
+ "eval_tr_ling_pearson_euclidean": 0.050963674624173616,
707
+ "eval_tr_ling_pearson_manhattan": 0.049471366228539336,
708
+ "eval_tr_ling_pearson_max": 0.08667935948713909,
709
+ "eval_tr_ling_spearman_cosine": 0.06262320788887717,
710
+ "eval_tr_ling_spearman_dot": 0.0836754651265069,
711
+ "eval_tr_ling_spearman_euclidean": 0.04874454654419082,
712
+ "eval_tr_ling_spearman_manhattan": 0.04780108900980343,
713
+ "eval_tr_ling_spearman_max": 0.0836754651265069,
714
+ "step": 2346
715
+ },
716
+ {
717
+ "epoch": 3.0051150895140664,
718
+ "grad_norm": 6.343133449554443,
719
+ "learning_rate": 8.883205456095482e-06,
720
+ "loss": 5.5715,
721
+ "step": 2350
722
+ },
723
+ {
724
+ "epoch": 3.0370843989769822,
725
+ "grad_norm": 7.939487457275391,
726
+ "learning_rate": 8.741119636260303e-06,
727
+ "loss": 5.6558,
728
+ "step": 2375
729
+ },
730
+ {
731
+ "epoch": 3.0690537084398977,
732
+ "grad_norm": 3.734879493713379,
733
+ "learning_rate": 8.599033816425122e-06,
734
+ "loss": 5.6441,
735
+ "step": 2400
736
+ },
737
+ {
738
+ "epoch": 3.101023017902813,
739
+ "grad_norm": 6.058401584625244,
740
+ "learning_rate": 8.456947996589942e-06,
741
+ "loss": 5.6569,
742
+ "step": 2425
743
+ },
744
+ {
745
+ "epoch": 3.132992327365729,
746
+ "grad_norm": 4.311662673950195,
747
+ "learning_rate": 8.314862176754761e-06,
748
+ "loss": 5.669,
749
+ "step": 2450
750
+ },
751
+ {
752
+ "epoch": 3.1649616368286444,
753
+ "grad_norm": 8.782428741455078,
754
+ "learning_rate": 8.17277635691958e-06,
755
+ "loss": 5.6361,
756
+ "step": 2475
757
+ },
758
+ {
759
+ "epoch": 3.1969309462915603,
760
+ "grad_norm": 7.427972793579102,
761
+ "learning_rate": 8.0306905370844e-06,
762
+ "loss": 5.6524,
763
+ "step": 2500
764
+ },
765
+ {
766
+ "epoch": 3.2289002557544757,
767
+ "grad_norm": 5.069025993347168,
768
+ "learning_rate": 7.88860471724922e-06,
769
+ "loss": 5.6773,
770
+ "step": 2525
771
+ },
772
+ {
773
+ "epoch": 3.260869565217391,
774
+ "grad_norm": 8.149388313293457,
775
+ "learning_rate": 7.746518897414039e-06,
776
+ "loss": 5.6552,
777
+ "step": 2550
778
+ },
779
+ {
780
+ "epoch": 3.292838874680307,
781
+ "grad_norm": 6.453441619873047,
782
+ "learning_rate": 7.604433077578858e-06,
783
+ "loss": 5.6807,
784
+ "step": 2575
785
+ },
786
+ {
787
+ "epoch": 3.3248081841432224,
788
+ "grad_norm": 6.5807719230651855,
789
+ "learning_rate": 7.4623472577436775e-06,
790
+ "loss": 5.6638,
791
+ "step": 2600
792
+ },
793
+ {
794
+ "epoch": 3.3567774936061383,
795
+ "grad_norm": 10.392335891723633,
796
+ "learning_rate": 7.320261437908497e-06,
797
+ "loss": 5.6582,
798
+ "step": 2625
799
+ },
800
+ {
801
+ "epoch": 3.3887468030690537,
802
+ "grad_norm": 9.251813888549805,
803
+ "learning_rate": 7.178175618073316e-06,
804
+ "loss": 5.658,
805
+ "step": 2650
806
+ },
807
+ {
808
+ "epoch": 3.420716112531969,
809
+ "grad_norm": 5.527411460876465,
810
+ "learning_rate": 7.036089798238136e-06,
811
+ "loss": 5.6626,
812
+ "step": 2675
813
+ },
814
+ {
815
+ "epoch": 3.452685421994885,
816
+ "grad_norm": 5.650461673736572,
817
+ "learning_rate": 6.894003978402956e-06,
818
+ "loss": 5.6802,
819
+ "step": 2700
820
+ },
821
+ {
822
+ "epoch": 3.4846547314578005,
823
+ "grad_norm": 7.156338691711426,
824
+ "learning_rate": 6.751918158567775e-06,
825
+ "loss": 5.6377,
826
+ "step": 2725
827
+ },
828
+ {
829
+ "epoch": 3.516624040920716,
830
+ "grad_norm": 6.843425750732422,
831
+ "learning_rate": 6.6098323387325946e-06,
832
+ "loss": 5.6752,
833
+ "step": 2750
834
+ },
835
+ {
836
+ "epoch": 3.5485933503836318,
837
+ "grad_norm": 14.204697608947754,
838
+ "learning_rate": 6.467746518897414e-06,
839
+ "loss": 5.6573,
840
+ "step": 2775
841
+ },
842
+ {
843
+ "epoch": 3.580562659846547,
844
+ "grad_norm": 3.9053664207458496,
845
+ "learning_rate": 6.325660699062234e-06,
846
+ "loss": 5.6963,
847
+ "step": 2800
848
+ },
849
+ {
850
+ "epoch": 3.612531969309463,
851
+ "grad_norm": 13.336016654968262,
852
+ "learning_rate": 6.1835748792270535e-06,
853
+ "loss": 5.7007,
854
+ "step": 2825
855
+ },
856
+ {
857
+ "epoch": 3.6445012787723785,
858
+ "grad_norm": 5.112432956695557,
859
+ "learning_rate": 6.041489059391873e-06,
860
+ "loss": 5.6746,
861
+ "step": 2850
862
+ },
863
+ {
864
+ "epoch": 3.6764705882352944,
865
+ "grad_norm": 6.077632427215576,
866
+ "learning_rate": 5.899403239556692e-06,
867
+ "loss": 5.6312,
868
+ "step": 2875
869
+ },
870
+ {
871
+ "epoch": 3.70843989769821,
872
+ "grad_norm": 10.304828643798828,
873
+ "learning_rate": 5.757317419721512e-06,
874
+ "loss": 5.5596,
875
+ "step": 2900
876
+ },
877
+ {
878
+ "epoch": 3.7404092071611252,
879
+ "grad_norm": 9.45308780670166,
880
+ "learning_rate": 5.615231599886332e-06,
881
+ "loss": 5.7003,
882
+ "step": 2925
883
+ },
884
+ {
885
+ "epoch": 3.772378516624041,
886
+ "grad_norm": 6.124211311340332,
887
+ "learning_rate": 5.473145780051151e-06,
888
+ "loss": 5.6739,
889
+ "step": 2950
890
+ },
891
+ {
892
+ "epoch": 3.8043478260869565,
893
+ "grad_norm": 8.547770500183105,
894
+ "learning_rate": 5.331059960215971e-06,
895
+ "loss": 5.655,
896
+ "step": 2975
897
+ },
898
+ {
899
+ "epoch": 3.836317135549872,
900
+ "grad_norm": 6.203834533691406,
901
+ "learning_rate": 5.18897414038079e-06,
902
+ "loss": 5.6787,
903
+ "step": 3000
904
+ },
905
+ {
906
+ "epoch": 3.868286445012788,
907
+ "grad_norm": 4.0565643310546875,
908
+ "learning_rate": 5.04688832054561e-06,
909
+ "loss": 5.643,
910
+ "step": 3025
911
+ },
912
+ {
913
+ "epoch": 3.9002557544757033,
914
+ "grad_norm": 9.590073585510254,
915
+ "learning_rate": 4.90480250071043e-06,
916
+ "loss": 5.6412,
917
+ "step": 3050
918
+ },
919
+ {
920
+ "epoch": 3.9322250639386187,
921
+ "grad_norm": 9.556587219238281,
922
+ "learning_rate": 4.762716680875249e-06,
923
+ "loss": 5.758,
924
+ "step": 3075
925
+ },
926
+ {
927
+ "epoch": 3.9641943734015346,
928
+ "grad_norm": 5.743387222290039,
929
+ "learning_rate": 4.620630861040068e-06,
930
+ "loss": 5.6769,
931
+ "step": 3100
932
+ },
933
+ {
934
+ "epoch": 3.99616368286445,
935
+ "grad_norm": 7.73360013961792,
936
+ "learning_rate": 4.478545041204888e-06,
937
+ "loss": 5.7206,
938
+ "step": 3125
939
+ },
940
+ {
941
+ "epoch": 4.0,
942
+ "eval_loss": 5.9124884605407715,
943
+ "eval_runtime": 18.3869,
944
+ "eval_samples_per_second": 271.933,
945
+ "eval_steps_per_second": 4.297,
946
+ "eval_tr_ling_pearson_cosine": 0.058743115070889876,
947
+ "eval_tr_ling_pearson_dot": 0.08477622619519222,
948
+ "eval_tr_ling_pearson_euclidean": 0.04709170917685587,
949
+ "eval_tr_ling_pearson_manhattan": 0.04582145815494953,
950
+ "eval_tr_ling_pearson_max": 0.08477622619519222,
951
+ "eval_tr_ling_spearman_cosine": 0.059526247945378225,
952
+ "eval_tr_ling_spearman_dot": 0.08243745050110735,
953
+ "eval_tr_ling_spearman_euclidean": 0.04407504959649961,
954
+ "eval_tr_ling_spearman_manhattan": 0.04331287037397966,
955
+ "eval_tr_ling_spearman_max": 0.08243745050110735,
956
+ "step": 3128
957
+ }
958
+ ],
959
+ "logging_steps": 25,
960
+ "max_steps": 3910,
961
+ "num_input_tokens_seen": 0,
962
+ "num_train_epochs": 5,
963
+ "save_steps": 500,
964
+ "stateful_callbacks": {
965
+ "EarlyStoppingCallback": {
966
+ "args": {
967
+ "early_stopping_patience": 5,
968
+ "early_stopping_threshold": 0.01
969
+ },
970
+ "attributes": {
971
+ "early_stopping_patience_counter": 0
972
+ }
973
+ },
974
+ "TrainerControl": {
975
+ "args": {
976
+ "should_epoch_stop": false,
977
+ "should_evaluate": false,
978
+ "should_log": false,
979
+ "should_save": true,
980
+ "should_training_stop": false
981
+ },
982
+ "attributes": {}
983
+ }
984
+ },
985
+ "total_flos": 0.0,
986
+ "train_batch_size": 32,
987
+ "trial_name": null,
988
+ "trial_params": null
989
+ }
checkpoint-3128/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d44b0bec8869c08bdad0d597184d7a293a0e13eb770d6f4384456cbbe4fe5aa4
3
+ size 5368
checkpoint-3128/unigram.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da145b5e7700ae40f16691ec32a0b1fdc1ee3298db22a31ea55f57a966c4a65d
3
+ size 14763260
config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
3
+ "architectures": [
4
+ "BertModel"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "classifier_dropout": null,
8
+ "gradient_checkpointing": false,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 384,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 1536,
14
+ "layer_norm_eps": 1e-12,
15
+ "max_position_embeddings": 512,
16
+ "model_type": "bert",
17
+ "num_attention_heads": 12,
18
+ "num_hidden_layers": 12,
19
+ "pad_token_id": 0,
20
+ "position_embedding_type": "absolute",
21
+ "torch_dtype": "float32",
22
+ "transformers_version": "4.41.0",
23
+ "type_vocab_size": 2,
24
+ "use_cache": true,
25
+ "vocab_size": 250037
26
+ }
config_sentence_transformers.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "__version__": {
3
+ "sentence_transformers": "2.0.0",
4
+ "transformers": "4.7.0",
5
+ "pytorch": "1.9.0+cu102"
6
+ },
7
+ "prompts": {},
8
+ "default_prompt_name": null,
9
+ "similarity_fn_name": null
10
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4c62de6c84b1b2f1dd37271b7c328633d87e20fa33799d8acc6dd17e7276782d
3
+ size 470637416
modules.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "idx": 0,
4
+ "name": "0",
5
+ "path": "",
6
+ "type": "sentence_transformers.models.Transformer"
7
+ },
8
+ {
9
+ "idx": 1,
10
+ "name": "1",
11
+ "path": "1_Pooling",
12
+ "type": "sentence_transformers.models.Pooling"
13
+ }
14
+ ]
runs/May29_18-52-15_338a77628651/events.out.tfevents.1717008737.338a77628651.19835.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5774c9706e36237c87022fe6c74d36b8271a65724734def30bdcacf5b57d7733
3
- size 88
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a1632c2aa6d9fe6916a4273d267eaecda68935aa9551a1745cdf8635a0fda945
3
+ size 11052
sentence_bert_config.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "max_seq_length": 128,
3
+ "do_lower_case": false
4
+ }
special_tokens_map.json ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "cls_token": {
10
+ "content": "<s>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "eos_token": {
17
+ "content": "</s>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "mask_token": {
24
+ "content": "<mask>",
25
+ "lstrip": true,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "pad_token": {
31
+ "content": "<pad>",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ },
37
+ "sep_token": {
38
+ "content": "</s>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false
43
+ },
44
+ "unk_token": {
45
+ "content": "<unk>",
46
+ "lstrip": false,
47
+ "normalized": false,
48
+ "rstrip": false,
49
+ "single_word": false
50
+ }
51
+ }
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cad551d5600a84242d0973327029452a1e3672ba6313c2a3c3d69c4310e12719
3
+ size 17082987
tokenizer_config.json ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "<s>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "<pad>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "</s>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "<unk>",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "250001": {
36
+ "content": "<mask>",
37
+ "lstrip": true,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "bos_token": "<s>",
45
+ "clean_up_tokenization_spaces": true,
46
+ "cls_token": "<s>",
47
+ "do_lower_case": true,
48
+ "eos_token": "</s>",
49
+ "mask_token": "<mask>",
50
+ "max_length": 128,
51
+ "model_max_length": 128,
52
+ "pad_to_multiple_of": null,
53
+ "pad_token": "<pad>",
54
+ "pad_token_type_id": 0,
55
+ "padding_side": "right",
56
+ "sep_token": "</s>",
57
+ "stride": 0,
58
+ "strip_accents": null,
59
+ "tokenize_chinese_chars": true,
60
+ "tokenizer_class": "BertTokenizer",
61
+ "truncation_side": "right",
62
+ "truncation_strategy": "longest_first",
63
+ "unk_token": "<unk>"
64
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:25f2c16c1654f713df68c72180bda7d273f3cdf401edb9ec64dfe4946a012590
3
+ size 5368
training_params.json ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "data_path": "ucsahin/TR-Extractive-QA-5K",
3
+ "model": "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
4
+ "lr": 2e-05,
5
+ "epochs": 5,
6
+ "max_seq_length": 128,
7
+ "batch_size": 32,
8
+ "warmup_ratio": 0.1,
9
+ "gradient_accumulation": 1,
10
+ "optimizer": "adamw_torch",
11
+ "scheduler": "linear",
12
+ "weight_decay": 0.0,
13
+ "max_grad_norm": 1.0,
14
+ "seed": 42,
15
+ "train_split": "train",
16
+ "valid_split": null,
17
+ "logging_steps": -1,
18
+ "project_name": "turkish-embedding-model",
19
+ "auto_find_batch_size": false,
20
+ "mixed_precision": "fp16",
21
+ "save_total_limit": 1,
22
+ "push_to_hub": true,
23
+ "evaluation_strategy": "epoch",
24
+ "username": "acayir64",
25
+ "log": "tensorboard",
26
+ "early_stopping_patience": 5,
27
+ "early_stopping_threshold": 0.01,
28
+ "trainer": "qa",
29
+ "sentence1_column": "question",
30
+ "sentence2_column": "answer",
31
+ "sentence3_column": "sentence3",
32
+ "target_column": "target"
33
+ }
unigram.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da145b5e7700ae40f16691ec32a0b1fdc1ee3298db22a31ea55f57a966c4a65d
3
+ size 14763260