richie-ghost
committed on
Commit
•
2138b78
1
Parent(s):
ab9ca51
Push model using huggingface_hub.
Browse files
- README.md +19 -6
- model.safetensors +1 -1
- model_head.pkl +1 -1
- tokenizer.json +1 -3
README.md
CHANGED
@@ -109,12 +109,13 @@ preds = model("Have a good day!")
|
|
109 |
| 1 | 116 |
|
110 |
|
111 |
### Training Hyperparameters
|
112 |
-
- batch_size: (
|
113 |
-
- num_epochs: (
|
114 |
- max_steps: -1
|
115 |
- sampling_strategy: oversampling
|
116 |
-
-
|
117 |
-
-
|
|
|
118 |
- loss: CosineSimilarityLoss
|
119 |
- distance_metric: cosine_distance
|
120 |
- margin: 0.25
|
@@ -123,9 +124,8 @@ preds = model("Have a good day!")
|
|
123 |
- warmup_proportion: 0.1
|
124 |
- l2_weight: 0.01
|
125 |
- seed: 42
|
126 |
-
- evaluation_strategy: epoch
|
127 |
- eval_max_steps: -1
|
128 |
-
- load_best_model_at_end:
|
129 |
|
130 |
### Training Results
|
131 |
| Epoch | Step | Training Loss | Validation Loss |
|
@@ -133,6 +133,19 @@ preds = model("Have a good day!")
|
|
133 |
| 0.0009 | 1 | 0.3528 | - |
|
134 |
| 1.0 | 1068 | 0.0252 | 0.0729 |
|
135 |
| 2.0 | 2136 | 0.0001 | 0.0544 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
136 |
|
137 |
### Framework Versions
|
138 |
- Python: 3.10.12
|
|
|
109 |
| 1 | 116 |
|
110 |
|
111 |
### Training Hyperparameters
|
112 |
+
- batch_size: (16, 16)
|
113 |
+
- num_epochs: (1, 1)
|
114 |
- max_steps: -1
|
115 |
- sampling_strategy: oversampling
|
116 |
+
- num_iterations: 20
|
117 |
+
- body_learning_rate: (2e-05, 2e-05)
|
118 |
+
- head_learning_rate: 2e-05
|
119 |
- loss: CosineSimilarityLoss
|
120 |
- distance_metric: cosine_distance
|
121 |
- margin: 0.25
|
|
|
124 |
- warmup_proportion: 0.1
|
125 |
- l2_weight: 0.01
|
126 |
- seed: 42
|
|
|
127 |
- eval_max_steps: -1
|
128 |
+
- load_best_model_at_end: False
|
129 |
|
130 |
### Training Results
|
131 |
| Epoch | Step | Training Loss | Validation Loss |
|
|
|
133 |
| 0.0009 | 1 | 0.3528 | - |
|
134 |
| 1.0 | 1068 | 0.0252 | 0.0729 |
|
135 |
| 2.0 | 2136 | 0.0001 | 0.0544 |
|
136 |
+
| 0.0015 | 1 | 0.0 | - |
|
137 |
+
| 0.0772 | 50 | 0.001 | - |
|
138 |
+
| 0.1543 | 100 | 0.0 | - |
|
139 |
+
| 0.2315 | 150 | 0.0 | - |
|
140 |
+
| 0.3086 | 200 | 0.0 | - |
|
141 |
+
| 0.3858 | 250 | 0.0015 | - |
|
142 |
+
| 0.4630 | 300 | 0.001 | - |
|
143 |
+
| 0.5401 | 350 | 0.0 | - |
|
144 |
+
| 0.6173 | 400 | 0.0 | - |
|
145 |
+
| 0.6944 | 450 | 0.0 | - |
|
146 |
+
| 0.7716 | 500 | 0.0 | - |
|
147 |
+
| 0.8488 | 550 | 0.0 | - |
|
148 |
+
| 0.9259 | 600 | 0.0 | - |
|
149 |
|
150 |
### Framework Versions
|
151 |
- Python: 3.10.12
|
model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 98453640
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:54f3c99a0285cbcae6e28b4efd74e0bc16c3464208d28b24982dadb693780bd3
|
3 |
size 98453640
|
model_head.pkl
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 5611
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4761c7f1047edcbea61dab5f490686b765a055af9c42336c6b54f555de2a4da1
|
3 |
size 5611
|
tokenizer.json
CHANGED
@@ -7,9 +7,7 @@
|
|
7 |
"stride": 0
|
8 |
},
|
9 |
"padding": {
|
10 |
-
"strategy":
|
11 |
-
"Fixed": 512
|
12 |
-
},
|
13 |
"direction": "Right",
|
14 |
"pad_to_multiple_of": null,
|
15 |
"pad_id": 0,
|
|
|
7 |
"stride": 0
|
8 |
},
|
9 |
"padding": {
|
10 |
+
"strategy": "BatchLongest",
|
|
|
|
|
11 |
"direction": "Right",
|
12 |
"pad_to_multiple_of": null,
|
13 |
"pad_id": 0,
|