NicholasCorrado committed on
Commit
fc3efc2
·
verified ·
1 Parent(s): 994b739

Model save

Browse files
Files changed (3) hide show
  1. README.md +2 -18
  2. all_results.json +6 -6
  3. train_results.json +6 -6
README.md CHANGED
@@ -3,16 +3,10 @@ library_name: transformers
3
  license: apache-2.0
4
  base_model: TinyLlama/TinyLlama-1.1B-Chat-v1.0
5
  tags:
6
- - alignment-handbook
7
- - trl
8
- - dpo
9
- - generated_from_trainer
10
  - trl
11
  - dpo
 
12
  - generated_from_trainer
13
- datasets:
14
- - data/ui_math_ref
15
- - data/ui_coding_ref
16
  model-index:
17
  - name: tinyllama-1.1b-chat-v1.0-ui-math-coding-group-dpo
18
  results: []
@@ -23,17 +17,7 @@ should probably proofread and complete it, then remove this comment. -->
23
 
24
  # tinyllama-1.1b-chat-v1.0-ui-math-coding-group-dpo
25
 
26
- This model is a fine-tuned version of [TinyLlama/TinyLlama-1.1B-Chat-v1.0](https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v1.0) on the data/ui_math_ref and the data/ui_coding_ref datasets.
27
- It achieves the following results on the evaluation set:
28
- - Loss: 0.6931
29
- - Rewards/chosen: 0.0
30
- - Rewards/rejected: 0.0
31
- - Rewards/accuracies: 0.0
32
- - Rewards/margins: 0.0
33
- - Logps/rejected: -291.9316
34
- - Logps/chosen: -308.5898
35
- - Logits/rejected: -2.1579
36
- - Logits/chosen: -2.1562
37
 
38
  ## Model description
39
 
 
3
  license: apache-2.0
4
  base_model: TinyLlama/TinyLlama-1.1B-Chat-v1.0
5
  tags:
 
 
 
 
6
  - trl
7
  - dpo
8
+ - alignment-handbook
9
  - generated_from_trainer
 
 
 
10
  model-index:
11
  - name: tinyllama-1.1b-chat-v1.0-ui-math-coding-group-dpo
12
  results: []
 
17
 
18
  # tinyllama-1.1b-chat-v1.0-ui-math-coding-group-dpo
19
 
20
+ This model is a fine-tuned version of [TinyLlama/TinyLlama-1.1B-Chat-v1.0](https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v1.0) on the None dataset.
 
 
 
 
 
 
 
 
 
 
21
 
22
  ## Model description
23
 
all_results.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "epoch": 1.0,
3
  "eval_logits/chosen": -2.156226873397827,
4
  "eval_logits/rejected": -2.1579225063323975,
5
  "eval_logps/chosen": -308.5898132324219,
@@ -14,9 +14,9 @@
14
  "eval_samples_per_second": 66.156,
15
  "eval_steps_per_second": 0.662,
16
  "total_flos": 0.0,
17
- "train_loss": 0.016206350177526474,
18
- "train_runtime": 33.2224,
19
- "train_samples": 99,
20
- "train_samples_per_second": 2.98,
21
- "train_steps_per_second": 0.03
22
  }
 
1
  {
2
+ "epoch": 0.9963369963369964,
3
  "eval_logits/chosen": -2.156226873397827,
4
  "eval_logits/rejected": -2.1579225063323975,
5
  "eval_logps/chosen": -308.5898132324219,
 
14
  "eval_samples_per_second": 66.156,
15
  "eval_steps_per_second": 0.662,
16
  "total_flos": 0.0,
17
+ "train_loss": 0.09771968247186319,
18
+ "train_runtime": 5407.8058,
19
+ "train_samples": 209650,
20
+ "train_samples_per_second": 38.768,
21
+ "train_steps_per_second": 0.038
22
  }
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 1.0,
3
  "total_flos": 0.0,
4
- "train_loss": 0.016206350177526474,
5
- "train_runtime": 33.2224,
6
- "train_samples": 99,
7
- "train_samples_per_second": 2.98,
8
- "train_steps_per_second": 0.03
9
  }
 
1
  {
2
+ "epoch": 0.9963369963369964,
3
  "total_flos": 0.0,
4
+ "train_loss": 0.09771968247186319,
5
+ "train_runtime": 5407.8058,
6
+ "train_samples": 209650,
7
+ "train_samples_per_second": 38.768,
8
+ "train_steps_per_second": 0.038
9
  }