sam-at committed on
Commit
3423cd6
·
verified ·
1 Parent(s): 628cee2

Model save

Browse files
README.md CHANGED
@@ -3,15 +3,10 @@ library_name: transformers
3
  license: llama3.2
4
  base_model: meta-llama/Llama-3.2-1B
5
  tags:
6
- - alignment-handbook
7
- - trl
8
- - sft
9
- - generated_from_trainer
10
  - trl
11
  - sft
 
12
  - generated_from_trainer
13
- datasets:
14
- - data/small_transduction_heavy
15
  model-index:
16
  - name: llama_32_1b_testing
17
  results: []
@@ -22,7 +17,7 @@ should probably proofread and complete it, then remove this comment. -->
22
 
23
  # llama_32_1b_testing
24
 
25
- This model is a fine-tuned version of [meta-llama/Llama-3.2-1B](https://huggingface.co/meta-llama/Llama-3.2-1B) on the data/small_transduction_heavy dataset.
26
 
27
  ## Model description
28
 
 
3
  license: llama3.2
4
  base_model: meta-llama/Llama-3.2-1B
5
  tags:
 
 
 
 
6
  - trl
7
  - sft
8
+ - alignment-handbook
9
  - generated_from_trainer
 
 
10
  model-index:
11
  - name: llama_32_1b_testing
12
  results: []
 
17
 
18
  # llama_32_1b_testing
19
 
20
+ This model is a fine-tuned version of [meta-llama/Llama-3.2-1B](https://huggingface.co/meta-llama/Llama-3.2-1B) on an unknown dataset.
21
 
22
  ## Model description
23
 
all_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 1.0,
3
  "total_flos": 570977206272.0,
4
- "train_loss": 0.5035594925284386,
5
- "train_runtime": 19.8483,
6
  "train_samples": 1024,
7
- "train_samples_per_second": 51.591,
8
- "train_steps_per_second": 0.202
9
  }
 
1
  {
2
  "epoch": 1.0,
3
  "total_flos": 570977206272.0,
4
+ "train_loss": 0.5035885199904442,
5
+ "train_runtime": 17.9848,
6
  "train_samples": 1024,
7
+ "train_samples_per_second": 56.937,
8
+ "train_steps_per_second": 0.222
9
  }
config.json CHANGED
@@ -31,6 +31,6 @@
31
  "tie_word_embeddings": true,
32
  "torch_dtype": "bfloat16",
33
  "transformers_version": "4.45.0.dev0",
34
- "use_cache": true,
35
  "vocab_size": 128256
36
  }
 
31
  "tie_word_embeddings": true,
32
  "torch_dtype": "bfloat16",
33
  "transformers_version": "4.45.0.dev0",
34
+ "use_cache": false,
35
  "vocab_size": 128256
36
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f008024762f4d07e17b56f0b8ecb48e3a8b306de261f571de05422cca7bdcb00
3
  size 2471645608
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ad43bd7c79d44bfad86dc2612e5bb3e12ed10fa0489bb3921507c26bd0926958
3
  size 2471645608
runs/Jan10_02-47-49_a100-40gx8-dev/events.out.tfevents.1736477280.a100-40gx8-dev.25091.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f315437cfb9d3bbb344aa704f00ebaa8066ffc1cf9c888795383491efc7f240
3
+ size 7137
tokenizer.json CHANGED
@@ -1,11 +1,6 @@
1
  {
2
  "version": "1.0",
3
- "truncation": {
4
- "direction": "Right",
5
- "max_length": 8192,
6
- "strategy": "LongestFirst",
7
- "stride": 0
8
- },
9
  "padding": null,
10
  "added_tokens": [
11
  {
 
1
  {
2
  "version": "1.0",
3
+ "truncation": null,
 
 
 
 
 
4
  "padding": null,
5
  "added_tokens": [
6
  {
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 1.0,
3
  "total_flos": 570977206272.0,
4
- "train_loss": 0.5035594925284386,
5
- "train_runtime": 19.8483,
6
  "train_samples": 1024,
7
- "train_samples_per_second": 51.591,
8
- "train_steps_per_second": 0.202
9
  }
 
1
  {
2
  "epoch": 1.0,
3
  "total_flos": 570977206272.0,
4
+ "train_loss": 0.5035885199904442,
5
+ "train_runtime": 17.9848,
6
  "train_samples": 1024,
7
+ "train_samples_per_second": 56.937,
8
+ "train_steps_per_second": 0.222
9
  }
trainer_state.json CHANGED
@@ -10,40 +10,40 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.25,
13
- "grad_norm": 21.95132839761702,
14
  "learning_rate": 1e-05,
15
  "loss": 0.6512,
16
  "step": 1
17
  },
18
  {
19
  "epoch": 0.5,
20
- "grad_norm": 19.159738592443173,
21
  "learning_rate": 7.500000000000001e-06,
22
  "loss": 0.5545,
23
  "step": 2
24
  },
25
  {
26
  "epoch": 0.75,
27
- "grad_norm": 10.60737320174702,
28
  "learning_rate": 2.5000000000000015e-06,
29
- "loss": 0.4432,
30
  "step": 3
31
  },
32
  {
33
  "epoch": 1.0,
34
- "grad_norm": 10.173966822336837,
35
  "learning_rate": 0.0,
36
- "loss": 0.3654,
37
  "step": 4
38
  },
39
  {
40
  "epoch": 1.0,
41
  "step": 4,
42
  "total_flos": 570977206272.0,
43
- "train_loss": 0.5035594925284386,
44
- "train_runtime": 19.8483,
45
- "train_samples_per_second": 51.591,
46
- "train_steps_per_second": 0.202
47
  }
48
  ],
49
  "logging_steps": 1,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.25,
13
+ "grad_norm": 21.951654746952823,
14
  "learning_rate": 1e-05,
15
  "loss": 0.6512,
16
  "step": 1
17
  },
18
  {
19
  "epoch": 0.5,
20
+ "grad_norm": 19.16167803635957,
21
  "learning_rate": 7.500000000000001e-06,
22
  "loss": 0.5545,
23
  "step": 2
24
  },
25
  {
26
  "epoch": 0.75,
27
+ "grad_norm": 10.64027050880986,
28
  "learning_rate": 2.5000000000000015e-06,
29
+ "loss": 0.4433,
30
  "step": 3
31
  },
32
  {
33
  "epoch": 1.0,
34
+ "grad_norm": 10.18099018466467,
35
  "learning_rate": 0.0,
36
+ "loss": 0.3655,
37
  "step": 4
38
  },
39
  {
40
  "epoch": 1.0,
41
  "step": 4,
42
  "total_flos": 570977206272.0,
43
+ "train_loss": 0.5035885199904442,
44
+ "train_runtime": 17.9848,
45
+ "train_samples_per_second": 56.937,
46
+ "train_steps_per_second": 0.222
47
  }
48
  ],
49
  "logging_steps": 1,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:66b018c5cc7f0d124f0f6db86d379b2f8deb008819f32a7736c2ac46b244d31e
3
  size 7160
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a35f3d6fa879193785016c7c518bef52884520ea80b30a5c823df6360ec4859
3
  size 7160