lapp0 committed on
Commit
43fd07b
·
verified ·
1 Parent(s): 2d32b02

Training in progress, step 125

Browse files
Files changed (12) hide show
  1. benchmarks.shelve.dat +0 -0
  2. config.json +1 -1
  3. logs/harness_benchmarks=tinyBenchmarks, learning_rate=0.0001, lr_scheduler_kwargs=__power___0.7___lr_end___2e-05_, lr_scheduler_type=polynomial, per_device_train_batch_size=8/events.out.tfevents.1727022301.1c1a426a2fee +3 -0
  4. logs/harness_benchmarks=tinyBenchmarks, learning_rate=0.0001, lr_scheduler_kwargs=__power___0.7___lr_end___2e-05_, lr_scheduler_type=polynomial, per_device_train_batch_size=8/events.out.tfevents.1727023146.1c1a426a2fee +3 -0
  5. logs/harness_benchmarks=tinyBenchmarks, learning_rate=0.0001, lr_scheduler_kwargs=__power___0.7___lr_end___2e-05_, lr_scheduler_type=polynomial, per_device_train_batch_size=8/events.out.tfevents.1727023719.1c1a426a2fee +3 -0
  6. logs/harness_benchmarks=tinyBenchmarks, learning_rate=0.0001, lr_scheduler_kwargs=__power___0.7___lr_end___2e-05_, lr_scheduler_type=polynomial, per_device_train_batch_size=8/events.out.tfevents.1727024062.1c1a426a2fee +3 -0
  7. logs/harness_benchmarks=tinyBenchmarks, learning_rate=0.0001, lr_scheduler_kwargs=__power___0.7___lr_end___2e-05_, lr_scheduler_type=polynomial, per_device_train_batch_size=8/events.out.tfevents.1727025056.1c1a426a2fee +3 -0
  8. logs/harness_benchmarks=tinyBenchmarks, learning_rate=0.0001, lr_scheduler_kwargs=__power___0.7___lr_end___2e-05_, lr_scheduler_type=polynomial, per_device_train_batch_size=8/events.out.tfevents.1727025458.1c1a426a2fee +3 -0
  9. model.safetensors +1 -1
  10. tokenizer.json +1 -1
  11. tokenizer_config.json +1 -1
  12. training_args.bin +1 -1
benchmarks.shelve.dat CHANGED
Binary files a/benchmarks.shelve.dat and b/benchmarks.shelve.dat differ
 
config.json CHANGED
@@ -12,7 +12,7 @@
12
  "hidden_size": 576,
13
  "initializer_range": 0.02,
14
  "intermediate_size": 1536,
15
- "max_position_embeddings": 2048,
16
  "mlp_bias": false,
17
  "model_type": "llama",
18
  "num_attention_heads": 9,
 
12
  "hidden_size": 576,
13
  "initializer_range": 0.02,
14
  "intermediate_size": 1536,
15
+ "max_position_embeddings": 1024,
16
  "mlp_bias": false,
17
  "model_type": "llama",
18
  "num_attention_heads": 9,
logs/harness_benchmarks=tinyBenchmarks, learning_rate=0.0001, lr_scheduler_kwargs=__power___0.7___lr_end___2e-05_, lr_scheduler_type=polynomial, per_device_train_batch_size=8/events.out.tfevents.1727022301.1c1a426a2fee ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b5036ac3b1a677b1b849c416aa627a939bbef278185e2f19ef5318c184cd5004
3
+ size 511
logs/harness_benchmarks=tinyBenchmarks, learning_rate=0.0001, lr_scheduler_kwargs=__power___0.7___lr_end___2e-05_, lr_scheduler_type=polynomial, per_device_train_batch_size=8/events.out.tfevents.1727023146.1c1a426a2fee ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:918591d3973c64fa747334c1d24312275a181b3ac797d4473017c6a76a7e7990
3
+ size 40
logs/harness_benchmarks=tinyBenchmarks, learning_rate=0.0001, lr_scheduler_kwargs=__power___0.7___lr_end___2e-05_, lr_scheduler_type=polynomial, per_device_train_batch_size=8/events.out.tfevents.1727023719.1c1a426a2fee ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:97fd4b7bd8ab64166b2a3052902f10450508779fb1ab7cb2e0dd27ebc9209c76
3
+ size 5401
logs/harness_benchmarks=tinyBenchmarks, learning_rate=0.0001, lr_scheduler_kwargs=__power___0.7___lr_end___2e-05_, lr_scheduler_type=polynomial, per_device_train_batch_size=8/events.out.tfevents.1727024062.1c1a426a2fee ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5d8442b294316693d62dbe68f46f76668d8ef7083b6ec1927fda0939ae1d49f2
3
+ size 5854
logs/harness_benchmarks=tinyBenchmarks, learning_rate=0.0001, lr_scheduler_kwargs=__power___0.7___lr_end___2e-05_, lr_scheduler_type=polynomial, per_device_train_batch_size=8/events.out.tfevents.1727025056.1c1a426a2fee ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:94884cff1502b49daf186b7ceedca7f0fd870bd2457e038664b55853a0891a4f
3
+ size 5598
logs/harness_benchmarks=tinyBenchmarks, learning_rate=0.0001, lr_scheduler_kwargs=__power___0.7___lr_end___2e-05_, lr_scheduler_type=polynomial, per_device_train_batch_size=8/events.out.tfevents.1727025458.1c1a426a2fee ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d470d5c8119cbf71600dc64cb5184c96eed24af537d8b4eba7b66ed31ffb8a9
3
+ size 9163
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b767b51383f2ecc0f2b377273c67ccc7a6c933dcd8895a1ae890747d3d9b44ef
3
  size 325669528
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ece4acb8e87423306035fd87bde134984d419f8b2a41b121154934af62b9c2a
3
  size 325669528
tokenizer.json CHANGED
@@ -2,7 +2,7 @@
2
  "version": "1.0",
3
  "truncation": {
4
  "direction": "Right",
5
- "max_length": 511,
6
  "strategy": "LongestFirst",
7
  "stride": 0
8
  },
 
2
  "version": "1.0",
3
  "truncation": {
4
  "direction": "Right",
5
+ "max_length": 1023,
6
  "strategy": "LongestFirst",
7
  "stride": 0
8
  },
tokenizer_config.json CHANGED
@@ -160,7 +160,7 @@
160
  "bos_token": "<|endoftext|>",
161
  "clean_up_tokenization_spaces": false,
162
  "eos_token": "<|endoftext|>",
163
- "model_max_length": 1000000000000000019884624838656,
164
  "pad_token": "<|endoftext|>",
165
  "tokenizer_class": "GPT2Tokenizer",
166
  "unk_token": "<|endoftext|>",
 
160
  "bos_token": "<|endoftext|>",
161
  "clean_up_tokenization_spaces": false,
162
  "eos_token": "<|endoftext|>",
163
+ "model_max_length": 1024,
164
  "pad_token": "<|endoftext|>",
165
  "tokenizer_class": "GPT2Tokenizer",
166
  "unk_token": "<|endoftext|>",
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:eeef6d1bbe2dfc3585646903a446ca690711f76fe21b79c270327c4b86d07768
3
  size 5688
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:33770307270dda4ff4ae179bd8dcf984c23818fe946bac64c4f3bb769b90e44d
3
  size 5688