anujsahani01 committed on
Commit
9257dea
1 Parent(s): 7001755

Training in progress, step 500

Browse files
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ checkpoint-*/
added_tokens.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "<|assistant|>": 49154,
3
+ "<|end|>": 49155,
4
+ "<|system|>": 49152,
5
+ "<|user|>": 49153,
6
+ "[ASSISTANT]": 49158,
7
+ "[END]": 49160,
8
+ "[PAD]": 49156,
9
+ "[SYSTEM]": 49157,
10
+ "[USER]": 49159
11
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
special_tokens_map.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "[SYSTEM]",
4
+ "[ASSISTANT]",
5
+ "[USER]",
6
+ "[END]"
7
+ ],
8
+ "bos_token": "<|endoftext|>",
9
+ "eos_token": "<|endoftext|>",
10
+ "pad_token": "[PAD]",
11
+ "unk_token": "<|endoftext|>"
12
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "additional_special_tokens": [
4
+ "[SYSTEM]",
5
+ "[ASSISTANT]",
6
+ "[USER]",
7
+ "[END]"
8
+ ],
9
+ "bos_token": "<|endoftext|>",
10
+ "clean_up_tokenization_spaces": true,
11
+ "eos_token": "<|endoftext|>",
12
+ "model_max_length": 1000,
13
+ "pad_token": "[PAD]",
14
+ "return_token_type_ids": false,
15
+ "tokenizer_class": "GPT2Tokenizer",
16
+ "unk_token": "<|endoftext|>",
17
+ "vocab_size": 49152
18
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f76bf2cc3126f9ab6669c3ec25d62743fcf906eab0e2b2fed21f94e11961eb3a
3
+ size 3899
vocab.json ADDED
The diff for this file is too large to render. See raw diff