Ramikan-BR committed on
Commit 68b3370
1 Parent(s): 6aa01b9

Update README.md

Files changed (1)
  1. README.md +141 -0
README.md CHANGED
@@ -45,6 +45,147 @@ gguf_f16: [tinyllama-coder-py-4bit-v10-unsloth.F16.gguf](https://huggingface.co/
gguf_Q4_K_M: [tinyllama-coder-py-4bit-v10-unsloth.Q4_K_M.gguf](https://huggingface.co/Ramikan-BR/tinyllama-coder-py-4bit-v10/blob/main/tinyllama-coder-py-4bit-v10-unsloth.Q4_K_M.gguf)
gguf_Q8_0: [tinyllama-coder-py-4bit-v10-unsloth.Q8_0.gguf](https://huggingface.co/Ramikan-BR/tinyllama-coder-py-4bit-v10/blob/main/tinyllama-coder-py-4bit-v10-unsloth.Q8_0.gguf)

#### Training Hyperparameters

The [Unsloth](https://github.com/unslothai/unsloth) notebook I used to fine-tune the model: [TinyLlama](https://colab.research.google.com/drive/1AZghoNBQaMDgWJpi4RbffGM1h6raLUj9?usp=sharing)
```python

%%capture
# Installs Unsloth, Xformers (Flash Attention) and all other packages!
!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
!pip install --no-deps xformers trl peft accelerate bitsandbytes # xformers "xformers<0.0.26"

import os
from google.colab import drive
drive.mount('/content/drive')

from unsloth import FastLanguageModel
import torch
max_seq_length = 4096 # Choose any! We auto support RoPE Scaling internally!
dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.

# 4bit pre quantized models we support for 4x faster downloading + no OOMs.
fourbit_models = [
    "unsloth/mistral-7b-bnb-4bit",
    "unsloth/mistral-7b-instruct-v0.2-bnb-4bit",
    "unsloth/llama-2-7b-bnb-4bit",
    "unsloth/llama-2-13b-bnb-4bit",
    "unsloth/codellama-34b-bnb-4bit",
    "unsloth/tinyllama-bnb-4bit",
    "unsloth/gemma-7b-bnb-4bit", # New Google 6 trillion tokens model 2.5x faster!
    "unsloth/gemma-2b-bnb-4bit",
] # More models at https://huggingface.co/unsloth

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "Ramikan-BR/tinyllama-coder-py-4bit_LORA-v9", # "unsloth/tinyllama" for 16bit loading
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
    # token = "hf_...", # use one if using gated models like meta-llama/Llama-2-7b-hf
)

model = FastLanguageModel.get_peft_model(
    model,
    r = 256, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj",],
    lora_alpha = 512,
    lora_dropout = 0, # Currently only supports dropout = 0
    bias = "none",    # Currently only supports bias = "none"
    use_gradient_checkpointing = True, # True reduces VRAM usage (helps if you hit out-of-memory errors)
    random_state = 3407,
    use_rslora = False,  # We support rank stabilized LoRA
    loftq_config = None, # And LoftQ
)

alpaca_prompt = """Below is an instruction that describes a task. Write a response that appropriately completes the request.
### Input:
{}

### Output:
{}"""

EOS_TOKEN = tokenizer.eos_token
def formatting_prompts_func(examples):
    inputs = examples["problem"]
    outputs = examples["solution"]
    texts = []
    for input, output in zip(inputs, outputs):
        # Must add EOS_TOKEN, otherwise your generation will go on forever!
        text = alpaca_prompt.format(input, output) + EOS_TOKEN
        texts.append(text)
    return { "text" : texts}
pass
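
# Added for illustration (not in the original notebook): with the Alpaca-style
# template above, one formatted training example ends up looking like
#   Below is an instruction that describes a task. Write a response that appropriately completes the request.
#   ### Input:
#   <problem text from the dataset>
#
#   ### Output:
#   <solution code from the dataset></s>
# where </s> is TinyLlama's EOS token appended by formatting_prompts_func.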
120
+
121
+ from datasets import load_dataset
122
+ dataset = load_dataset('json', data_files='/content/drive/MyDrive/data-oss_instruct-py-10.jsonl', split='train')
123
+ dataset = dataset.map(formatting_prompts_func, batched=True)
124
+
125
+ from trl import SFTTrainer
126
+ from transformers import TrainingArguments
127
+ from unsloth import is_bfloat16_supported
128
+ from transformers.utils import logging
129
+ logging.set_verbosity_info()
130
+
131
+ trainer = SFTTrainer(
132
+ model = model,
133
+ tokenizer = tokenizer,
134
+ train_dataset = dataset,
135
+ dataset_text_field = "text",
136
+ max_seq_length = max_seq_length,
137
+ dataset_num_proc = 2,
138
+ packing = True, # Packs short sequences together to save time!
139
+ args = TrainingArguments(
140
+ per_device_train_batch_size = 2,
141
+ gradient_accumulation_steps = 256,
142
+ warmup_ratio = 0.1,
143
+ num_train_epochs = 2,
144
+ learning_rate = 2e-4,
145
+ fp16 = not torch.cuda.is_bf16_supported(),
146
+ bf16 = torch.cuda.is_bf16_supported(),
147
+ logging_steps = 1,
148
+ optim = "adafactor", # adamw_torch ou adamw_torch_fused +10% velocidade ou adafactor ou adamw_8bit
149
+ weight_decay = 0.1,
150
+ lr_scheduler_type = "linear",
151
+ seed = 3407,
152
+ output_dir = "outputs",
153
+ ),
154
+ )
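
# Note (added for clarity): with packing enabled, each optimizer step processes
# per_device_train_batch_size * gradient_accumulation_steps = 2 * 256 = 512
# packed sequences of up to max_seq_length = 4096 tokens on a single GPU.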

trainer_stats = trainer.train()

model.save_pretrained("lora_model") # Local saving
tokenizer.save_pretrained("lora_model")
model.push_to_hub("Ramikan-BR/tinyllama-coder-py-4bit_LORA-v10", token = "hf_...") # Online saving
tokenizer.push_to_hub("Ramikan-BR/tinyllama-coder-py-4bit_LORA-v10", token = "hf_...") # Online saving

# Merge to 16bit
model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",)
model.push_to_hub_merged("Ramikan-BR/tinyllama-coder-py-4bit-v10", tokenizer, save_method = "merged_16bit", token = "hf_...")

# Merge to 4bit
if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",)
if False: model.push_to_hub_merged("Ramikan-BR/tinyllama-coder-py-4bit-v10", tokenizer, save_method = "merged_4bit", token = "hf_...")

# Just LoRA adapters
if False: model.save_pretrained_merged("model", tokenizer, save_method = "lora",)
if False: model.push_to_hub_merged("Ramikan-BR/tinyllama-coder-py-4bit-v10", tokenizer, save_method = "lora", token = "hf_...")

# Save to 8bit Q8_0
model.save_pretrained_gguf("model", tokenizer,)
model.push_to_hub_gguf("Ramikan-BR/tinyllama-coder-py-4bit-v10", tokenizer, token = "hf_...")

# Save to 16bit GGUF
model.save_pretrained_gguf("model", tokenizer, quantization_method = "f16")
model.push_to_hub_gguf("Ramikan-BR/tinyllama-coder-py-4bit-v10", tokenizer, quantization_method = "f16", token = "hf_...")

# Save to q4_k_m GGUF
model.save_pretrained_gguf("model", tokenizer, quantization_method = "q4_k_m")
model.push_to_hub_gguf("Ramikan-BR/tinyllama-coder-py-4bit-v10", tokenizer, quantization_method = "q4_k_m", token = "hf_...")
```

Parameters used for this run (taken from the script above): LoRA r = 256, lora_alpha = 512, lora_dropout = 0, max_seq_length = 4096, per_device_train_batch_size = 2, gradient_accumulation_steps = 256 (512 packed sequences per optimizer step), warmup_ratio = 0.1, num_train_epochs = 2, learning_rate = 2e-4, optim = adafactor, weight_decay = 0.1, linear LR schedule, seed = 3407.

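For completeness, here is a minimal inference sketch (not part of the original notebook) that loads the merged 16-bit checkpoint pushed above and reuses the same Alpaca-style prompt. The repo name and prompt template come from the training script; the rest is standard Unsloth/Transformers usage.

```python
from unsloth import FastLanguageModel

# Load the merged model pushed by the training script above.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "Ramikan-BR/tinyllama-coder-py-4bit-v10",
    max_seq_length = 4096,
    dtype = None,
    load_in_4bit = True,
)
FastLanguageModel.for_inference(model)  # enable Unsloth's faster inference mode

alpaca_prompt = """Below is an instruction that describes a task. Write a response that appropriately completes the request.
### Input:
{}

### Output:
{}"""

# Hypothetical example instruction, for illustration only.
inputs = tokenizer(
    [alpaca_prompt.format("Write a Python function that reverses a string.", "")],
    return_tensors = "pt",
).to("cuda")
outputs = model.generate(**inputs, max_new_tokens = 256, use_cache = True)
print(tokenizer.batch_decode(outputs, skip_special_tokens = True)[0])
```
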
  This is the model card of a 🤗 transformers model that has been pushed on the Hub. This model card has been automatically generated.
 
  This llama model was trained 2x faster with [Unsloth](https://github.com/unslothai/unsloth) and Huggingface's TRL library.
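
The GGUF exports linked at the top of this card can also be run without Unsloth. Below is a hedged sketch using llama-cpp-python, which is an assumption on my part (the original card does not mention it); only the repo and file names come from the card above.

```python
# Sketch: run the Q4_K_M GGUF export locally with llama-cpp-python.
# llama-cpp-python and this usage pattern are assumptions; only the repo id and
# filename come from the model card above.
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

gguf_path = hf_hub_download(
    repo_id = "Ramikan-BR/tinyllama-coder-py-4bit-v10",
    filename = "tinyllama-coder-py-4bit-v10-unsloth.Q4_K_M.gguf",
)

llm = Llama(model_path = gguf_path, n_ctx = 4096)

# Same Alpaca-style template used during training; the instruction is a
# hypothetical example.
prompt = """Below is an instruction that describes a task. Write a response that appropriately completes the request.
### Input:
Write a Python function that checks whether a number is prime.

### Output:
"""
result = llm(prompt, max_tokens = 256, stop = ["</s>"])
print(result["choices"][0]["text"])
```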