Update README.md
Browse files
README.md
CHANGED
@@ -131,44 +131,6 @@ dataset = load_dataset("json", data_files=jsonl_file)
|
|
131 |
# Save the QA pairs to a JSONL file
|
132 |
```
|
133 |
|
134 |
-
|
135 |
-
**Setting Up LoRA Configuration**
|
136 |
-
```python
|
137 |
-
from peft import LoraConfig, PeftModel, prepare_model_for_kbit_training, get_peft_model
|
138 |
-
|
139 |
-
model.gradient_checkpointing_enable()
|
140 |
-
model = prepare_model_for_kbit_training(model)
|
141 |
-
|
142 |
-
import bitsandbytes as bnb
|
143 |
-
|
144 |
-
def find_all_linear_names(model):
|
145 |
-
cls = bnb.nn.Linear4bit # For 4-bit precision
|
146 |
-
lora_module_names = set()
|
147 |
-
for name, module in model.named_modules():
|
148 |
-
if isinstance(module, cls):
|
149 |
-
names = name.split('.')
|
150 |
-
lora_module_names.add(names[0] if len(names) == 1 else names[-1])
|
151 |
-
if 'lm_head' in lora_module_names: # Needed for 16-bit
|
152 |
-
lora_module_names.remove('lm_head')
|
153 |
-
return list(lora_module_names)
|
154 |
-
|
155 |
-
modules = find_all_linear_names(model)
|
156 |
-
|
157 |
-
lora_config = LoraConfig(
|
158 |
-
r=64,
|
159 |
-
lora_alpha=32,
|
160 |
-
target_modules=modules,
|
161 |
-
lora_dropout=0.05,
|
162 |
-
bias="none",
|
163 |
-
task_type="CAUSAL_LM"
|
164 |
-
)
|
165 |
-
|
166 |
-
model = get_peft_model(model, lora_config)
|
167 |
-
|
168 |
-
trainable, total = model.get_nb_trainable_parameters()
|
169 |
-
print(f"Trainable: {trainable} | Total: {total} | Percentage: {trainable/total*100:.4f}%")
|
170 |
-
```
|
171 |
-
|
172 |
**Loading/Preparing Training Data**
|
173 |
|
174 |
The dataset uploaded to HuggingFace is loaded, and a function is applied to split it into Instruction and Response.
|
|
|
131 |
# Save the QA pairs to a JSONL file
|
132 |
```
|
133 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
134 |
**Loading/Preparing Training Data**
|
135 |
|
136 |
The dataset uploaded to HuggingFace is loaded, and a function is applied to split it into Instruction and Response.
|