Update README.md
Browse files
README.md
CHANGED
@@ -146,12 +146,6 @@ def enable_grad_only_every_nth(model, n):
|
|
146 |
for all other components of the model, including the embedding layers and the model's head. This setup is particularly
|
147 |
useful for fine-tuning processes where only a subset of layers are targeted for updates, ensuring efficient training and
|
148 |
adaptation of newly integrated layers while maintaining the pre-trained behavior of other model components.
|
149 |
-
|
150 |
-
:param model: The model instance, which is expected to have a structure compatible with selective layer training, such
|
151 |
-
as AutoModelForCausalLM.
|
152 |
-
:param n: The interval at which layers are selected for gradient enabling, starting with the first layer. This
|
153 |
-
parameter determines the sparsity of active training within the model's architecture, allowing for focused updates
|
154 |
-
on specific layers.
|
155 |
"""
|
156 |
|
157 |
# Freeze embeddings.
|
@@ -180,5 +174,4 @@ model = transformers.AutoModelForCausalLM.from_pretrained(
|
|
180 |
# Update layer gradients, specify the correct value for n based on your model's architecture
|
181 |
n = 5
|
182 |
enable_grad_only_every_nth(model, n)
|
183 |
-
model_args.model_name_or_path = model
|
184 |
```
|
|
|
146 |
for all other components of the model, including the embedding layers and the model's head. This setup is particularly
|
147 |
useful for fine-tuning processes where only a subset of layers are targeted for updates, ensuring efficient training and
|
148 |
adaptation of newly integrated layers while maintaining the pre-trained behavior of other model components.
|
|
|
|
|
|
|
|
|
|
|
|
|
149 |
"""
|
150 |
|
151 |
# Freeze embeddings.
|
|
|
174 |
# Update layer gradients, specify the correct value for n based on your model's architecture
|
175 |
n = 5
|
176 |
enable_grad_only_every_nth(model, n)
|
|
|
177 |
```
|