Spaces:
Runtime error
Update app.py
app.py CHANGED
@@ -5,7 +5,7 @@ import os
 from datasets import Dataset
 from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer
 from peft import LoraConfig, get_peft_model, TaskType, prepare_model_for_kbit_training
-import spaces  # Import the spaces library
+import spaces  # Import the spaces library

 # Initialize logging
 import logging
@@ -74,12 +74,10 @@ def finetune_model(model_id, train_data, output_dir, epochs, batch_size=None):

     # ============ MEMORY OPTIMIZATION 1: REDUCED BATCH SIZE ============
     # A smaller batch size dramatically reduces memory usage during training
-    # For 7B models on limited VRAM (40GB), values between 1-8 are recommended
     actual_batch_size = 8 if batch_size is None else min(batch_size, 8)
     logger.info(f"Using batch size: {actual_batch_size} (reduced from original to save memory)")

     # ============ MEMORY OPTIMIZATION 2: 8-bit QUANTIZATION ============
-    # Load model in 8-bit to reduce memory footprint during training
     model = AutoModelForCausalLM.from_pretrained(
         model_id,
         load_in_8bit=True,  # Use 8-bit quantization to reduce memory usage
@@ -95,20 +93,16 @@ def finetune_model(model_id, train_data, output_dir, epochs, batch_size=None):
     model = prepare_model_for_kbit_training(model)

     # ============ MEMORY OPTIMIZATION 3: GRADIENT CHECKPOINTING ============
-    # Enable gradient checkpointing to trade compute for memory
-    # This recomputes forward activations during backward pass instead of storing them
     model.gradient_checkpointing_enable()
     logger.info("Gradient checkpointing enabled: trading computation for memory savings")

     # ============ MEMORY OPTIMIZATION 4: OPTIMIZED LORA CONFIG ============
-    # Use lower rank and fewer modules to reduce memory requirements
     peft_config = LoraConfig(
         task_type=TaskType.CAUSAL_LM,
         inference_mode=False,
         r=4,  # REDUCED from default 8/16 to save memory
         lora_alpha=16,  # Scaling factor
         lora_dropout=0.1,  # Dropout probability for regularization
-        # Target specific modules instead of all linear layers to save memory
         target_modules=["q_proj", "v_proj"],  # Only attention query and value projections
     )
     logger.info("Using optimized LoRA parameters with reduced rank (r=4) and targeted modules")
@@ -125,11 +119,9 @@ def finetune_model(model_id, train_data, output_dir, epochs, batch_size=None):
         per_device_train_batch_size=actual_batch_size,
         per_device_eval_batch_size=actual_batch_size,
         # ============ MEMORY OPTIMIZATION 6: MIXED PRECISION TRAINING ============
-        # Mixed precision significantly reduces memory usage
         fp16=True,  # Use FP16 for mixed precision training
         # ============ MEMORY OPTIMIZATION 7: GRADIENT ACCUMULATION ============
-        #
-        gradient_accumulation_steps=4,  # Accumulate gradients over 4 steps (effective batch size = 8*4=32)
+        gradient_accumulation_steps=4,  # Accumulate gradients over 4 steps
         # ============ MEMORY OPTIMIZATION 8: GRADIENT CHECKPOINTING IN ARGS ============
         gradient_checkpointing=True,
         # Other parameters
@@ -155,7 +147,6 @@ def finetune_model(model_id, train_data, output_dir, epochs, batch_size=None):
     )

     # ============ MEMORY OPTIMIZATION 11: MANAGE CUDA CACHE ============
-    # Clear CUDA cache before training to start with a clean memory state
     if torch.cuda.is_available():
         torch.cuda.empty_cache()
         logger.info("CUDA cache cleared before training")
@@ -247,6 +238,5 @@ with gr.Blocks() as demo:
         outputs=training_output
     )

-# Launch the
-spaces.zero.mount()
+# Launch the app - REMOVED the spaces.zero.mount() call that was causing the error
 demo.queue().launch(debug=True)
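
The runtime error came from the spaces.zero.mount() call at launch, so the commit simply deletes it. For reference, the sketch below shows how a Gradio Space typically uses the spaces library when it runs on ZeroGPU hardware: import the package and decorate the GPU-bound function. This is an illustration under that assumption, and finetune_model here is a stand-in signature, not a copy of this app's training function.

import gradio as gr
import spaces  # Hugging Face helper package available inside Spaces

@spaces.GPU  # ZeroGPU: allocate a GPU only while this function is running
def finetune_model(model_id: str, epochs: float) -> str:
    # GPU-bound training code would go here
    return f"Finished {int(epochs)} epoch(s) for {model_id}"

with gr.Blocks() as demo:
    model_box = gr.Textbox(label="Model ID")
    epochs_box = gr.Number(value=1, label="Epochs")
    training_output = gr.Textbox(label="Training output")
    gr.Button("Train").click(finetune_model, inputs=[model_box, epochs_box], outputs=training_output)

# No mount call is needed: importing spaces and decorating the GPU-bound
# function is enough for the Space to schedule GPU access.
demo.queue().launch(debug=True)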
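
The memory optimizations the commit keeps in place (8-bit loading, k-bit preparation, gradient checkpointing, a rank-4 LoRA on q_proj/v_proj, fp16, and gradient accumulation, for an effective batch size of 8 * 4 = 32) are easier to read in isolation. The sketch below restates them outside the app: the model id and output directory are placeholders, and it routes quantization through BitsAndBytesConfig, which recent transformers releases favour over passing load_in_8bit=True directly to from_pretrained.

import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig, TrainingArguments
from peft import LoraConfig, TaskType, get_peft_model, prepare_model_for_kbit_training

model_id = "org/some-7b-model"  # placeholder, not the model this Space fine-tunes

# 8-bit quantization (same intent as load_in_8bit=True in app.py)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=BitsAndBytesConfig(load_in_8bit=True),
    device_map="auto",
)
model = prepare_model_for_kbit_training(model)  # prepare quantized weights for training
model.gradient_checkpointing_enable()           # recompute activations instead of storing them

# Small LoRA adapters on the attention query/value projections only
peft_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    inference_mode=False,
    r=4,
    lora_alpha=16,
    lora_dropout=0.1,
    target_modules=["q_proj", "v_proj"],
)
model = get_peft_model(model, peft_config)

if torch.cuda.is_available():
    torch.cuda.empty_cache()  # clear cached allocations before training starts

# Effective batch size = per_device_train_batch_size * gradient_accumulation_steps = 8 * 4 = 32
training_args = TrainingArguments(
    output_dir="./lora-out",  # placeholder
    per_device_train_batch_size=8,
    gradient_accumulation_steps=4,
    gradient_checkpointing=True,
    fp16=True,
    num_train_epochs=1,
)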