amaltese committed (verified)
Commit 1872e0d · Parent(s): a4467aa

Update app.py

Files changed (1)
  1. app.py +3 -13
app.py CHANGED
@@ -5,7 +5,7 @@ import os
 from datasets import Dataset
 from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer
 from peft import LoraConfig, get_peft_model, TaskType, prepare_model_for_kbit_training
-import spaces  # Import the spaces library for HF Spaces integration
+import spaces  # Import the spaces library
 
 # Initialize logging
 import logging
@@ -74,12 +74,10 @@ def finetune_model(model_id, train_data, output_dir, epochs, batch_size=None):
 
     # ============ MEMORY OPTIMIZATION 1: REDUCED BATCH SIZE ============
     # A smaller batch size dramatically reduces memory usage during training
-    # For 7B models on limited VRAM (40GB), values between 1-8 are recommended
     actual_batch_size = 8 if batch_size is None else min(batch_size, 8)
     logger.info(f"Using batch size: {actual_batch_size} (reduced from original to save memory)")
 
     # ============ MEMORY OPTIMIZATION 2: 8-bit QUANTIZATION ============
-    # Load model in 8-bit to reduce memory footprint during training
     model = AutoModelForCausalLM.from_pretrained(
         model_id,
         load_in_8bit=True,  # Use 8-bit quantization to reduce memory usage
@@ -95,20 +93,16 @@ def finetune_model(model_id, train_data, output_dir, epochs, batch_size=None):
     model = prepare_model_for_kbit_training(model)
 
     # ============ MEMORY OPTIMIZATION 3: GRADIENT CHECKPOINTING ============
-    # Enable gradient checkpointing to trade compute for memory
-    # This recomputes forward activations during backward pass instead of storing them
     model.gradient_checkpointing_enable()
     logger.info("Gradient checkpointing enabled: trading computation for memory savings")
 
     # ============ MEMORY OPTIMIZATION 4: OPTIMIZED LORA CONFIG ============
-    # Use lower rank and fewer modules to reduce memory requirements
     peft_config = LoraConfig(
         task_type=TaskType.CAUSAL_LM,
         inference_mode=False,
         r=4,  # REDUCED from default 8/16 to save memory
         lora_alpha=16,  # Scaling factor
         lora_dropout=0.1,  # Dropout probability for regularization
-        # Target specific modules instead of all linear layers to save memory
         target_modules=["q_proj", "v_proj"],  # Only attention query and value projections
     )
     logger.info("Using optimized LoRA parameters with reduced rank (r=4) and targeted modules")
@@ -125,11 +119,9 @@ def finetune_model(model_id, train_data, output_dir, epochs, batch_size=None):
         per_device_train_batch_size=actual_batch_size,
         per_device_eval_batch_size=actual_batch_size,
         # ============ MEMORY OPTIMIZATION 6: MIXED PRECISION TRAINING ============
-        # Mixed precision significantly reduces memory usage
         fp16=True,  # Use FP16 for mixed precision training
         # ============ MEMORY OPTIMIZATION 7: GRADIENT ACCUMULATION ============
-        # Simulate larger batch sizes without the memory cost
-        gradient_accumulation_steps=4,  # Accumulate gradients over 4 steps (effective batch size = 8*4=32)
+        gradient_accumulation_steps=4,  # Accumulate gradients over 4 steps
         # ============ MEMORY OPTIMIZATION 8: GRADIENT CHECKPOINTING IN ARGS ============
         gradient_checkpointing=True,
         # Other parameters
@@ -155,7 +147,6 @@ def finetune_model(model_id, train_data, output_dir, epochs, batch_size=None):
     )
 
     # ============ MEMORY OPTIMIZATION 11: MANAGE CUDA CACHE ============
-    # Clear CUDA cache before training to start with a clean memory state
     if torch.cuda.is_available():
         torch.cuda.empty_cache()
         logger.info("CUDA cache cleared before training")
@@ -247,6 +238,5 @@ with gr.Blocks() as demo:
         outputs=training_output
     )
 
-# Launch the Space
-spaces.zero.mount()
+# Launch the app - REMOVED the spaces.zero.mount() call that was causing the error
 demo.queue().launch(debug=True)
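
For reference, the memory-saving settings touched by this diff can be read together as one fine-tuning setup. The sketch below is a minimal reconstruction based only on what is visible in the hunks above, not the full app.py: the model id, dataset, and output directory are placeholder assumptions, and it presumes a CUDA GPU with bitsandbytes installed. It keeps load_in_8bit=True as in the diff, even though newer transformers releases prefer passing a quantization config.

# Minimal sketch combining the settings shown in this commit.
# NOTE: model_id, output_dir, and the tiny dataset are illustrative placeholders.
import torch
from datasets import Dataset
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer
from peft import LoraConfig, get_peft_model, TaskType, prepare_model_for_kbit_training

model_id = "facebook/opt-350m"      # placeholder; the Space takes this as user input
output_dir = "./finetuned-model"    # placeholder output path

# 8-bit load, then prepare for k-bit training and enable gradient checkpointing
model = AutoModelForCausalLM.from_pretrained(model_id, load_in_8bit=True, device_map="auto")
model = prepare_model_for_kbit_training(model)
model.gradient_checkpointing_enable()  # trade extra compute for lower memory

tokenizer = AutoTokenizer.from_pretrained(model_id)

# Low-rank adapters on the attention query/value projections only (r=4, alpha=16)
peft_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    inference_mode=False,
    r=4,
    lora_alpha=16,
    lora_dropout=0.1,
    target_modules=["q_proj", "v_proj"],
)
model = get_peft_model(model, peft_config)

# Tiny placeholder dataset; labels simply mirror input_ids for a causal LM
train_dataset = Dataset.from_dict({"text": ["Hello world.", "Example training sentence."]})

def tokenize(batch):
    out = tokenizer(batch["text"], truncation=True, padding="max_length", max_length=128)
    out["labels"] = [ids.copy() for ids in out["input_ids"]]
    return out

train_dataset = train_dataset.map(tokenize, batched=True, remove_columns=["text"])

training_args = TrainingArguments(
    output_dir=output_dir,
    num_train_epochs=1,
    per_device_train_batch_size=8,   # capped batch size
    fp16=True,                       # mixed precision
    gradient_accumulation_steps=4,   # effective batch size = 8 * 4 = 32
    gradient_checkpointing=True,
    logging_steps=10,
)

if torch.cuda.is_available():
    torch.cuda.empty_cache()         # start training from a clean CUDA cache

trainer = Trainer(model=model, args=training_args, train_dataset=train_dataset)
trainer.train()
trainer.save_model(output_dir)

With per_device_train_batch_size=8 and gradient_accumulation_steps=4, the effective batch size is 32, which is the detail the commit dropped from the inline comment.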