Spaces:
Runtime error
Update app.py
app.py CHANGED
@@ -5,7 +5,7 @@ import os
 from datasets import Dataset
 from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer
 from peft import LoraConfig, get_peft_model, TaskType, prepare_model_for_kbit_training
-import spaces  # Import the spaces library
+import spaces  # Import the spaces library

 # Initialize logging
 import logging
@@ -74,12 +74,10 @@ def finetune_model(model_id, train_data, output_dir, epochs, batch_size=None):

     # ============ MEMORY OPTIMIZATION 1: REDUCED BATCH SIZE ============
     # A smaller batch size dramatically reduces memory usage during training
-    # For 7B models on limited VRAM (40GB), values between 1-8 are recommended
     actual_batch_size = 8 if batch_size is None else min(batch_size, 8)
     logger.info(f"Using batch size: {actual_batch_size} (reduced from original to save memory)")

     # ============ MEMORY OPTIMIZATION 2: 8-bit QUANTIZATION ============
-    # Load model in 8-bit to reduce memory footprint during training
     model = AutoModelForCausalLM.from_pretrained(
         model_id,
         load_in_8bit=True,  # Use 8-bit quantization to reduce memory usage
@@ -95,20 +93,16 @@ def finetune_model(model_id, train_data, output_dir, epochs, batch_size=None):
     model = prepare_model_for_kbit_training(model)

     # ============ MEMORY OPTIMIZATION 3: GRADIENT CHECKPOINTING ============
-    # Enable gradient checkpointing to trade compute for memory
-    # This recomputes forward activations during backward pass instead of storing them
     model.gradient_checkpointing_enable()
     logger.info("Gradient checkpointing enabled: trading computation for memory savings")

     # ============ MEMORY OPTIMIZATION 4: OPTIMIZED LORA CONFIG ============
-    # Use lower rank and fewer modules to reduce memory requirements
     peft_config = LoraConfig(
         task_type=TaskType.CAUSAL_LM,
         inference_mode=False,
         r=4,  # REDUCED from default 8/16 to save memory
         lora_alpha=16,  # Scaling factor
         lora_dropout=0.1,  # Dropout probability for regularization
-        # Target specific modules instead of all linear layers to save memory
         target_modules=["q_proj", "v_proj"],  # Only attention query and value projections
     )
     logger.info("Using optimized LoRA parameters with reduced rank (r=4) and targeted modules")
@@ -125,11 +119,9 @@ def finetune_model(model_id, train_data, output_dir, epochs, batch_size=None):
         per_device_train_batch_size=actual_batch_size,
         per_device_eval_batch_size=actual_batch_size,
         # ============ MEMORY OPTIMIZATION 6: MIXED PRECISION TRAINING ============
-        # Mixed precision significantly reduces memory usage
         fp16=True,  # Use FP16 for mixed precision training
         # ============ MEMORY OPTIMIZATION 7: GRADIENT ACCUMULATION ============
-        #
-        gradient_accumulation_steps=4,  # Accumulate gradients over 4 steps (effective batch size = 8*4=32)
+        gradient_accumulation_steps=4,  # Accumulate gradients over 4 steps
         # ============ MEMORY OPTIMIZATION 8: GRADIENT CHECKPOINTING IN ARGS ============
         gradient_checkpointing=True,
         # Other parameters
@@ -155,7 +147,6 @@ def finetune_model(model_id, train_data, output_dir, epochs, batch_size=None):
     )

     # ============ MEMORY OPTIMIZATION 11: MANAGE CUDA CACHE ============
-    # Clear CUDA cache before training to start with a clean memory state
     if torch.cuda.is_available():
         torch.cuda.empty_cache()
         logger.info("CUDA cache cleared before training")
@@ -247,6 +238,5 @@ with gr.Blocks() as demo:
         outputs=training_output
     )

-# Launch the
-spaces.zero.mount()
+# Launch the app - REMOVED the spaces.zero.mount() call that was causing the error
 demo.queue().launch(debug=True)
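
The runtime error came from the spaces.zero.mount() call at launch, so the commit simply deletes it. For reference, the sketch below shows how a Gradio Space typically uses the spaces library when it runs on ZeroGPU hardware: import the package and decorate the GPU-bound function. This is an illustration under that assumption, and finetune_model here is a stand-in signature, not a copy of this app's training function.

import gradio as gr
import spaces  # Hugging Face helper package available inside Spaces

@spaces.GPU  # ZeroGPU: allocate a GPU only while this function is running
def finetune_model(model_id: str, epochs: float) -> str:
    # GPU-bound training code would go here
    return f"Finished {int(epochs)} epoch(s) for {model_id}"

with gr.Blocks() as demo:
    model_box = gr.Textbox(label="Model ID")
    epochs_box = gr.Number(value=1, label="Epochs")
    training_output = gr.Textbox(label="Training output")
    gr.Button("Train").click(finetune_model, inputs=[model_box, epochs_box], outputs=training_output)

# No mount call is needed: importing spaces and decorating the GPU-bound
# function is enough for the Space to schedule GPU access.
demo.queue().launch(debug=True)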
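
The memory optimizations the commit keeps in place (8-bit loading, k-bit preparation, gradient checkpointing, a rank-4 LoRA on q_proj/v_proj, fp16, and gradient accumulation, for an effective batch size of 8 * 4 = 32) are easier to read in isolation. The sketch below restates them outside the app: the model id and output directory are placeholders, and it routes quantization through BitsAndBytesConfig, which recent transformers releases favour over passing load_in_8bit=True directly to from_pretrained.

import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig, TrainingArguments
from peft import LoraConfig, TaskType, get_peft_model, prepare_model_for_kbit_training

model_id = "org/some-7b-model"  # placeholder, not the model this Space fine-tunes

# 8-bit quantization (same intent as load_in_8bit=True in app.py)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=BitsAndBytesConfig(load_in_8bit=True),
    device_map="auto",
)
model = prepare_model_for_kbit_training(model)  # prepare quantized weights for training
model.gradient_checkpointing_enable()           # recompute activations instead of storing them

# Small LoRA adapters on the attention query/value projections only
peft_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    inference_mode=False,
    r=4,
    lora_alpha=16,
    lora_dropout=0.1,
    target_modules=["q_proj", "v_proj"],
)
model = get_peft_model(model, peft_config)

if torch.cuda.is_available():
    torch.cuda.empty_cache()  # clear cached allocations before training starts

# Effective batch size = per_device_train_batch_size * gradient_accumulation_steps = 8 * 4 = 32
training_args = TrainingArguments(
    output_dir="./lora-out",  # placeholder
    per_device_train_batch_size=8,
    gradient_accumulation_steps=4,
    gradient_checkpointing=True,
    fp16=True,
    num_train_epochs=1,
)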