amaltese committed
Commit 8017cfe · verified · 1 Parent(s): 1872e0d

Update app.py

Files changed (1): app.py (+88 -172)

app.py CHANGED
@@ -2,157 +2,150 @@ import gradio as gr
  import pandas as pd
  import torch
  import os
  from datasets import Dataset
  from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer
  from peft import LoraConfig, get_peft_model, TaskType, prepare_model_for_kbit_training
- import spaces  # Import the spaces library
-
- # Initialize logging
  import logging
- logging.basicConfig(level=logging.INFO)
- logger = logging.getLogger(__name__)

- # Function to load and process data
- def load_data(csv_file):
-     try:
-         df = pd.read_csv(csv_file)
-         logger.info(f"CSV columns: {df.columns.tolist()}")
-         logger.info(f"Total rows in CSV: {len(df)}")
-         return df
-     except Exception as e:
-         logger.error(f"Error loading CSV: {e}")
-         return None

- # Function to prepare dataset
- def prepare_dataset(df, teacher_col, student_col, num_samples=100):
-     # Extract and format data
-     logger.info(f"Using columns: {teacher_col} (teacher) and {student_col} (student)")
-
-     formatted_data = []
-     for i in range(min(num_samples, len(df))):
-         teacher_text = str(df.iloc[i][teacher_col])
-         student_text = str(df.iloc[i][student_col])
-
-         # Create prompt
-         formatted_text = f"### Teacher: {teacher_text}\n### Student: {student_text}"
-         formatted_data.append({"text": formatted_text})
-
-     logger.info(f"Created {len(formatted_data)} formatted examples")
-
-     # Create dataset
-     dataset = Dataset.from_list(formatted_data)
-
-     # Split dataset
-     train_val_split = dataset.train_test_split(test_size=0.1, seed=42)
-
-     return train_val_split

- # Function to tokenize data
- def tokenize_data(dataset, tokenizer, max_length=512):
-     def tokenize_function(examples):
-         return tokenizer(
-             examples["text"],
-             truncation=True,
-             max_length=max_length,
-             padding="max_length"
-         )
-
-     tokenized_dataset = dataset.map(tokenize_function, batched=True)
-     return tokenized_dataset

- # Main fine-tuning function with memory optimizations
  def finetune_model(model_id, train_data, output_dir, epochs, batch_size=None):
      """
-     Fine-tune a model with optimized memory settings to prevent CUDA OOM errors.
      """
      logger.info(f"Using model: {model_id}")

      # Load tokenizer
      tokenizer = AutoTokenizer.from_pretrained(model_id)
      if tokenizer.pad_token is None:
          tokenizer.pad_token = tokenizer.eos_token

-     # ============ MEMORY OPTIMIZATION 1: REDUCED BATCH SIZE ============
-     # A smaller batch size dramatically reduces memory usage during training
-     actual_batch_size = 8 if batch_size is None else min(batch_size, 8)
-     logger.info(f"Using batch size: {actual_batch_size} (reduced from original to save memory)")

-     # ============ MEMORY OPTIMIZATION 2: 8-bit QUANTIZATION ============
      model = AutoModelForCausalLM.from_pretrained(
          model_id,
-         load_in_8bit=True,          # Use 8-bit quantization to reduce memory usage
-         device_map="auto",          # Automatically handle model distribution
-         use_cache=False,            # Disable KV cache which uses extra memory
-         torch_dtype=torch.float16,  # Use lower precision
      )

-     # Count model parameters
      logger.info(f"Model parameters: {model.num_parameters():,}")

-     # Prepare model for training with quantization
      model = prepare_model_for_kbit_training(model)

-     # ============ MEMORY OPTIMIZATION 3: GRADIENT CHECKPOINTING ============
      model.gradient_checkpointing_enable()
-     logger.info("Gradient checkpointing enabled: trading computation for memory savings")

-     # ============ MEMORY OPTIMIZATION 4: OPTIMIZED LORA CONFIG ============
      peft_config = LoraConfig(
          task_type=TaskType.CAUSAL_LM,
          inference_mode=False,
-         r=4,              # REDUCED from default 8/16 to save memory
-         lora_alpha=16,    # Scaling factor
-         lora_dropout=0.1, # Dropout probability for regularization
-         target_modules=["q_proj", "v_proj"],  # Only attention query and value projections
      )
-     logger.info("Using optimized LoRA parameters with reduced rank (r=4) and targeted modules")

-     # Apply LoRA adapters to the model
      model = get_peft_model(model, peft_config)
-     model.print_trainable_parameters()  # Print trainable parameters info

-     # Define training arguments
      training_args = TrainingArguments(
          output_dir=output_dir,
          num_train_epochs=epochs,
-         # ============ MEMORY OPTIMIZATION 5: REDUCED BATCH SIZE IN ARGS ============
          per_device_train_batch_size=actual_batch_size,
          per_device_eval_batch_size=actual_batch_size,
-         # ============ MEMORY OPTIMIZATION 6: MIXED PRECISION TRAINING ============
-         fp16=True,  # Use FP16 for mixed precision training
-         # ============ MEMORY OPTIMIZATION 7: GRADIENT ACCUMULATION ============
-         gradient_accumulation_steps=4,  # Accumulate gradients over 4 steps
-         # ============ MEMORY OPTIMIZATION 8: GRADIENT CHECKPOINTING IN ARGS ============
          gradient_checkpointing=True,
          # Other parameters
-         logging_steps=10,
-         save_strategy="epoch",
-         evaluation_strategy="epoch",
-         learning_rate=2e-4,
          weight_decay=0.01,
          warmup_ratio=0.03,
-         # ============ MEMORY OPTIMIZATION 9: REDUCED OPTIMIZER OVERHEAD ============
-         optim="adamw_torch_fused",  # More memory-efficient optimizer
-         # ============ MEMORY OPTIMIZATION 10: REDUCED LOGGING MEMORY ============
-         report_to="none",  # Disable extra logging to save memory
      )

-     # Initialize the Trainer
      trainer = Trainer(
          model=model,
          args=training_args,
          train_dataset=train_data["train"],
-         eval_dataset=train_data["validation"],
-         tokenizer=tokenizer,
      )

-     # ============ MEMORY OPTIMIZATION 11: MANAGE CUDA CACHE ============
      if torch.cuda.is_available():
          torch.cuda.empty_cache()
-         logger.info("CUDA cache cleared before training")

      # Start training
-     logger.info("Starting training...")
      trainer.train()

      # Save the model
@@ -162,81 +155,4 @@ def finetune_model(model_id, train_data, output_dir, epochs, batch_size=None):

      return model, tokenizer

- # Gradio interface functions
- def process_csv(file, teacher_col, student_col, num_samples):
-     df = load_data(file.name)
-     if df is None:
-         return "Error loading CSV file"
-     return f"CSV loaded successfully with {len(df)} rows"
-
- def start_fine_tuning(file, teacher_col, student_col, model_id, epochs, batch_size, num_samples):
-     try:
-         # Load and process data
-         df = load_data(file.name)
-         if df is None:
-             return "Error loading CSV file"
-
-         # Prepare dataset
-         dataset = prepare_dataset(df, teacher_col, student_col, num_samples=int(num_samples))
-
-         # Load tokenizer for preprocessing
-         tokenizer = AutoTokenizer.from_pretrained(model_id)
-         if tokenizer.pad_token is None:
-             tokenizer.pad_token = tokenizer.eos_token
-
-         # Tokenize dataset
-         tokenized_dataset = {
-             "train": tokenize_data(dataset["train"], tokenizer),
-             "validation": tokenize_data(dataset["test"], tokenizer),
-         }
-
-         # Create output directory
-         output_dir = "./fine_tuned_model"
-         os.makedirs(output_dir, exist_ok=True)
-
-         # Finetune model with memory optimizations
-         finetune_model(
-             model_id=model_id,
-             train_data=tokenized_dataset,
-             output_dir=output_dir,
-             epochs=int(epochs),
-             batch_size=int(batch_size),
-         )
-
-         return "Fine-tuning completed successfully!"
-
-     except Exception as e:
-         logger.error(f"Error during fine-tuning: {e}")
-         return f"Error during fine-tuning: {str(e)}"
-
- # Create Gradio interface
- with gr.Blocks() as demo:
-     gr.Markdown("# Teacher-Student Bot Fine-Tuning")
-
-     with gr.Tab("Upload Data"):
-         file_input = gr.File(label="Upload CSV File")
-         with gr.Row():
-             teacher_col = gr.Textbox(label="Teacher Column", value="Unnamed: 0")
-             student_col = gr.Textbox(label="Student Column", value="idx")
-         num_samples = gr.Slider(label="Number of Samples", minimum=10, maximum=1000, value=100, step=10)
-         upload_btn = gr.Button("Process CSV")
-         csv_output = gr.Textbox(label="CSV Processing Result")
-         upload_btn.click(process_csv, inputs=[file_input, teacher_col, student_col, num_samples], outputs=csv_output)
-
-     with gr.Tab("Fine-Tune"):
-         model_id = gr.Textbox(label="Model ID", value="mistralai/Mistral-7B-v0.1")
-         with gr.Row():
-             batch_size = gr.Number(label="Batch Size", value=8, info="Recommended: 8 or lower for 7B models")
-             epochs = gr.Number(label="Number of Epochs", value=2)
-
-         training_btn = gr.Button("Start Fine-Tuning")
-         training_output = gr.Textbox(label="Training Progress")
-
-         training_btn.click(
-             start_fine_tuning,
-             inputs=[file_input, teacher_col, student_col, model_id, epochs, batch_size, num_samples],
-             outputs=training_output
-         )
-
- # Launch the app - REMOVED the spaces.zero.mount() call that was causing the error
- demo.queue().launch(debug=True)
 
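The removed code passes load_in_8bit=True straight to from_pretrained; recent transformers releases prefer routing quantization flags through a BitsAndBytesConfig, which is the pattern the added lines below adopt for 4-bit NF4. For comparison, a minimal sketch of the 8-bit equivalent (an illustration, not part of this commit; it assumes the bitsandbytes package and a CUDA GPU are available):

import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

# 8-bit quantized load expressed through BitsAndBytesConfig rather than the
# older load_in_8bit=True keyword (illustrative sketch only).
bnb_8bit = BitsAndBytesConfig(load_in_8bit=True)

model = AutoModelForCausalLM.from_pretrained(
    "mistralai/Mistral-7B-v0.1",   # default model id used by the Gradio UI above
    quantization_config=bnb_8bit,
    device_map="auto",
    torch_dtype=torch.float16,
)

The added lines of the updated app.py follow.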
  import pandas as pd
  import torch
  import os
+ import gc
  from datasets import Dataset
  from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer
  from peft import LoraConfig, get_peft_model, TaskType, prepare_model_for_kbit_training
  import logging
+ import os

+ # Set environment variables for memory management
+ os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

+ logging.basicConfig(level=logging.INFO)
+ logger = logging.getLogger(__name__)

+ # [Your existing load_data, prepare_dataset, and tokenize_data functions]

  def finetune_model(model_id, train_data, output_dir, epochs, batch_size=None):
      """
+     Fine-tune a model with ultra aggressive memory optimizations for small GPUs
      """
      logger.info(f"Using model: {model_id}")

+     # Force CUDA garbage collection
+     if torch.cuda.is_available():
+         torch.cuda.empty_cache()
+         gc.collect()
+
      # Load tokenizer
      tokenizer = AutoTokenizer.from_pretrained(model_id)
      if tokenizer.pad_token is None:
          tokenizer.pad_token = tokenizer.eos_token

+     # ============ MEMORY OPTIMIZATION 1: MICRO BATCH SIZE ============
+     # Use batch size of 1 since we have only ~15GB GPU
+     actual_batch_size = 1
+     logger.info(f"Using micro batch size: {actual_batch_size} for ~15GB GPU")
+
+     # ============ MEMORY OPTIMIZATION 2: 4-bit QUANTIZATION ============
+     # 4-bit is more memory efficient than 8-bit
+     from transformers import BitsAndBytesConfig

+     bnb_config = BitsAndBytesConfig(
+         load_in_4bit=True,
+         bnb_4bit_quant_type="nf4",
+         bnb_4bit_compute_dtype=torch.float16,
+         bnb_4bit_use_double_quant=True,
+     )
+
+     # Load model with 4-bit quantization
      model = AutoModelForCausalLM.from_pretrained(
          model_id,
+         quantization_config=bnb_config,
+         device_map="auto",
+         use_cache=False,
+         torch_dtype=torch.float16,
+         # ============ MEMORY OPTIMIZATION 3: MODEL LOADING OPTIONS ============
+         max_memory={0: "10GB"},    # Limit memory usage
+         offload_folder="offload",  # Set offload folder
+         offload_state_dict=True,   # Offload state dict to CPU
      )

      logger.info(f"Model parameters: {model.num_parameters():,}")

+     # Prepare model for training
      model = prepare_model_for_kbit_training(model)

+     # Enable gradient checkpointing
      model.gradient_checkpointing_enable()
+     logger.info("Gradient checkpointing enabled")

+     # ============ MEMORY OPTIMIZATION 4: MINIMAL LORA CONFIG ============
+     # Use absolute minimum LoRA configuration
      peft_config = LoraConfig(
          task_type=TaskType.CAUSAL_LM,
          inference_mode=False,
+         r=2,                 # Minimal rank
+         lora_alpha=8,        # Reduced alpha
+         lora_dropout=0.05,   # Reduced dropout
+         target_modules=["q_proj", "v_proj"],  # Only query and value projections
      )
+     logger.info("Using minimal LoRA parameters: r=2, target=q_proj,v_proj only")

+     # Apply LoRA adapters
      model = get_peft_model(model, peft_config)
+     model.print_trainable_parameters()

+     # Define training arguments with extreme memory optimization
      training_args = TrainingArguments(
          output_dir=output_dir,
          num_train_epochs=epochs,
+         # ============ MEMORY OPTIMIZATION 5: MICRO BATCH + HUGE ACCUMULATION ============
          per_device_train_batch_size=actual_batch_size,
          per_device_eval_batch_size=actual_batch_size,
+         gradient_accumulation_steps=16,  # Accumulate gradients over many steps
+         # ============ MEMORY OPTIMIZATION 6: MIXED PRECISION ============
+         fp16=True,
+         # ============ MEMORY OPTIMIZATION 7: GRADIENT CHECKPOINTING ============
          gradient_checkpointing=True,
+         # ============ MEMORY OPTIMIZATION 8: MINIMAL EVAL AND LOGGING ============
+         logging_steps=50,
+         save_strategy="no",        # Don't save checkpoints during training
+         evaluation_strategy="no",  # Skip evaluation to save memory
+         # ============ MEMORY OPTIMIZATION 9: DEEPSPEED OFFLOADING ============
+         deepspeed={
+             "zero_optimization": {
+                 "stage": 2,
+                 "offload_optimizer": {
+                     "device": "cpu",
+                     "pin_memory": True
+                 },
+                 "allgather_partitions": True,
+                 "allgather_bucket_size": 5e8,
+                 "reduce_scatter": True,
+                 "reduce_bucket_size": 5e8,
+                 "overlap_comm": True,
+                 "contiguous_gradients": True,
+             },
+             "fp16": {
+                 "enabled": True
+             }
+         },
          # Other parameters
+         learning_rate=1e-4,  # Reduced learning rate
          weight_decay=0.01,
          warmup_ratio=0.03,
+         optim="adamw_hf",  # HF's implementation is more memory efficient
+         report_to="none",
      )

+     # Initialize trainer
      trainer = Trainer(
          model=model,
          args=training_args,
          train_dataset=train_data["train"],
+         tokenizer=tokenizer,  # Important for tokenization during training
      )

+     # Final memory cleanup before training
      if torch.cuda.is_available():
          torch.cuda.empty_cache()
+         gc.collect()
+         logger.info("CUDA cache cleared before training")

      # Start training
+     logger.info("Starting training with ultra memory-efficient settings...")
      trainer.train()

      # Save the model

      return model, tokenizer

+ # [Rest of your Gradio interface code]
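The deepspeed argument of TrainingArguments accepts either an inline dict, as above, or a path to a JSON config file; in both cases the deepspeed package must be installed for the Trainer integration to initialize. Note that with per_device_train_batch_size=1 and gradient_accumulation_steps=16, the effective per-device batch per optimizer step drops from 8 × 4 = 32 in the previous revision to 1 × 16 = 16 here. Below is a minimal sketch of the same ZeRO-2 CPU-offload setup written as a standalone config file; the file name and the "auto" values are illustrative assumptions, not part of this commit.

import json
from transformers import TrainingArguments

# ZeRO-2 with optimizer CPU offload; "auto" lets the Trainer fill these fields
# in from TrainingArguments so the two configs cannot drift apart.
ds_config = {
    "zero_optimization": {
        "stage": 2,
        "offload_optimizer": {"device": "cpu", "pin_memory": True},
    },
    "fp16": {"enabled": "auto"},
    "train_micro_batch_size_per_gpu": "auto",
    "gradient_accumulation_steps": "auto",
}

with open("ds_zero2_offload.json", "w") as f:  # illustrative file name
    json.dump(ds_config, f, indent=2)

training_args = TrainingArguments(
    output_dir="./fine_tuned_model",
    per_device_train_batch_size=1,
    gradient_accumulation_steps=16,
    fp16=True,
    deepspeed="ds_zero2_offload.json",  # path form; an inline dict also works
)

Passing the config by path keeps the DeepSpeed settings in one reusable file rather than embedding them in the training script.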