amaltese committed
Commit 8017cfe · verified · 1 Parent(s): 1872e0d

Update app.py

Files changed (1): app.py (+88 -172)

app.py CHANGED
@@ -2,157 +2,150 @@ import gradio as gr
  import pandas as pd
  import torch
  import os
  from datasets import Dataset
  from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer
  from peft import LoraConfig, get_peft_model, TaskType, prepare_model_for_kbit_training
- import spaces  # Import the spaces library
-
- # Initialize logging
  import logging
- logging.basicConfig(level=logging.INFO)
- logger = logging.getLogger(__name__)

- # Function to load and process data
- def load_data(csv_file):
-     try:
-         df = pd.read_csv(csv_file)
-         logger.info(f"CSV columns: {df.columns.tolist()}")
-         logger.info(f"Total rows in CSV: {len(df)}")
-         return df
-     except Exception as e:
-         logger.error(f"Error loading CSV: {e}")
-         return None

- # Function to prepare dataset
- def prepare_dataset(df, teacher_col, student_col, num_samples=100):
-     # Extract and format data
-     logger.info(f"Using columns: {teacher_col} (teacher) and {student_col} (student)")
-
-     formatted_data = []
-     for i in range(min(num_samples, len(df))):
-         teacher_text = str(df.iloc[i][teacher_col])
-         student_text = str(df.iloc[i][student_col])
-
-         # Create prompt
-         formatted_text = f"### Teacher: {teacher_text}\n### Student: {student_text}"
-         formatted_data.append({"text": formatted_text})
-
-     logger.info(f"Created {len(formatted_data)} formatted examples")
-
-     # Create dataset
-     dataset = Dataset.from_list(formatted_data)
-
-     # Split dataset
-     train_val_split = dataset.train_test_split(test_size=0.1, seed=42)
-
-     return train_val_split

- # Function to tokenize data
- def tokenize_data(dataset, tokenizer, max_length=512):
-     def tokenize_function(examples):
-         return tokenizer(
-             examples["text"],
-             truncation=True,
-             max_length=max_length,
-             padding="max_length"
-         )
-
-     tokenized_dataset = dataset.map(tokenize_function, batched=True)
-     return tokenized_dataset

- # Main fine-tuning function with memory optimizations
  def finetune_model(model_id, train_data, output_dir, epochs, batch_size=None):
      """
-     Fine-tune a model with optimized memory settings to prevent CUDA OOM errors.
      """
      logger.info(f"Using model: {model_id}")

      # Load tokenizer
      tokenizer = AutoTokenizer.from_pretrained(model_id)
      if tokenizer.pad_token is None:
          tokenizer.pad_token = tokenizer.eos_token

-     # ============ MEMORY OPTIMIZATION 1: REDUCED BATCH SIZE ============
-     # A smaller batch size dramatically reduces memory usage during training
-     actual_batch_size = 8 if batch_size is None else min(batch_size, 8)
-     logger.info(f"Using batch size: {actual_batch_size} (reduced from original to save memory)")

-     # ============ MEMORY OPTIMIZATION 2: 8-bit QUANTIZATION ============
      model = AutoModelForCausalLM.from_pretrained(
          model_id,
-         load_in_8bit=True,          # Use 8-bit quantization to reduce memory usage
-         device_map="auto",          # Automatically handle model distribution
-         use_cache=False,            # Disable KV cache which uses extra memory
-         torch_dtype=torch.float16,  # Use lower precision
      )

-     # Count model parameters
      logger.info(f"Model parameters: {model.num_parameters():,}")

-     # Prepare model for training with quantization
      model = prepare_model_for_kbit_training(model)

-     # ============ MEMORY OPTIMIZATION 3: GRADIENT CHECKPOINTING ============
      model.gradient_checkpointing_enable()
-     logger.info("Gradient checkpointing enabled: trading computation for memory savings")

-     # ============ MEMORY OPTIMIZATION 4: OPTIMIZED LORA CONFIG ============
      peft_config = LoraConfig(
          task_type=TaskType.CAUSAL_LM,
          inference_mode=False,
-         r=4,              # REDUCED from default 8/16 to save memory
-         lora_alpha=16,    # Scaling factor
-         lora_dropout=0.1, # Dropout probability for regularization
-         target_modules=["q_proj", "v_proj"],  # Only attention query and value projections
      )
-     logger.info("Using optimized LoRA parameters with reduced rank (r=4) and targeted modules")

-     # Apply LoRA adapters to the model
      model = get_peft_model(model, peft_config)
-     model.print_trainable_parameters()  # Print trainable parameters info

-     # Define training arguments
      training_args = TrainingArguments(
          output_dir=output_dir,
          num_train_epochs=epochs,
-         # ============ MEMORY OPTIMIZATION 5: REDUCED BATCH SIZE IN ARGS ============
          per_device_train_batch_size=actual_batch_size,
          per_device_eval_batch_size=actual_batch_size,
-         # ============ MEMORY OPTIMIZATION 6: MIXED PRECISION TRAINING ============
-         fp16=True,  # Use FP16 for mixed precision training
-         # ============ MEMORY OPTIMIZATION 7: GRADIENT ACCUMULATION ============
-         gradient_accumulation_steps=4,  # Accumulate gradients over 4 steps
-         # ============ MEMORY OPTIMIZATION 8: GRADIENT CHECKPOINTING IN ARGS ============
          gradient_checkpointing=True,
          # Other parameters
-         logging_steps=10,
-         save_strategy="epoch",
-         evaluation_strategy="epoch",
-         learning_rate=2e-4,
          weight_decay=0.01,
          warmup_ratio=0.03,
-         # ============ MEMORY OPTIMIZATION 9: REDUCED OPTIMIZER OVERHEAD ============
-         optim="adamw_torch_fused",  # More memory-efficient optimizer
-         # ============ MEMORY OPTIMIZATION 10: REDUCED LOGGING MEMORY ============
-         report_to="none",  # Disable extra logging to save memory
      )

-     # Initialize the Trainer
      trainer = Trainer(
          model=model,
          args=training_args,
          train_dataset=train_data["train"],
-         eval_dataset=train_data["validation"],
-         tokenizer=tokenizer,
      )

-     # ============ MEMORY OPTIMIZATION 11: MANAGE CUDA CACHE ============
      if torch.cuda.is_available():
          torch.cuda.empty_cache()
-         logger.info("CUDA cache cleared before training")

      # Start training
-     logger.info("Starting training...")
      trainer.train()

      # Save the model
@@ -162,81 +155,4 @@ def finetune_model(model_id, train_data, output_dir, epochs, batch_size=None):

      return model, tokenizer

- # Gradio interface functions
- def process_csv(file, teacher_col, student_col, num_samples):
-     df = load_data(file.name)
-     if df is None:
-         return "Error loading CSV file"
-     return f"CSV loaded successfully with {len(df)} rows"
-
- def start_fine_tuning(file, teacher_col, student_col, model_id, epochs, batch_size, num_samples):
-     try:
-         # Load and process data
-         df = load_data(file.name)
-         if df is None:
-             return "Error loading CSV file"
-
-         # Prepare dataset
-         dataset = prepare_dataset(df, teacher_col, student_col, num_samples=int(num_samples))
-
-         # Load tokenizer for preprocessing
-         tokenizer = AutoTokenizer.from_pretrained(model_id)
-         if tokenizer.pad_token is None:
-             tokenizer.pad_token = tokenizer.eos_token
-
-         # Tokenize dataset
-         tokenized_dataset = {
-             "train": tokenize_data(dataset["train"], tokenizer),
-             "validation": tokenize_data(dataset["test"], tokenizer),
-         }
-
-         # Create output directory
-         output_dir = "./fine_tuned_model"
-         os.makedirs(output_dir, exist_ok=True)
-
-         # Finetune model with memory optimizations
-         finetune_model(
-             model_id=model_id,
-             train_data=tokenized_dataset,
-             output_dir=output_dir,
-             epochs=int(epochs),
-             batch_size=int(batch_size),
-         )
-
-         return "Fine-tuning completed successfully!"
-
-     except Exception as e:
-         logger.error(f"Error during fine-tuning: {e}")
-         return f"Error during fine-tuning: {str(e)}"
-
- # Create Gradio interface
- with gr.Blocks() as demo:
-     gr.Markdown("# Teacher-Student Bot Fine-Tuning")
-
-     with gr.Tab("Upload Data"):
-         file_input = gr.File(label="Upload CSV File")
-         with gr.Row():
-             teacher_col = gr.Textbox(label="Teacher Column", value="Unnamed: 0")
-             student_col = gr.Textbox(label="Student Column", value="idx")
-         num_samples = gr.Slider(label="Number of Samples", minimum=10, maximum=1000, value=100, step=10)
-         upload_btn = gr.Button("Process CSV")
-         csv_output = gr.Textbox(label="CSV Processing Result")
-         upload_btn.click(process_csv, inputs=[file_input, teacher_col, student_col, num_samples], outputs=csv_output)
-
-     with gr.Tab("Fine-Tune"):
-         model_id = gr.Textbox(label="Model ID", value="mistralai/Mistral-7B-v0.1")
-         with gr.Row():
-             batch_size = gr.Number(label="Batch Size", value=8, info="Recommended: 8 or lower for 7B models")
-             epochs = gr.Number(label="Number of Epochs", value=2)
-
-         training_btn = gr.Button("Start Fine-Tuning")
-         training_output = gr.Textbox(label="Training Progress")
-
-         training_btn.click(
-             start_fine_tuning,
-             inputs=[file_input, teacher_col, student_col, model_id, epochs, batch_size, num_samples],
-             outputs=training_output
-         )
-
- # Launch the app - REMOVED the spaces.zero.mount() call that was causing the error
- demo.queue().launch(debug=True)
 
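The removed code passes load_in_8bit=True straight to from_pretrained; recent transformers releases prefer routing quantization flags through a BitsAndBytesConfig, which is the pattern the added lines below adopt for 4-bit NF4. For comparison, a minimal sketch of the 8-bit equivalent (an illustration, not part of this commit; it assumes the bitsandbytes package and a CUDA GPU are available):

import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

# 8-bit quantized load expressed through BitsAndBytesConfig rather than the
# older load_in_8bit=True keyword (illustrative sketch only).
bnb_8bit = BitsAndBytesConfig(load_in_8bit=True)

model = AutoModelForCausalLM.from_pretrained(
    "mistralai/Mistral-7B-v0.1",   # default model id used by the Gradio UI above
    quantization_config=bnb_8bit,
    device_map="auto",
    torch_dtype=torch.float16,
)

The added lines of the updated app.py follow.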
  import pandas as pd
  import torch
  import os
+ import gc
  from datasets import Dataset
  from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer
  from peft import LoraConfig, get_peft_model, TaskType, prepare_model_for_kbit_training
  import logging
+ import os

+ # Set environment variables for memory management
+ os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

+ logging.basicConfig(level=logging.INFO)
+ logger = logging.getLogger(__name__)

+ # [Your existing load_data, prepare_dataset, and tokenize_data functions]

  def finetune_model(model_id, train_data, output_dir, epochs, batch_size=None):
      """
+     Fine-tune a model with ultra aggressive memory optimizations for small GPUs
      """
      logger.info(f"Using model: {model_id}")

+     # Force CUDA garbage collection
+     if torch.cuda.is_available():
+         torch.cuda.empty_cache()
+         gc.collect()
+
      # Load tokenizer
      tokenizer = AutoTokenizer.from_pretrained(model_id)
      if tokenizer.pad_token is None:
          tokenizer.pad_token = tokenizer.eos_token

+     # ============ MEMORY OPTIMIZATION 1: MICRO BATCH SIZE ============
+     # Use batch size of 1 since we have only ~15GB GPU
+     actual_batch_size = 1
+     logger.info(f"Using micro batch size: {actual_batch_size} for ~15GB GPU")
+
+     # ============ MEMORY OPTIMIZATION 2: 4-bit QUANTIZATION ============
+     # 4-bit is more memory efficient than 8-bit
+     from transformers import BitsAndBytesConfig

+     bnb_config = BitsAndBytesConfig(
+         load_in_4bit=True,
+         bnb_4bit_quant_type="nf4",
+         bnb_4bit_compute_dtype=torch.float16,
+         bnb_4bit_use_double_quant=True,
+     )
+
+     # Load model with 4-bit quantization
      model = AutoModelForCausalLM.from_pretrained(
          model_id,
+         quantization_config=bnb_config,
+         device_map="auto",
+         use_cache=False,
+         torch_dtype=torch.float16,
+         # ============ MEMORY OPTIMIZATION 3: MODEL LOADING OPTIONS ============
+         max_memory={0: "10GB"},    # Limit memory usage
+         offload_folder="offload",  # Set offload folder
+         offload_state_dict=True,   # Offload state dict to CPU
      )

      logger.info(f"Model parameters: {model.num_parameters():,}")

+     # Prepare model for training
      model = prepare_model_for_kbit_training(model)

+     # Enable gradient checkpointing
      model.gradient_checkpointing_enable()
+     logger.info("Gradient checkpointing enabled")

+     # ============ MEMORY OPTIMIZATION 4: MINIMAL LORA CONFIG ============
+     # Use absolute minimum LoRA configuration
      peft_config = LoraConfig(
          task_type=TaskType.CAUSAL_LM,
          inference_mode=False,
+         r=2,                 # Minimal rank
+         lora_alpha=8,        # Reduced alpha
+         lora_dropout=0.05,   # Reduced dropout
+         target_modules=["q_proj", "v_proj"],  # Only query and value projections
      )
+     logger.info("Using minimal LoRA parameters: r=2, target=q_proj,v_proj only")

+     # Apply LoRA adapters
      model = get_peft_model(model, peft_config)
+     model.print_trainable_parameters()

+     # Define training arguments with extreme memory optimization
      training_args = TrainingArguments(
          output_dir=output_dir,
          num_train_epochs=epochs,
+         # ============ MEMORY OPTIMIZATION 5: MICRO BATCH + HUGE ACCUMULATION ============
          per_device_train_batch_size=actual_batch_size,
          per_device_eval_batch_size=actual_batch_size,
+         gradient_accumulation_steps=16,  # Accumulate gradients over many steps
+         # ============ MEMORY OPTIMIZATION 6: MIXED PRECISION ============
+         fp16=True,
+         # ============ MEMORY OPTIMIZATION 7: GRADIENT CHECKPOINTING ============
          gradient_checkpointing=True,
+         # ============ MEMORY OPTIMIZATION 8: MINIMAL EVAL AND LOGGING ============
+         logging_steps=50,
+         save_strategy="no",        # Don't save checkpoints during training
+         evaluation_strategy="no",  # Skip evaluation to save memory
+         # ============ MEMORY OPTIMIZATION 9: DEEPSPEED OFFLOADING ============
+         deepspeed={
+             "zero_optimization": {
+                 "stage": 2,
+                 "offload_optimizer": {
+                     "device": "cpu",
+                     "pin_memory": True
+                 },
+                 "allgather_partitions": True,
+                 "allgather_bucket_size": 5e8,
+                 "reduce_scatter": True,
+                 "reduce_bucket_size": 5e8,
+                 "overlap_comm": True,
+                 "contiguous_gradients": True,
+             },
+             "fp16": {
+                 "enabled": True
+             }
+         },
          # Other parameters
+         learning_rate=1e-4,  # Reduced learning rate
          weight_decay=0.01,
          warmup_ratio=0.03,
+         optim="adamw_hf",  # HF's implementation is more memory efficient
+         report_to="none",
      )

+     # Initialize trainer
      trainer = Trainer(
          model=model,
          args=training_args,
          train_dataset=train_data["train"],
+         tokenizer=tokenizer,  # Important for tokenization during training
      )

+     # Final memory cleanup before training
      if torch.cuda.is_available():
          torch.cuda.empty_cache()
+         gc.collect()
+         logger.info("CUDA cache cleared before training")

      # Start training
+     logger.info("Starting training with ultra memory-efficient settings...")
      trainer.train()

      # Save the model

      return model, tokenizer

+ # [Rest of your Gradio interface code]
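The deepspeed argument of TrainingArguments accepts either an inline dict, as above, or a path to a JSON config file; in both cases the deepspeed package must be installed for the Trainer integration to initialize. Note that with per_device_train_batch_size=1 and gradient_accumulation_steps=16, the effective per-device batch per optimizer step drops from 8 × 4 = 32 in the previous revision to 1 × 16 = 16 here. Below is a minimal sketch of the same ZeRO-2 CPU-offload setup written as a standalone config file; the file name and the "auto" values are illustrative assumptions, not part of this commit.

import json
from transformers import TrainingArguments

# ZeRO-2 with optimizer CPU offload; "auto" lets the Trainer fill these fields
# in from TrainingArguments so the two configs cannot drift apart.
ds_config = {
    "zero_optimization": {
        "stage": 2,
        "offload_optimizer": {"device": "cpu", "pin_memory": True},
    },
    "fp16": {"enabled": "auto"},
    "train_micro_batch_size_per_gpu": "auto",
    "gradient_accumulation_steps": "auto",
}

with open("ds_zero2_offload.json", "w") as f:  # illustrative file name
    json.dump(ds_config, f, indent=2)

training_args = TrainingArguments(
    output_dir="./fine_tuned_model",
    per_device_train_batch_size=1,
    gradient_accumulation_steps=16,
    fp16=True,
    deepspeed="ds_zero2_offload.json",  # path form; an inline dict also works
)

Passing the config by path keeps the DeepSpeed settings in one reusable file rather than embedding them in the training script.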