Spaces: Runtime error
Update app.py
app.py CHANGED
@@ -2,157 +2,150 @@ import gradio as gr
import pandas as pd
import torch
import os
from datasets import Dataset
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer
from peft import LoraConfig, get_peft_model, TaskType, prepare_model_for_kbit_training
-import spaces  # Import the spaces library
-
-# Initialize logging
import logging
-
-logger = logging.getLogger(__name__)

-#
-
-    try:
-        df = pd.read_csv(csv_file)
-        logger.info(f"CSV columns: {df.columns.tolist()}")
-        logger.info(f"Total rows in CSV: {len(df)}")
-        return df
-    except Exception as e:
-        logger.error(f"Error loading CSV: {e}")
-        return None

-
-
-    # Extract and format data
-    logger.info(f"Using columns: {teacher_col} (teacher) and {student_col} (student)")
-
-    formatted_data = []
-    for i in range(min(num_samples, len(df))):
-        teacher_text = str(df.iloc[i][teacher_col])
-        student_text = str(df.iloc[i][student_col])
-
-        # Create prompt
-        formatted_text = f"### Teacher: {teacher_text}\n### Student: {student_text}"
-        formatted_data.append({"text": formatted_text})
-
-    logger.info(f"Created {len(formatted_data)} formatted examples")
-
-    # Create dataset
-    dataset = Dataset.from_list(formatted_data)
-
-    # Split dataset
-    train_val_split = dataset.train_test_split(test_size=0.1, seed=42)
-
-    return train_val_split

-#
-def tokenize_data(dataset, tokenizer, max_length=512):
-    def tokenize_function(examples):
-        return tokenizer(
-            examples["text"],
-            truncation=True,
-            max_length=max_length,
-            padding="max_length"
-        )
-
-    tokenized_dataset = dataset.map(tokenize_function, batched=True)
-    return tokenized_dataset

-# Main fine-tuning function with memory optimizations
def finetune_model(model_id, train_data, output_dir, epochs, batch_size=None):
    """
-    Fine-tune a model with
    """
    logger.info(f"Using model: {model_id}")

    # Load tokenizer
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

-    # ============ MEMORY OPTIMIZATION 1:
-    #
-    actual_batch_size =
-    logger.info(f"Using batch size: {actual_batch_size}

-
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
-
-        device_map="auto",
-        use_cache=False,
-        torch_dtype=torch.float16,
    )

-    # Count model parameters
    logger.info(f"Model parameters: {model.num_parameters():,}")

-    # Prepare model for training
    model = prepare_model_for_kbit_training(model)

-    #
    model.gradient_checkpointing_enable()
-    logger.info("Gradient checkpointing enabled

-    # ============ MEMORY OPTIMIZATION 4:
    peft_config = LoraConfig(
        task_type=TaskType.CAUSAL_LM,
        inference_mode=False,
-        r=
-        lora_alpha=
-        lora_dropout=0.
-        target_modules=["q_proj", "v_proj"],  # Only
    )
-    logger.info("Using

-    # Apply LoRA adapters
    model = get_peft_model(model, peft_config)
-    model.print_trainable_parameters()

-    # Define training arguments
    training_args = TrainingArguments(
        output_dir=output_dir,
        num_train_epochs=epochs,
-        # ============ MEMORY OPTIMIZATION 5:
        per_device_train_batch_size=actual_batch_size,
        per_device_eval_batch_size=actual_batch_size,
-        #
-
-
-
-        # ============ MEMORY OPTIMIZATION 8: GRADIENT CHECKPOINTING IN ARGS ============
        gradient_checkpointing=True,
        # Other parameters
-
-        save_strategy="epoch",
-        evaluation_strategy="epoch",
-        learning_rate=2e-4,
        weight_decay=0.01,
        warmup_ratio=0.03,
-        #
-
-        # ============ MEMORY OPTIMIZATION 10: REDUCED LOGGING MEMORY ============
-        report_to="none",  # Disable extra logging to save memory
    )

-    # Initialize
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_data["train"],
-
-        tokenizer=tokenizer,
    )

-    #
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
-

    # Start training
-    logger.info("Starting training...")
    trainer.train()

    # Save the model
@@ -162,81 +155,4 @@ def finetune_model(model_id, train_data, output_dir, epochs, batch_size=None):

    return model, tokenizer

-# Gradio interface
-def process_csv(file, teacher_col, student_col, num_samples):
-    df = load_data(file.name)
-    if df is None:
-        return "Error loading CSV file"
-    return f"CSV loaded successfully with {len(df)} rows"
-
-def start_fine_tuning(file, teacher_col, student_col, model_id, epochs, batch_size, num_samples):
-    try:
-        # Load and process data
-        df = load_data(file.name)
-        if df is None:
-            return "Error loading CSV file"
-
-        # Prepare dataset
-        dataset = prepare_dataset(df, teacher_col, student_col, num_samples=int(num_samples))
-
-        # Load tokenizer for preprocessing
-        tokenizer = AutoTokenizer.from_pretrained(model_id)
-        if tokenizer.pad_token is None:
-            tokenizer.pad_token = tokenizer.eos_token
-
-        # Tokenize dataset
-        tokenized_dataset = {
-            "train": tokenize_data(dataset["train"], tokenizer),
-            "validation": tokenize_data(dataset["test"], tokenizer),
-        }
-
-        # Create output directory
-        output_dir = "./fine_tuned_model"
-        os.makedirs(output_dir, exist_ok=True)
-
-        # Finetune model with memory optimizations
-        finetune_model(
-            model_id=model_id,
-            train_data=tokenized_dataset,
-            output_dir=output_dir,
-            epochs=int(epochs),
-            batch_size=int(batch_size),
-        )
-
-        return "Fine-tuning completed successfully!"
-
-    except Exception as e:
-        logger.error(f"Error during fine-tuning: {e}")
-        return f"Error during fine-tuning: {str(e)}"
-
-# Create Gradio interface
-with gr.Blocks() as demo:
-    gr.Markdown("# Teacher-Student Bot Fine-Tuning")
-
-    with gr.Tab("Upload Data"):
-        file_input = gr.File(label="Upload CSV File")
-        with gr.Row():
-            teacher_col = gr.Textbox(label="Teacher Column", value="Unnamed: 0")
-            student_col = gr.Textbox(label="Student Column", value="idx")
-            num_samples = gr.Slider(label="Number of Samples", minimum=10, maximum=1000, value=100, step=10)
-        upload_btn = gr.Button("Process CSV")
-        csv_output = gr.Textbox(label="CSV Processing Result")
-        upload_btn.click(process_csv, inputs=[file_input, teacher_col, student_col, num_samples], outputs=csv_output)
-
-    with gr.Tab("Fine-Tune"):
-        model_id = gr.Textbox(label="Model ID", value="mistralai/Mistral-7B-v0.1")
-        with gr.Row():
-            batch_size = gr.Number(label="Batch Size", value=8, info="Recommended: 8 or lower for 7B models")
-            epochs = gr.Number(label="Number of Epochs", value=2)
-
-        training_btn = gr.Button("Start Fine-Tuning")
-        training_output = gr.Textbox(label="Training Progress")
-
-        training_btn.click(
-            start_fine_tuning,
-            inputs=[file_input, teacher_col, student_col, model_id, epochs, batch_size, num_samples],
-            outputs=training_output
-        )
-
-# Launch the app - REMOVED the spaces.zero.mount() call that was causing the error
-demo.queue().launch(debug=True)
import pandas as pd
import torch
import os
+import gc
from datasets import Dataset
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer
from peft import LoraConfig, get_peft_model, TaskType, prepare_model_for_kbit_training
import logging
+import os

+# Set environment variables for memory management
+os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)

+# [Your existing load_data, prepare_dataset, and tokenize_data functions]

def finetune_model(model_id, train_data, output_dir, epochs, batch_size=None):
    """
+    Fine-tune a model with ultra aggressive memory optimizations for small GPUs
    """
    logger.info(f"Using model: {model_id}")

+    # Force CUDA garbage collection
+    if torch.cuda.is_available():
+        torch.cuda.empty_cache()
+        gc.collect()
+
    # Load tokenizer
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

+    # ============ MEMORY OPTIMIZATION 1: MICRO BATCH SIZE ============
+    # Use batch size of 1 since we have only ~15GB GPU
+    actual_batch_size = 1
+    logger.info(f"Using micro batch size: {actual_batch_size} for ~15GB GPU")
+
+    # ============ MEMORY OPTIMIZATION 2: 4-bit QUANTIZATION ============
+    # 4-bit is more memory efficient than 8-bit
+    from transformers import BitsAndBytesConfig

+    bnb_config = BitsAndBytesConfig(
+        load_in_4bit=True,
+        bnb_4bit_quant_type="nf4",
+        bnb_4bit_compute_dtype=torch.float16,
+        bnb_4bit_use_double_quant=True,
+    )
+
+    # Load model with 4-bit quantization
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
+        quantization_config=bnb_config,
+        device_map="auto",
+        use_cache=False,
+        torch_dtype=torch.float16,
+        # ============ MEMORY OPTIMIZATION 3: MODEL LOADING OPTIONS ============
+        max_memory={0: "10GB"},  # Limit memory usage
+        offload_folder="offload",  # Set offload folder
+        offload_state_dict=True,  # Offload state dict to CPU
    )

    logger.info(f"Model parameters: {model.num_parameters():,}")

+    # Prepare model for training
    model = prepare_model_for_kbit_training(model)

+    # Enable gradient checkpointing
    model.gradient_checkpointing_enable()
+    logger.info("Gradient checkpointing enabled")

+    # ============ MEMORY OPTIMIZATION 4: MINIMAL LORA CONFIG ============
+    # Use absolute minimum LoRA configuration
    peft_config = LoraConfig(
        task_type=TaskType.CAUSAL_LM,
        inference_mode=False,
+        r=2,  # Minimal rank
+        lora_alpha=8,  # Reduced alpha
+        lora_dropout=0.05,  # Reduced dropout
+        target_modules=["q_proj", "v_proj"],  # Only query and value projections
    )
+    logger.info("Using minimal LoRA parameters: r=2, target=q_proj,v_proj only")

+    # Apply LoRA adapters
    model = get_peft_model(model, peft_config)
+    model.print_trainable_parameters()

+    # Define training arguments with extreme memory optimization
    training_args = TrainingArguments(
        output_dir=output_dir,
        num_train_epochs=epochs,
+        # ============ MEMORY OPTIMIZATION 5: MICRO BATCH + HUGE ACCUMULATION ============
        per_device_train_batch_size=actual_batch_size,
        per_device_eval_batch_size=actual_batch_size,
+        gradient_accumulation_steps=16,  # Accumulate gradients over many steps
+        # ============ MEMORY OPTIMIZATION 6: MIXED PRECISION ============
+        fp16=True,
+        # ============ MEMORY OPTIMIZATION 7: GRADIENT CHECKPOINTING ============
        gradient_checkpointing=True,
+        # ============ MEMORY OPTIMIZATION 8: MINIMAL EVAL AND LOGGING ============
+        logging_steps=50,
+        save_strategy="no",  # Don't save checkpoints during training
+        evaluation_strategy="no",  # Skip evaluation to save memory
+        # ============ MEMORY OPTIMIZATION 9: DEEPSPEED OFFLOADING ============
+        deepspeed={
+            "zero_optimization": {
+                "stage": 2,
+                "offload_optimizer": {
+                    "device": "cpu",
+                    "pin_memory": True
+                },
+                "allgather_partitions": True,
+                "allgather_bucket_size": 5e8,
+                "reduce_scatter": True,
+                "reduce_bucket_size": 5e8,
+                "overlap_comm": True,
+                "contiguous_gradients": True,
+            },
+            "fp16": {
+                "enabled": True
+            }
+        },
        # Other parameters
+        learning_rate=1e-4,  # Reduced learning rate
        weight_decay=0.01,
        warmup_ratio=0.03,
+        optim="adamw_hf",  # HF's implementation is more memory efficient
+        report_to="none",
    )

+    # Initialize trainer
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_data["train"],
+        tokenizer=tokenizer,  # Important for tokenization during training
    )

+    # Final memory cleanup before training
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
+        gc.collect()
+        logger.info("CUDA cache cleared before training")

    # Start training
+    logger.info("Starting training with ultra memory-efficient settings...")
    trainer.train()

    # Save the model

    return model, tokenizer

+# [Rest of your Gradio interface code]
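The updated file keeps only a placeholder comment where the data-preparation helpers used to be. For reference, here is a minimal sketch of what load_data, prepare_dataset, and tokenize_data could look like, reconstructed from the lines removed in this diff; the signatures and the num_samples default are inferred from how the removed Gradio handlers called them, so treat this as an illustration rather than the exact original code.

import logging

import pandas as pd
from datasets import Dataset

logger = logging.getLogger(__name__)

def load_data(csv_file):
    # Read the uploaded CSV and log its shape (mirrors the removed version above)
    try:
        df = pd.read_csv(csv_file)
        logger.info(f"CSV columns: {df.columns.tolist()}")
        logger.info(f"Total rows in CSV: {len(df)}")
        return df
    except Exception as e:
        logger.error(f"Error loading CSV: {e}")
        return None

def prepare_dataset(df, teacher_col, student_col, num_samples=100):
    # Format each row as a teacher/student prompt and split into train/test sets
    logger.info(f"Using columns: {teacher_col} (teacher) and {student_col} (student)")
    formatted_data = []
    for i in range(min(num_samples, len(df))):
        teacher_text = str(df.iloc[i][teacher_col])
        student_text = str(df.iloc[i][student_col])
        formatted_text = f"### Teacher: {teacher_text}\n### Student: {student_text}"
        formatted_data.append({"text": formatted_text})
    logger.info(f"Created {len(formatted_data)} formatted examples")
    dataset = Dataset.from_list(formatted_data)
    return dataset.train_test_split(test_size=0.1, seed=42)

def tokenize_data(dataset, tokenizer, max_length=512):
    # Tokenize the "text" field with truncation and fixed-length padding
    def tokenize_function(examples):
        return tokenizer(
            examples["text"],
            truncation=True,
            max_length=max_length,
            padding="max_length",
        )
    return dataset.map(tokenize_function, batched=True)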
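The Gradio wiring is likewise reduced to a placeholder at the end of the new file. Based on the interface removed in this commit, the handler that feeds finetune_model presumably still looks roughly like the sketch below. It assumes the helper sketch above, flattens the original Tab/Row layout, and drops the try/except for brevity, so it is an illustration rather than the remaining app code; note that the new finetune_model ignores the batch_size argument and always trains with a micro batch of 1.

import os

import gradio as gr
from transformers import AutoTokenizer

def start_fine_tuning(file, teacher_col, student_col, model_id, epochs, batch_size, num_samples):
    # Load the CSV, format and tokenize it, then hand everything to finetune_model
    df = load_data(file.name)
    if df is None:
        return "Error loading CSV file"
    dataset = prepare_dataset(df, teacher_col, student_col, num_samples=int(num_samples))
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token
    tokenized_dataset = {
        "train": tokenize_data(dataset["train"], tokenizer),
        "validation": tokenize_data(dataset["test"], tokenizer),
    }
    output_dir = "./fine_tuned_model"
    os.makedirs(output_dir, exist_ok=True)
    finetune_model(
        model_id=model_id,
        train_data=tokenized_dataset,
        output_dir=output_dir,
        epochs=int(epochs),
        batch_size=int(batch_size),  # overridden to 1 inside the new finetune_model
    )
    return "Fine-tuning completed successfully!"

with gr.Blocks() as demo:
    gr.Markdown("# Teacher-Student Bot Fine-Tuning")
    file_input = gr.File(label="Upload CSV File")
    teacher_col = gr.Textbox(label="Teacher Column", value="Unnamed: 0")
    student_col = gr.Textbox(label="Student Column", value="idx")
    num_samples = gr.Slider(label="Number of Samples", minimum=10, maximum=1000, value=100, step=10)
    model_id = gr.Textbox(label="Model ID", value="mistralai/Mistral-7B-v0.1")
    epochs = gr.Number(label="Number of Epochs", value=2)
    batch_size = gr.Number(label="Batch Size", value=8)
    training_btn = gr.Button("Start Fine-Tuning")
    training_output = gr.Textbox(label="Training Progress")
    training_btn.click(
        start_fine_tuning,
        inputs=[file_input, teacher_col, student_col, model_id, epochs, batch_size, num_samples],
        outputs=training_output,
    )

demo.queue().launch(debug=True)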
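One arithmetic point implicit in the new TrainingArguments: with per_device_train_batch_size=1 and gradient_accumulation_steps=16 on a single GPU (the ~15GB device the comments assume), the effective batch size per optimizer step works out to 16. A tiny sketch of that calculation:

# Effective batch size implied by the settings above (single-GPU assumption)
per_device_train_batch_size = 1
gradient_accumulation_steps = 16
num_devices = 1  # assumed: one ~15GB GPU
effective_batch_size = per_device_train_batch_size * gradient_accumulation_steps * num_devices
print(effective_batch_size)  # 16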