Update app.py
app.py CHANGED
@@ -1,307 +1,252 @@
import gradio as gr
-import os
-import torch
-import json
import pandas as pd
from datasets import Dataset
-from transformers import
-    TrainingArguments,
-    Trainer,
-    DataCollatorForLanguageModeling
-)
-from peft import (
-    LoraConfig,
-    get_peft_model,
-    prepare_model_for_kbit_training,
-    PeftModel
-)
-import spaces
-from huggingface_hub import login

-    teacher_col = None
-    student_col = None
-
-    for col in df.columns:
-        col_lower = col.lower()
-        if 'teacher' in col_lower or 'instructor' in col_lower or 'prompt' in col_lower:
-            teacher_col = col
-        elif 'student' in col_lower or 'response' in col_lower or 'answer' in col_lower:
-            student_col = col
-
-    # If we couldn't identify columns, use the first two
-    if teacher_col is None or student_col is None:
-        teacher_col = df.columns[0]
-        student_col = df.columns[1]
-        print(f"Using columns: {teacher_col} (teacher) and {student_col} (student)")
-    else:
-        print(f"Identified columns: {teacher_col} (teacher) and {student_col} (student)")
-
-    # Sample rows
-    if sample_size >= len(df):
-        sampled_df = df
-    else:
-        sampled_df = df.sample(n=sample_size, random_state=42)
-
-    # Format data
-    texts = []
-    for _, row in sampled_df.iterrows():
-        teacher_text = str(row[teacher_col]).strip()
-        student_text = str(row[student_col]).strip()
-
-        # Format according to the document format:
-        # <s> [INST] Teacher ** <Dialogue> [/INST] Student** <Dialogue> </s>
-        formatted_text = f"<s> [INST] Teacher ** {teacher_text} [/INST] Student** {student_text} </s>"
-        texts.append(formatted_text)
-
-    return Dataset.from_dict({"text": texts})
-
-@spaces.GPU
-def finetune_model(csv_file, sample_size=100, num_epochs=3, progress=gr.Progress()):
-    """Fine-tune the model and return results"""
-    # Check GPU
-    if torch.cuda.is_available():
-        print(f"GPU available: {torch.cuda.get_device_name(0)}")
-        device = torch.device("cuda")
-    else:
-        print("No GPU available, fine-tuning will be extremely slow!")
-        device = torch.device("cpu")
-
-    dataset = sample_from_csv(csv_file, sample_size)

    # Split dataset

-    model_name = "mistralai/Mistral-7B-v0.1"
-    print(f"Using model: {model_name}")
-
-    tokenizer = AutoTokenizer.from_pretrained(model_name, token=hf_token)
-    tokenizer.pad_token = tokenizer.eos_token
-
-    # Tokenize dataset
    def tokenize_function(examples):
-        return tokenizer(

    model = AutoModelForCausalLM.from_pretrained(
-        device_map="auto",
    )

    model = prepare_model_for_kbit_training(model)
-    model = get_peft_model(model, lora_config)

-    output_dir = "mistral7b_finetuned"
    training_args = TrainingArguments(
        output_dir=output_dir,
-        num_train_epochs=
        logging_steps=10,
        learning_rate=2e-4,
-        weight_decay=0.
    )

-    # Initialize
-    data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)
    trainer = Trainer(
        model=model,
        args=training_args,
-        train_dataset=
-        eval_dataset=
    )

    trainer.train()

-    # Save model
-    trainer.model.save_pretrained(output_dir)
    tokenizer.save_pretrained(output_dir)

-    # Generate responses
-    results = []
-    for prompt in test_prompts:
-        formatted_prompt = f"<s> [INST] Teacher ** {prompt} [/INST] Student**"
-        inputs = tokenizer(formatted_prompt, return_tensors="pt").to(device)
-            **inputs,
-            max_length=200,
-            temperature=0.7,
-            top_p=0.95,
-            do_sample=True,
-        )

with gr.Blocks() as demo:
-    gr.Markdown("#
-    with gr.Tab("
-        system_output = gr.Textbox(label="System Status", lines=5)
-
-        @spaces.GPU
-        def check_system():
-            status = []
-            # Check GPU
-            if torch.cuda.is_available():
-                status.append(f"✅ GPU AVAILABLE: {torch.cuda.get_device_name(0)}")
-                gpu_memory = f"Total GPU memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB"
-                status.append(gpu_memory)
-            else:
-                status.append("❌ NO GPU DETECTED.")
-
-            # Check HF token
-            if os.environ.get("HF_TOKEN"):
-                status.append("✅ Hugging Face token found")
-            else:
-                status.append("❌ No Hugging Face token found. You may encounter access issues with gated models.")
-
-            # Check if we can access Mistral model
-            try:
-                from huggingface_hub import model_info
-                info = model_info("mistralai/Mistral-7B-v0.1", token=hf_token)
-                status.append(f"✅ Access to Mistral-7B-v0.1 model verified: {info.modelId}")
-            except Exception as e:
-                status.append(f"❌ Cannot access Mistral-7B-v0.1 model: {str(e)}")
-
-            return "\n".join(status)
-
-        check_btn.click(check_system, inputs=[], outputs=[system_output])
-
-    with gr.Tab("Fine-tune Model"):
-        with gr.Row():
-            csv_input = gr.File(label="Upload Teacher-Student CSV")
-
-        with gr.Row():
-            sample_size = gr.Slider(minimum=10, maximum=1000, value=100, step=10, label="Sample Size")
-            epochs = gr.Slider(minimum=1, maximum=10, value=3, step=1, label="Number of Epochs")

        with gr.Row():

        with gr.Row():
-            start_btn.click(finetune_model, inputs=[csv_input, sample_size, epochs], outputs=[output])
-
-    with gr.Tab("About"):
-        gr.Markdown("""
-        ## Fine-Tuning Mistral 7B for Student Bot
-
-        This app fine-tunes the original Mistral-7B-v0.1 model to respond like a student to teacher prompts.
-
-        ### Requirements
-        - CSV file with teacher-student conversation pairs
-        - GPU acceleration (provided by this Space)
-        - Hugging Face authentication for accessing Mistral-7B-v0.1 (which is a gated model)
-
-        2. Set sample size and number of epochs
-        3. Click "Start Fine-Tuning"
-        4. View test results with sample prompts
-        """)

-# Launch
import gradio as gr
import pandas as pd
+import torch
+import os
from datasets import Dataset
+from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer, DataCollatorForLanguageModeling
+from peft import LoraConfig, get_peft_model, TaskType, prepare_model_for_kbit_training
+import spaces  # Import the spaces library for HF Spaces integration

+# Initialize logging
+import logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)

+# Function to load and process data
+def load_data(csv_file):
+    try:
+        df = pd.read_csv(csv_file)
+        logger.info(f"CSV columns: {df.columns.tolist()}")
+        logger.info(f"Total rows in CSV: {len(df)}")
+        return df
+    except Exception as e:
+        logger.error(f"Error loading CSV: {e}")
+        return None

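# Illustrative only (not from the original app): the CSV is expected to hold one
# teacher utterance and one student reply per row; the column names are whatever
# the user enters in the UI below, e.g.
#
#   teacher,student
#   "Explain Newton's first law.","An object keeps moving unless a force acts on it."
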
+# Function to prepare dataset
+def prepare_dataset(df, teacher_col, student_col, num_samples=100):
+    # Extract and format data
+    logger.info(f"Using columns: {teacher_col} (teacher) and {student_col} (student)")
+
+    formatted_data = []
+    for i in range(min(num_samples, len(df))):
+        teacher_text = str(df.iloc[i][teacher_col])
+        student_text = str(df.iloc[i][student_col])
+
+        # Create prompt
+        formatted_text = f"### Teacher: {teacher_text}\n### Student: {student_text}"
+        formatted_data.append({"text": formatted_text})
+
+    logger.info(f"Created {len(formatted_data)} formatted examples")
+
+    # Create dataset
+    dataset = Dataset.from_list(formatted_data)

    # Split dataset
+    train_val_split = dataset.train_test_split(test_size=0.1, seed=42)

+    return train_val_split
+
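# Illustrative only: for a hypothetical row, prepare_dataset yields an entry such as
#   {"text": "### Teacher: What is photosynthesis?\n### Student: Plants turn sunlight into food."}
# and train_test_split returns a DatasetDict whose splits are read later as
# dataset["train"] and dataset["test"].
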
+# Function to tokenize data
+def tokenize_data(dataset, tokenizer, max_length=512):
    def tokenize_function(examples):
+        return tokenizer(
+            examples["text"],
+            truncation=True,
+            max_length=max_length,
+            padding="max_length"
+        )
+
+    tokenized_dataset = dataset.map(tokenize_function, batched=True)
+    return tokenized_dataset
+
+
# Main fine-tuning function with memory optimizations
|
64 |
+
def finetune_model(model_id, train_data, output_dir, epochs, batch_size=None):
|
65 |
+
"""
|
66 |
+
Fine-tune a model with optimized memory settings to prevent CUDA OOM errors.
|
67 |
+
"""
|
68 |
+
logger.info(f"Using model: {model_id}")
|
69 |
|
70 |
+
# Load tokenizer
|
71 |
+
tokenizer = AutoTokenizer.from_pretrained(model_id)
|
72 |
+
if tokenizer.pad_token is None:
|
73 |
+
tokenizer.pad_token = tokenizer.eos_token
|
74 |
+
|
75 |
+
# ============ MEMORY OPTIMIZATION 1: REDUCED BATCH SIZE ============
|
76 |
+
# A smaller batch size dramatically reduces memory usage during training
|
77 |
+
# For 7B models on limited VRAM (40GB), values between 1-8 are recommended
|
78 |
+
actual_batch_size = 8 if batch_size is None else min(batch_size, 8)
|
79 |
+
logger.info(f"Using batch size: {actual_batch_size} (reduced from original to save memory)")
|
80 |
+
|
81 |
+
# ============ MEMORY OPTIMIZATION 2: 8-bit QUANTIZATION ============
|
82 |
+
# Load model in 8-bit to reduce memory footprint during training
|
83 |
model = AutoModelForCausalLM.from_pretrained(
|
84 |
+
model_id,
|
85 |
+
load_in_8bit=True, # Use 8-bit quantization to reduce memory usage
|
86 |
+
device_map="auto", # Automatically handle model distribution
|
87 |
+
use_cache=False, # Disable KV cache which uses extra memory
|
88 |
+
torch_dtype=torch.float16, # Use lower precision
|
89 |
)
|
90 |
|
91 |
+
# Count model parameters
|
92 |
+
logger.info(f"Model parameters: {model.num_parameters():,}")
|
93 |
+
|
94 |
+
# Prepare model for training with quantization
|
95 |
model = prepare_model_for_kbit_training(model)
|
|
|
96 |
|
97 |
+
# ============ MEMORY OPTIMIZATION 3: GRADIENT CHECKPOINTING ============
|
98 |
+
# Enable gradient checkpointing to trade compute for memory
|
99 |
+
# This recomputes forward activations during backward pass instead of storing them
|
100 |
+
model.gradient_checkpointing_enable()
|
101 |
+
logger.info("Gradient checkpointing enabled: trading computation for memory savings")
|
102 |
+
|
103 |
+
# ============ MEMORY OPTIMIZATION 4: OPTIMIZED LORA CONFIG ============
|
104 |
+
# Use lower rank and fewer modules to reduce memory requirements
|
105 |
+
peft_config = LoraConfig(
|
106 |
+
task_type=TaskType.CAUSAL_LM,
|
107 |
+
inference_mode=False,
|
108 |
+
r=4, # REDUCED from default 8/16 to save memory
|
109 |
+
lora_alpha=16, # Scaling factor
|
110 |
+
lora_dropout=0.1, # Dropout probability for regularization
|
111 |
+
# Target specific modules instead of all linear layers to save memory
|
112 |
+
target_modules=["q_proj", "v_proj"], # Only attention query and value projections
|
113 |
+
)
|
114 |
+
logger.info("Using optimized LoRA parameters with reduced rank (r=4) and targeted modules")
|
115 |
+
|
116 |
+
# Apply LoRA adapters to the model
|
117 |
+
model = get_peft_model(model, peft_config)
|
118 |
+
model.print_trainable_parameters() # Print trainable parameters info
|
119 |
|
120 |
+
# Define training arguments
|
|
|
121 |
training_args = TrainingArguments(
|
122 |
output_dir=output_dir,
|
123 |
+
num_train_epochs=epochs,
|
124 |
+
# ============ MEMORY OPTIMIZATION 5: REDUCED BATCH SIZE IN ARGS ============
|
125 |
+
per_device_train_batch_size=actual_batch_size,
|
126 |
+
per_device_eval_batch_size=actual_batch_size,
|
127 |
+
# ============ MEMORY OPTIMIZATION 6: MIXED PRECISION TRAINING ============
|
128 |
+
# Mixed precision significantly reduces memory usage
|
129 |
+
fp16=True, # Use FP16 for mixed precision training
|
130 |
+
# ============ MEMORY OPTIMIZATION 7: GRADIENT ACCUMULATION ============
|
131 |
+
# Simulate larger batch sizes without the memory cost
|
132 |
+
gradient_accumulation_steps=4, # Accumulate gradients over 4 steps (effective batch size = 8*4=32)
|
133 |
+
# ============ MEMORY OPTIMIZATION 8: GRADIENT CHECKPOINTING IN ARGS ============
|
134 |
+
gradient_checkpointing=True,
|
135 |
+
# Other parameters
|
136 |
logging_steps=10,
|
137 |
+
save_strategy="epoch",
|
138 |
+
evaluation_strategy="epoch",
|
139 |
learning_rate=2e-4,
|
140 |
+
weight_decay=0.01,
|
141 |
+
warmup_ratio=0.03,
|
142 |
+
# ============ MEMORY OPTIMIZATION 9: REDUCED OPTIMIZER OVERHEAD ============
|
143 |
+
optim="adamw_torch_fused", # More memory-efficient optimizer
|
144 |
+
# ============ MEMORY OPTIMIZATION 10: REDUCED LOGGING MEMORY ============
|
145 |
+
report_to="none", # Disable extra logging to save memory
|
146 |
)
|
147 |
|
148 |
+
# Initialize the Trainer
|
|
|
149 |
trainer = Trainer(
|
150 |
model=model,
|
151 |
args=training_args,
|
152 |
+
train_dataset=train_data["train"],
|
153 |
+
eval_dataset=train_data["validation"],
|
154 |
+
tokenizer=tokenizer,
|
155 |
)
|
156 |
|
157 |
+
# ============ MEMORY OPTIMIZATION 11: MANAGE CUDA CACHE ============
|
158 |
+
# Clear CUDA cache before training to start with a clean memory state
|
159 |
+
if torch.cuda.is_available():
|
160 |
+
torch.cuda.empty_cache()
|
161 |
+
logger.info("CUDA cache cleared before training")
|
162 |
+
|
163 |
+
# Start training
|
164 |
+
logger.info("Starting training...")
|
165 |
trainer.train()
|
166 |
|
167 |
+
# Save the model
|
168 |
+
model.save_pretrained(output_dir)
|
|
|
169 |
tokenizer.save_pretrained(output_dir)
|
170 |
+
logger.info(f"Model saved to {output_dir}")
|
171 |
|
172 |
+
return model, tokenizer
|
173 |
+
|
174 |
+
# Gradio interface functions
|
175 |
+
def process_csv(file, teacher_col, student_col, num_samples):
|
176 |
+
df = load_data(file.name)
|
177 |
+
if df is None:
|
178 |
+
return "Error loading CSV file"
|
179 |
+
return f"CSV loaded successfully with {len(df)} rows"
|
180 |
+
|
181 |
+
def start_fine_tuning(file, teacher_col, student_col, model_id, epochs, batch_size, num_samples):
|
182 |
+
try:
|
183 |
+
# Load and process data
|
184 |
+
df = load_data(file.name)
|
185 |
+
if df is None:
|
186 |
+
return "Error loading CSV file"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
187 |
|
188 |
+
# Prepare dataset
|
189 |
+
dataset = prepare_dataset(df, teacher_col, student_col, num_samples=int(num_samples))
|
|
|
|
|
|
|
|
|
|
|
|
|
190 |
|
191 |
+
# Load tokenizer for preprocessing
|
192 |
+
tokenizer = AutoTokenizer.from_pretrained(model_id)
|
193 |
+
if tokenizer.pad_token is None:
|
194 |
+
tokenizer.pad_token = tokenizer.eos_token
|
195 |
|
196 |
+
# Tokenize dataset
|
197 |
+
tokenized_dataset = {
|
198 |
+
"train": tokenize_data(dataset["train"], tokenizer),
|
199 |
+
"validation": tokenize_data(dataset["test"], tokenizer),
|
200 |
+
}
|
201 |
+
|
202 |
+
# Create output directory
|
203 |
+
output_dir = "./fine_tuned_model"
|
204 |
+
os.makedirs(output_dir, exist_ok=True)
|
205 |
+
|
206 |
+
# Finetune model with memory optimizations
|
207 |
+
finetune_model(
|
208 |
+
model_id=model_id,
|
209 |
+
train_data=tokenized_dataset,
|
210 |
+
output_dir=output_dir,
|
211 |
+
epochs=int(epochs),
|
212 |
+
batch_size=int(batch_size),
|
213 |
+
)
|
214 |
+
|
215 |
+
return "Fine-tuning completed successfully!"
|
216 |
|
217 |
+
except Exception as e:
|
218 |
+
logger.error(f"Error during fine-tuning: {e}")
|
219 |
+
return f"Error during fine-tuning: {str(e)}"
|
220 |
|
+# Create Gradio interface
with gr.Blocks() as demo:
+    gr.Markdown("# Teacher-Student Bot Fine-Tuning")

+    with gr.Tab("Upload Data"):
+        file_input = gr.File(label="Upload CSV File")
        with gr.Row():
+            teacher_col = gr.Textbox(label="Teacher Column", value="Unnamed: 0")
+            student_col = gr.Textbox(label="Student Column", value="idx")
+            num_samples = gr.Slider(label="Number of Samples", minimum=10, maximum=1000, value=100, step=10)
+        upload_btn = gr.Button("Process CSV")
+        csv_output = gr.Textbox(label="CSV Processing Result")
+        upload_btn.click(process_csv, inputs=[file_input, teacher_col, student_col, num_samples], outputs=csv_output)
+
+    with gr.Tab("Fine-Tune"):
+        model_id = gr.Textbox(label="Model ID", value="mistralai/Mistral-7B-v0.1")
        with gr.Row():
+            batch_size = gr.Number(label="Batch Size", value=8, info="Recommended: 8 or lower for 7B models")
+            epochs = gr.Number(label="Number of Epochs", value=2)

+        training_btn = gr.Button("Start Fine-Tuning")
+        training_output = gr.Textbox(label="Training Progress")

+        training_btn.click(
+            start_fine_tuning,
+            inputs=[file_input, teacher_col, student_col, model_id, epochs, batch_size, num_samples],
+            outputs=training_output
+        )

+# Launch the Space
+demo.queue().launch(debug=True)