zetavg committed · Commit 41468ea · 1 Parent(s): 8cb0300
llama_lora/lib/finetune.py CHANGED
@@ -27,7 +27,7 @@ def train(
     base_model: Any,
     tokenizer: Any,
     output_dir: str,
-    train_dataset_data: List[Any],
+    train_data: List[Any],
     # training hyperparams
     micro_batch_size: int = 4,
     gradient_accumulation_steps: int = 32,
@@ -229,11 +229,11 @@ def train(
     )
     model = get_peft_model(model, config)
 
-    # If train_dataset_data is a list, convert it to datasets.Dataset
-    if isinstance(train_dataset_data, list):
+    # If train_data is a list, convert it to datasets.Dataset
+    if isinstance(train_data, list):
         with open(os.path.join(output_dir, "train_data_samples.json"), 'w') as file:
-            json.dump(list(train_dataset_data[:100]), file, indent=2)
-        train_dataset_data = Dataset.from_list(train_dataset_data)
+            json.dump(list(train_data[:100]), file, indent=2)
+        train_data = Dataset.from_list(train_data)
 
     if resume_from_checkpoint:
         # Check the available weights and load them
@@ -259,7 +259,7 @@ def train(
     model.print_trainable_parameters()
 
     if val_set_size > 0:
-        train_val = train_dataset_data.train_test_split(
+        train_val = train_data.train_test_split(
             test_size=val_set_size, shuffle=True, seed=42
         )
         train_data = (
@@ -269,7 +269,7 @@ def train(
             train_val["test"].shuffle().map(generate_and_tokenize_prompt)
         )
     else:
-        train_data = train_dataset_data.shuffle().map(generate_and_tokenize_prompt)
+        train_data = train_data.shuffle().map(generate_and_tokenize_prompt)
         val_data = None
 
     if not ddp and torch.cuda.device_count() > 1:
@@ -287,7 +287,7 @@ def train(
         warmup_steps=100,
         num_train_epochs=num_train_epochs,
         learning_rate=learning_rate,
-        fp16=True,
+        # fp16=True,
         logging_steps=logging_steps,
         optim="adamw_torch",
         evaluation_strategy="steps" if val_set_size > 0 else "no",
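For context, the sketch below illustrates the pattern the hunks above rely on: a plain Python list of records is dumped to JSON for inspection, converted to a datasets.Dataset with Dataset.from_list, and then split and tokenized via train_test_split and shuffle().map(). This is a minimal sketch, not the project's code; the sample records, output_dir, and tokenize_fn (standing in for generate_and_tokenize_prompt) are placeholders.

# Hedged sketch of the list -> datasets.Dataset pattern used in the diff above.
# The records, output_dir, and tokenize_fn are illustrative placeholders.
import json
import os

from datasets import Dataset

output_dir = "./output"  # placeholder path
os.makedirs(output_dir, exist_ok=True)

train_data = [  # plain Python list, as the caller now passes it in
    {"prompt": "Hello", "completion": "world"},
    {"prompt": "Foo", "completion": "bar"},
]

if isinstance(train_data, list):
    # Keep a small JSON sample of the raw records for later inspection.
    with open(os.path.join(output_dir, "train_data_samples.json"), "w") as file:
        json.dump(list(train_data[:100]), file, indent=2)
    train_data = Dataset.from_list(train_data)

def tokenize_fn(example):
    return example  # no-op stand-in for prompt generation + tokenization

val_set_size = 1
if val_set_size > 0:
    train_val = train_data.train_test_split(
        test_size=val_set_size, shuffle=True, seed=42
    )
    train_split = train_val["train"].shuffle().map(tokenize_fn)
    val_split = train_val["test"].shuffle().map(tokenize_fn)
else:
    train_split = train_data.shuffle().map(tokenize_fn)
    val_split = None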
llama_lora/ui/finetune_ui.py CHANGED
@@ -516,7 +516,7 @@ Train data (first 10):
         # 128, # batch_size (is not used, use gradient_accumulation_steps instead)
         micro_batch_size=micro_batch_size,
         gradient_accumulation_steps=gradient_accumulation_steps,
-        num_epochs=epochs,
+        num_train_epochs=epochs,
         learning_rate=learning_rate,
         cutoff_len=max_seq_length,
         val_set_size=evaluate_data_count,
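The keyword rename in this hunk lines the UI call up with the num_train_epochs name that the -287,7 hunk in finetune.py forwards into what appears to be a transformers.TrainingArguments construction. Below is a hedged sketch of that construction with fp16 commented out as in this commit; only the argument names come from the diff, the concrete values are placeholders.

# Hedged sketch of the TrainingArguments the fp16 / num_train_epochs lines
# above appear to belong to. Values are placeholders, not the project's config.
from transformers import TrainingArguments

epochs = 3  # the value the UI now passes as num_train_epochs

training_args = TrainingArguments(
    output_dir="./output",            # placeholder
    per_device_train_batch_size=4,    # assumed to mirror micro_batch_size
    gradient_accumulation_steps=32,
    warmup_steps=100,
    num_train_epochs=epochs,
    learning_rate=3e-4,               # placeholder
    # fp16=True,                      # disabled by this commit
    logging_steps=10,
    optim="adamw_torch",
    evaluation_strategy="no",         # "steps" when a validation split is used
)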