Kevin Fink committed
Commit e2f4c27 · Parent: f4325ab
Files changed (1): app.py +2 -2
app.py CHANGED
@@ -26,7 +26,7 @@ def fine_tune_model(model_name, dataset_name, hub_id, api_key, num_epochs, batch
     model = get_peft_model(model, lora_config)
     tokenizer = AutoTokenizer.from_pretrained(model_name)
 
-    max_length = 64
+    max_length = 128
     try:
         tokenized_train_dataset = load_from_disk(f'{hub_id.strip()}_train_dataset')
         tokenized_test_dataset = load_from_disk(f'{hub_id.strip()}_test_dataset')
@@ -56,7 +56,7 @@ def fine_tune_model(model_name, dataset_name, hub_id, api_key, num_epochs, batch
         model_inputs["labels"] = labels["input_ids"]
         return model_inputs
 
-    tokenized_datasets = dataset.map(tokenize_function, batched=True)
+    tokenized_datasets = dataset.map(tokenize_function, batched=True, batch_size=32)
 
     tokenized_datasets['train'].save_to_disk(f'{hub_id.strip()}_train_dataset')
     tokenized_datasets['validation'].save_to_disk(f'{hub_id.strip()}_test_dataset')
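
For context, below is a minimal runnable sketch of how the two changed parameters plausibly sit in the surrounding tokenization step, reconstructed from the diff's context lines. The wrapper name `tokenize_and_cache`, the column names `"text"` and `"target"`, and the `FileNotFoundError` handling are illustrative assumptions; only `max_length`, the `map()` call, and the `load_from_disk`/`save_to_disk` caching pattern come from the diff itself.

```python
from datasets import DatasetDict, load_from_disk
from transformers import PreTrainedTokenizerBase

def tokenize_and_cache(dataset: DatasetDict, tokenizer: PreTrainedTokenizerBase,
                       hub_id: str, max_length: int = 128) -> DatasetDict:
    """Tokenize once and reuse the on-disk copy on later runs.

    Sketch reconstructed from the diff context; column names are assumed.
    """

    def tokenize_function(examples):
        # Pad/truncate to a fixed length so examples stack into tensors;
        # this commit raises that length from 64 to 128 tokens.
        model_inputs = tokenizer(examples["text"], max_length=max_length,
                                 truncation=True, padding="max_length")
        labels = tokenizer(examples["target"], max_length=max_length,
                           truncation=True, padding="max_length")
        model_inputs["labels"] = labels["input_ids"]
        return model_inputs

    try:
        # Fast path: a previous run already tokenized and saved both splits.
        train = load_from_disk(f"{hub_id.strip()}_train_dataset")
        test = load_from_disk(f"{hub_id.strip()}_test_dataset")
        return DatasetDict(train=train, validation=test)
    except FileNotFoundError:
        # batched=True hands chunks of rows to tokenize_function; the
        # batch_size=32 added in this commit caps each chunk at 32 rows
        # instead of the library default of 1000.
        tokenized = dataset.map(tokenize_function, batched=True, batch_size=32)
        tokenized["train"].save_to_disk(f"{hub_id.strip()}_train_dataset")
        tokenized["validation"].save_to_disk(f"{hub_id.strip()}_test_dataset")
        return tokenized
```

Taken together, the two changes trade in opposite directions: doubling `max_length` to 128 doubles the padded sequence length, so each example costs more memory and compute downstream, while the explicit `batch_size=32` shrinks the chunks `map()` feeds to `tokenize_function` from the default 1000 rows, which keeps peak memory during preprocessing lower.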