Kevin Fink committed on
Commit 1c20c42 · 1 Parent(s): e504e90
Files changed (1)
  1. app.py +5 -2
app.py CHANGED
@@ -38,9 +38,12 @@ def fine_tune_model(model_name, dataset_name, hub_id, api_key, num_epochs, batch
     #model = get_peft_model(model, lora_config)
     tokenizer = AutoTokenizer.from_pretrained(model_name)
 
+    chunk_size = 1000
+    max_length = 128
+
     # Tokenize the dataset
     def tokenize_function(examples):
-        max_length = 16
+
         # Assuming 'text' is the input and 'target' is the expected output
         model_inputs = tokenizer(
             examples['text'],
@@ -62,7 +65,7 @@ def fine_tune_model(model_name, dataset_name, hub_id, api_key, num_epochs, batch
         model_inputs["labels"] = labels["input_ids"]
         return model_inputs
 
-    tokenized_datasets = dataset.map(tokenize_function, batched=True)
+    tokenized_datasets = dataset.map(tokenize_function, batched=True, batch_size=16)
     data_collator = DataCollatorForSeq2Seq(tokenizer, model=model)
 
     # Set training arguments
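
For context, a minimal, self-contained sketch of the tokenization flow this commit changes. It is an illustration under stated assumptions, not the repository's code: the checkpoint name ("google/flan-t5-small"), the in-memory toy dataset, and the tokenizer keyword arguments (truncation, padding) are placeholders, since the hunks above only show that max_length moves from 16 to 128, that chunk_size = 1000 is introduced (its use is not visible here), and that dataset.map now passes batch_size=16.

# Minimal sketch of the post-commit tokenization flow (illustrative values only).
from datasets import Dataset
from transformers import AutoTokenizer

model_name = "google/flan-t5-small"  # placeholder checkpoint, not from the commit
tokenizer = AutoTokenizer.from_pretrained(model_name)

chunk_size = 1000  # introduced by the commit; its use is outside the shown hunks
max_length = 128   # the commit raises the cap from 16 to 128 tokens

# Toy stand-in for the dataset loaded elsewhere in fine_tune_model
dataset = Dataset.from_dict({
    "text": ["translate English to German: Hello world"] * 4,
    "target": ["Hallo Welt"] * 4,
})

def tokenize_function(examples):
    # Assuming 'text' is the input and 'target' is the expected output
    model_inputs = tokenizer(
        examples["text"],
        max_length=max_length,
        truncation=True,       # assumed kwargs; the real ones fall outside the hunk
        padding="max_length",
    )
    labels = tokenizer(
        examples["target"],
        max_length=max_length,
        truncation=True,
        padding="max_length",
    )
    model_inputs["labels"] = labels["input_ids"]
    return model_inputs

# batch_size=16 (added by the commit) bounds how many rows each map call sees;
# the datasets library default is 1000, so this lowers peak memory per call.
tokenized_datasets = dataset.map(tokenize_function, batched=True, batch_size=16)
print(tokenized_datasets)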