Kevin Fink committed
Commit ff67bb4 · 1 Parent(s): 33de791
Files changed (1)
  1. app.py +5 -6
app.py CHANGED
@@ -20,7 +20,7 @@ class LoggingCallback(TrainerCallback):
         error_rate = 1 - state.best_metric  # Assuming best_metric is accuracy
         print(f"Current Error Rate: {error_rate:.4f}")
 
-@spaces.GPU
+@spaces.GPU(duration=1800)
 def fine_tune_model(model_name, dataset_name, hub_id, api_key, num_epochs, batch_size, lr, grad):
     try:
         login(api_key.strip())
@@ -35,10 +35,9 @@ def fine_tune_model(model_name, dataset_name, hub_id, api_key, num_epochs, batch
 
         # Load the model and tokenizer
         model = AutoModelForSeq2SeqLM.from_pretrained(model_name.strip(), num_labels=2)
-        #model = get_peft_model(model, lora_config)
+        model = get_peft_model(model, lora_config)
         tokenizer = AutoTokenizer.from_pretrained(model_name)
 
-        chunk_size = 1000
         max_length = 128
 
         # Tokenize the dataset
@@ -48,7 +47,7 @@ def fine_tune_model(model_name, dataset_name, hub_id, api_key, num_epochs, batch
             model_inputs = tokenizer(
                 examples['text'],
                 max_length=max_length,  # Set to None for dynamic padding
-                padding=False,  # Disable padding here, we will handle it later
+                padding=True,  # Disable padding here, we will handle it later
                 truncation=True,
             )
 
@@ -56,7 +55,7 @@ def fine_tune_model(model_name, dataset_name, hub_id, api_key, num_epochs, batch
             labels = tokenizer(
                 examples['target'],
                 max_length=max_length,  # Set to None for dynamic padding
-                padding=False,  # Disable padding here, we will handle it later
+                padding=True,  # Disable padding here, we will handle it later
                 truncation=True,
                 text_target=examples['target']  # Use text_target for target text
             )
@@ -65,7 +64,7 @@ def fine_tune_model(model_name, dataset_name, hub_id, api_key, num_epochs, batch
             model_inputs["labels"] = labels["input_ids"]
             return model_inputs
 
-        tokenized_datasets = dataset.map(tokenize_function)
+        tokenized_datasets = dataset.map(tokenize_function, batched=True)
         data_collator = DataCollatorForSeq2Seq(tokenizer, model=model)
 
         # Set training arguments
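Note on the first hunk: it gives the Spaces ZeroGPU decorator an explicit time budget (in seconds), so a full fine-tuning run is not cut off at the default per-call limit. A minimal sketch of the decorated entry point, assuming the stock spaces package available on Hugging Face Spaces; the body is elided and only the signature matches app.py:

    import spaces

    @spaces.GPU(duration=1800)  # reserve GPU time for up to 1800 s (30 min) per call
    def fine_tune_model(model_name, dataset_name, hub_id, api_key,
                        num_epochs, batch_size, lr, grad):
        # ... training logic as in app.py ...
        return "Training complete"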
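Note on the second hunk: it re-enables the previously commented-out get_peft_model(model, lora_config) call, so only LoRA adapter weights are trained rather than the full seq2seq model. The lora_config object is defined elsewhere in app.py; the sketch below only shows one common way such a config is built with the peft library, and the rank, alpha, and target_modules values are illustrative assumptions, not the ones in this repo:

    from peft import LoraConfig, TaskType, get_peft_model
    from transformers import AutoModelForSeq2SeqLM

    # Illustrative values only; app.py defines its own lora_config.
    lora_config = LoraConfig(
        task_type=TaskType.SEQ_2_SEQ_LM,   # matches AutoModelForSeq2SeqLM
        r=8,                               # low-rank adapter dimension
        lora_alpha=32,
        lora_dropout=0.1,
        target_modules=["q", "v"],         # T5-style attention projections
    )

    model = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-small")  # assumed base model
    model = get_peft_model(model, lora_config)
    model.print_trainable_parameters()     # confirms only adapter weights are trainable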
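Note on the remaining hunks: the tokenizer calls switch from padding=False to padding=True and dataset.map now gets batched=True, so each mapped batch is padded to its longest sequence (up to max_length). DataCollatorForSeq2Seq still pads and aligns labels dynamically per training batch, which is what the retained "we will handle it later" comment refers to, so padding during map is optional. A runnable sketch of the flow after this commit, using an assumed public checkpoint and a toy in-memory dataset with the same 'text'/'target' columns that app.py expects:

    from datasets import Dataset
    from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, DataCollatorForSeq2Seq

    checkpoint = "google/flan-t5-small"          # assumed; app.py takes model_name as an argument
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)
    max_length = 128

    def tokenize_function(examples):
        model_inputs = tokenizer(
            examples["text"],
            max_length=max_length,
            padding=True,                        # pads each mapped batch to its longest sequence
            truncation=True,
        )
        labels = tokenizer(
            text_target=examples["target"],      # tokenize targets as labels
            max_length=max_length,
            padding=True,
            truncation=True,
        )
        model_inputs["labels"] = labels["input_ids"]
        return model_inputs

    dataset = Dataset.from_dict({"text": ["translate: hi", "translate: bye"],
                                 "target": ["hallo", "tschuess"]})   # toy data, assumed format
    tokenized = dataset.map(tokenize_function, batched=True)   # batched=True passes lists of examples
    collator = DataCollatorForSeq2Seq(tokenizer, model=model)  # still pads/aligns labels per batch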