Kevin Fink committed
Commit: ff67bb4
1 Parent(s): 33de791
init
app.py CHANGED
@@ -20,7 +20,7 @@ class LoggingCallback(TrainerCallback):
         error_rate = 1 - state.best_metric  # Assuming best_metric is accuracy
         print(f"Current Error Rate: {error_rate:.4f}")

-@spaces.GPU
+@spaces.GPU(duration=1800)
 def fine_tune_model(model_name, dataset_name, hub_id, api_key, num_epochs, batch_size, lr, grad):
     try:
         login(api_key.strip())
@@ -35,10 +35,9 @@ def fine_tune_model(model_name, dataset_name, hub_id, api_key, num_epochs, batch

         # Load the model and tokenizer
         model = AutoModelForSeq2SeqLM.from_pretrained(model_name.strip(), num_labels=2)
-
+        model = get_peft_model(model, lora_config)
         tokenizer = AutoTokenizer.from_pretrained(model_name)

-        chunk_size = 1000
         max_length = 128

         # Tokenize the dataset
@@ -48,7 +47,7 @@ def fine_tune_model(model_name, dataset_name, hub_id, api_key, num_epochs, batch
             model_inputs = tokenizer(
                 examples['text'],
                 max_length=max_length,  # Set to None for dynamic padding
-                padding=
+                padding=True,  # Disable padding here, we will handle it later
                 truncation=True,
             )

@@ -56,7 +55,7 @@ def fine_tune_model(model_name, dataset_name, hub_id, api_key, num_epochs, batch
             labels = tokenizer(
                 examples['target'],
                 max_length=max_length,  # Set to None for dynamic padding
-                padding=
+                padding=True,  # Disable padding here, we will handle it later
                 truncation=True,
                 text_target=examples['target']  # Use text_target for target text
             )
@@ -65,7 +64,7 @@ def fine_tune_model(model_name, dataset_name, hub_id, api_key, num_epochs, batch
             model_inputs["labels"] = labels["input_ids"]
             return model_inputs

-        tokenized_datasets = dataset.map(tokenize_function)
+        tokenized_datasets = dataset.map(tokenize_function, batched=True)
         data_collator = DataCollatorForSeq2Seq(tokenizer, model=model)

         # Set training arguments
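
The first hunk replaces the bare @spaces.GPU decorator with @spaces.GPU(duration=1800). On a ZeroGPU Space the duration argument is given in seconds, so this reserves the GPU for up to 30 minutes per call rather than the default allocation. A minimal sketch of the decorated entry point, with the body elided:

import spaces

@spaces.GPU(duration=1800)  # reserve the GPU for up to 1800 s (30 min) per call
def fine_tune_model(model_name, dataset_name, hub_id, api_key, num_epochs, batch_size, lr, grad):
    ...  # the training code shown in the diff runs inside this call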
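
The second hunk wraps the freshly loaded base model with get_peft_model(model, lora_config), so training updates only LoRA adapter weights via the peft library. lora_config itself is defined elsewhere in app.py and does not appear in these hunks; the sketch below shows what such a config typically looks like for a seq2seq model, with assumed hyperparameter values:

from peft import LoraConfig, TaskType, get_peft_model
from transformers import AutoModelForSeq2SeqLM

# Placeholder checkpoint; in app.py the model name comes from user input.
model = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-small")

# Assumed values; the commit's actual lora_config is defined outside the shown hunks.
lora_config = LoraConfig(
    task_type=TaskType.SEQ_2_SEQ_LM,  # matches AutoModelForSeq2SeqLM
    r=8,
    lora_alpha=32,
    lora_dropout=0.1,
)

model = get_peft_model(model, lora_config)  # only the adapter weights remain trainable
model.print_trainable_parameters()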
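
The remaining hunks set padding=True in both tokenizer calls, pass batched=True to dataset.map so the mapping function receives lists of examples, and keep DataCollatorForSeq2Seq, which pads each batch to its longest sequence at collation time. A common variant of this setup, sketched below with a placeholder checkpoint, skips padding during tokenization entirely and leaves it to the collator:

from transformers import AutoTokenizer, DataCollatorForSeq2Seq

tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-small")  # placeholder checkpoint
max_length = 128

def tokenize_function(examples):
    # With batched=True, examples["text"] and examples["target"] are lists of strings.
    return tokenizer(
        examples["text"],
        text_target=examples["target"],  # tokenized targets are returned as "labels"
        max_length=max_length,
        truncation=True,  # no padding here; the collator pads per batch
    )

# Pads input_ids and labels to the longest sequence in each batch.
data_collator = DataCollatorForSeq2Seq(tokenizer)
# tokenized_datasets = dataset.map(tokenize_function, batched=True)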
|