Kevin Fink committed
Commit 1c20c42 · 1 Parent(s): e504e90
init
app.py CHANGED
@@ -38,9 +38,12 @@ def fine_tune_model(model_name, dataset_name, hub_id, api_key, num_epochs, batch
     #model = get_peft_model(model, lora_config)
     tokenizer = AutoTokenizer.from_pretrained(model_name)
 
+    chunk_size = 1000
+    max_length = 128
+
     # Tokenize the dataset
     def tokenize_function(examples):
-
+
         # Assuming 'text' is the input and 'target' is the expected output
         model_inputs = tokenizer(
             examples['text'],
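
The first hunk introduces two constants. max_length plausibly caps the tokenized sequence length; chunk_size is not referenced anywhere in the lines shown. Below is a minimal sketch of the tokenize_function this hunk builds toward; the truncation/padding arguments and the checkpoint name are assumptions, since the diff truncates the tokenizer(...) call and the Space receives model_name as a parameter:

    from transformers import AutoTokenizer

    # Hypothetical checkpoint; the real model_name is a function parameter.
    tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-small")
    max_length = 128  # mirrors the constant added in this hunk

    def tokenize_function(examples):
        # 'text' is the input and 'target' is the expected output,
        # per the comment carried over in the diff.
        model_inputs = tokenizer(
            examples["text"],
            max_length=max_length,
            truncation=True,
            padding="max_length",  # assumption: the diff does not show these kwargs
        )
        labels = tokenizer(
            examples["target"],
            max_length=max_length,
            truncation=True,
            padding="max_length",
        )
        model_inputs["labels"] = labels["input_ids"]
        return model_inputs
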
@@ -62,7 +65,7 @@ def fine_tune_model(model_name, dataset_name, hub_id, api_key, num_epochs, batch
         model_inputs["labels"] = labels["input_ids"]
         return model_inputs
 
-    tokenized_datasets = dataset.map(tokenize_function, batched=True)
+    tokenized_datasets = dataset.map(tokenize_function, batched=True, batch_size=16)
     data_collator = DataCollatorForSeq2Seq(tokenizer, model=model)
 
     # Set training arguments
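
The second hunk's only change is adding batch_size=16 to the map() call, so tokenize_function now receives 16 rows per call rather than the datasets library default of 1000. A self-contained sketch of that step with a toy in-memory dataset (the checkpoint name and column contents are hypothetical stand-ins consistent with the diff):

    from datasets import Dataset
    from transformers import (
        AutoModelForSeq2SeqLM,
        AutoTokenizer,
        DataCollatorForSeq2Seq,
    )

    # Hypothetical stand-ins; the Space receives model_name and dataset_name
    # as parameters to fine_tune_model().
    model_name = "google/flan-t5-small"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

    dataset = Dataset.from_dict({
        "text": ["translate English to German: hello"],
        "target": ["hallo"],
    })

    def tokenize_function(examples):
        model_inputs = tokenizer(examples["text"], max_length=128, truncation=True)
        labels = tokenizer(examples["target"], max_length=128, truncation=True)
        model_inputs["labels"] = labels["input_ids"]
        return model_inputs

    # batched=True hands tokenize_function a dict of lists; batch_size=16 caps
    # how many rows each call sees (the library default is 1000).
    tokenized_datasets = dataset.map(tokenize_function, batched=True, batch_size=16)

    # DataCollatorForSeq2Seq pads each training batch dynamically and pads the
    # labels with -100 so the loss ignores those positions.
    data_collator = DataCollatorForSeq2Seq(tokenizer, model=model)

Leaving sequences unpadded at map() time and letting the collator pad per batch is the usual pattern with DataCollatorForSeq2Seq; the padding choices inside the Space's actual tokenize_function are not visible in the diff, so this sketch assumes dynamic padding here.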