Kevin Fink committed
Commit 1c20c42 · 1 Parent(s): e504e90
init
app.py CHANGED
@@ -38,9 +38,12 @@ def fine_tune_model(model_name, dataset_name, hub_id, api_key, num_epochs, batch
     #model = get_peft_model(model, lora_config)
     tokenizer = AutoTokenizer.from_pretrained(model_name)
 
+    chunk_size = 1000
+    max_length = 128
+
     # Tokenize the dataset
     def tokenize_function(examples):
-
+
         # Assuming 'text' is the input and 'target' is the expected output
         model_inputs = tokenizer(
             examples['text'],
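
The first hunk introduces two constants. max_length plausibly caps the tokenized sequence length; chunk_size is not referenced anywhere in the lines shown. Below is a minimal sketch of the tokenize_function this hunk builds toward; the truncation/padding arguments and the checkpoint name are assumptions, since the diff truncates the tokenizer(...) call and the Space receives model_name as a parameter:

    from transformers import AutoTokenizer

    # Hypothetical checkpoint; the real model_name is a function parameter.
    tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-small")
    max_length = 128  # mirrors the constant added in this hunk

    def tokenize_function(examples):
        # 'text' is the input and 'target' is the expected output,
        # per the comment carried over in the diff.
        model_inputs = tokenizer(
            examples["text"],
            max_length=max_length,
            truncation=True,
            padding="max_length",  # assumption: the diff does not show these kwargs
        )
        labels = tokenizer(
            examples["target"],
            max_length=max_length,
            truncation=True,
            padding="max_length",
        )
        model_inputs["labels"] = labels["input_ids"]
        return model_inputs
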
@@ -62,7 +65,7 @@ def fine_tune_model(model_name, dataset_name, hub_id, api_key, num_epochs, batch
         model_inputs["labels"] = labels["input_ids"]
         return model_inputs
 
-    tokenized_datasets = dataset.map(tokenize_function, batched=True)
+    tokenized_datasets = dataset.map(tokenize_function, batched=True, batch_size=16)
     data_collator = DataCollatorForSeq2Seq(tokenizer, model=model)
 
     # Set training arguments
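
The second hunk's only change is adding batch_size=16 to the map() call, so tokenize_function now receives 16 rows per call rather than the datasets library default of 1000. A self-contained sketch of that step with a toy in-memory dataset (the checkpoint name and column contents are hypothetical stand-ins consistent with the diff):

    from datasets import Dataset
    from transformers import (
        AutoModelForSeq2SeqLM,
        AutoTokenizer,
        DataCollatorForSeq2Seq,
    )

    # Hypothetical stand-ins; the Space receives model_name and dataset_name
    # as parameters to fine_tune_model().
    model_name = "google/flan-t5-small"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

    dataset = Dataset.from_dict({
        "text": ["translate English to German: hello"],
        "target": ["hallo"],
    })

    def tokenize_function(examples):
        model_inputs = tokenizer(examples["text"], max_length=128, truncation=True)
        labels = tokenizer(examples["target"], max_length=128, truncation=True)
        model_inputs["labels"] = labels["input_ids"]
        return model_inputs

    # batched=True hands tokenize_function a dict of lists; batch_size=16 caps
    # how many rows each call sees (the library default is 1000).
    tokenized_datasets = dataset.map(tokenize_function, batched=True, batch_size=16)

    # DataCollatorForSeq2Seq pads each training batch dynamically and pads the
    # labels with -100 so the loss ignores those positions.
    data_collator = DataCollatorForSeq2Seq(tokenizer, model=model)

Leaving sequences unpadded at map() time and letting the collator pad per batch is the usual pattern with DataCollatorForSeq2Seq; the padding choices inside the Space's actual tokenize_function are not visible in the diff, so this sketch assumes dynamic padding here.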