Kevin Fink committed · Commit e2f4c27 · Parent(s): f4325ab
init
app.py CHANGED
@@ -26,7 +26,7 @@ def fine_tune_model(model_name, dataset_name, hub_id, api_key, num_epochs, batch
     model = get_peft_model(model, lora_config)
     tokenizer = AutoTokenizer.from_pretrained(model_name)
 
-    max_length =
+    max_length = 128
     try:
         tokenized_train_dataset = load_from_disk(f'{hub_id.strip()}_train_dataset')
         tokenized_test_dataset = load_from_disk(f'{hub_id.strip()}_test_dataset')
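The line changed here sets the truncation length that the nested tokenize_function (partially visible in the next hunk) passes to the tokenizer; in the parent commit the assignment was left empty, which is a syntax error. A minimal sketch of the standard Hugging Face pattern this implies, assuming placeholder column names ("text", "target") and a placeholder checkpoint, since the full tokenize_function body is not part of this diff:

from transformers import AutoTokenizer

model_name = "google/flan-t5-small"  # placeholder; app.py receives this as a parameter
tokenizer = AutoTokenizer.from_pretrained(model_name)
max_length = 128  # the value introduced by this commit

def tokenize_function(examples):
    # Truncate/pad inputs to a fixed length so batches stack into tensors.
    model_inputs = tokenizer(
        examples["text"],  # column name assumed, not shown in the diff
        max_length=max_length,
        truncation=True,
        padding="max_length",
    )
    # Targets are tokenized the same way and attached as labels, matching the
    # `model_inputs["labels"] = labels["input_ids"]` context line in the next hunk.
    labels = tokenizer(
        examples["target"],  # column name assumed, not shown in the diff
        max_length=max_length,
        truncation=True,
        padding="max_length",
    )
    model_inputs["labels"] = labels["input_ids"]
    return model_inputs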
@@ -56,7 +56,7 @@ def fine_tune_model(model_name, dataset_name, hub_id, api_key, num_epochs, batch
         model_inputs["labels"] = labels["input_ids"]
         return model_inputs
 
-    tokenized_datasets = dataset.map(tokenize_function, batched=True)
+    tokenized_datasets = dataset.map(tokenize_function, batched=True, batch_size=32)
 
     tokenized_datasets['train'].save_to_disk(f'{hub_id.strip()}_train_dataset')
     tokenized_datasets['validation'].save_to_disk(f'{hub_id.strip()}_test_dataset')
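The second change bounds memory use during preprocessing: with batched=True, Dataset.map hands tokenize_function a dict of lists covering one batch, and batch_size=32 caps each call at 32 rows instead of the datasets default of 1000. A toy sketch of the mechanics with a placeholder dataset and function, not the app.py code:

from datasets import load_dataset

dataset = load_dataset("imdb")  # placeholder dataset with a "text" column

def count_chars(examples):
    # With batched=True, `examples` maps column names to lists of values.
    return {"n_chars": [len(t) for t in examples["text"]]}

# batch_size=32 limits how many rows each call sees; smaller batches
# lower peak memory at the cost of more Python-call overhead.
processed = dataset.map(count_chars, batched=True, batch_size=32)

After mapping, the script caches the splits with save_to_disk so the try/load_from_disk block in the first hunk can skip re-tokenizing on later runs.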