Kevin Fink
committed on
Commit
·
0a86c5b
1
Parent(s):
6397229
dev
Browse files
app.py
CHANGED
@@ -8,6 +8,8 @@ import os
|
|
8 |
from huggingface_hub import login
|
9 |
from peft import get_peft_model, LoraConfig
|
10 |
|
|
|
|
|
11 |
@spaces.GPU(duration=120)
|
12 |
def fine_tune_model(model_name, dataset_name, hub_id, api_key, num_epochs, batch_size, lr, grad):
|
13 |
try:
|
@@ -28,8 +30,8 @@ def fine_tune_model(model_name, dataset_name, hub_id, api_key, num_epochs, batch
|
|
28 |
|
29 |
max_length = 128
|
30 |
try:
|
31 |
-
tokenized_train_dataset = load_from_disk(f'data/{hub_id.strip()}_train_dataset')
|
32 |
-
tokenized_test_dataset = load_from_disk(f'data/{hub_id.strip()}_test_dataset')
|
33 |
tokenized_datasets = concatenate_datasets([tokenized_train_dataset, tokenized_test_dataset])
|
34 |
except:
|
35 |
# Tokenize the dataset
|
@@ -58,8 +60,8 @@ def fine_tune_model(model_name, dataset_name, hub_id, api_key, num_epochs, batch
|
|
58 |
|
59 |
tokenized_datasets = dataset.map(tokenize_function, batched=True, batch_size=32)
|
60 |
|
61 |
-
tokenized_datasets['train'].save_to_disk(f'data/{hub_id.strip()}_train_dataset')
|
62 |
-
tokenized_datasets['test'].save_to_disk(f'data/{hub_id.strip()}_test_dataset')
|
63 |
|
64 |
|
65 |
# Set training arguments
|
|
|
8 |
from huggingface_hub import login
|
9 |
from peft import get_peft_model, LoraConfig
|
10 |
|
11 |
+
os.environ['HF_HOME'] = '/data/.huggingface'
|
12 |
+
|
13 |
@spaces.GPU(duration=120)
|
14 |
def fine_tune_model(model_name, dataset_name, hub_id, api_key, num_epochs, batch_size, lr, grad):
|
15 |
try:
|
|
|
30 |
|
31 |
max_length = 128
|
32 |
try:
|
33 |
+
tokenized_train_dataset = load_from_disk(f'/data/{hub_id.strip()}_train_dataset')
|
34 |
+
tokenized_test_dataset = load_from_disk(f'/data/{hub_id.strip()}_test_dataset')
|
35 |
tokenized_datasets = concatenate_datasets([tokenized_train_dataset, tokenized_test_dataset])
|
36 |
except:
|
37 |
# Tokenize the dataset
|
|
|
60 |
|
61 |
tokenized_datasets = dataset.map(tokenize_function, batched=True, batch_size=32)
|
62 |
|
63 |
+
tokenized_datasets['train'].save_to_disk(f'/data/{hub_id.strip()}_train_dataset')
|
64 |
+
tokenized_datasets['test'].save_to_disk(f'/data/{hub_id.strip()}_test_dataset')
|
65 |
|
66 |
|
67 |
# Set training arguments
|