Kevin Fink committed
Commit 0a86c5b · 1 Parent(s): 6397229
Files changed (1):
  1. app.py +6 -4
app.py CHANGED
@@ -8,6 +8,8 @@ import os
 from huggingface_hub import login
 from peft import get_peft_model, LoraConfig
 
+os.environ['HF_HOME'] = '/data/.huggingface'
+
 @spaces.GPU(duration=120)
 def fine_tune_model(model_name, dataset_name, hub_id, api_key, num_epochs, batch_size, lr, grad):
     try:
@@ -28,8 +30,8 @@ def fine_tune_model(model_name, dataset_name, hub_id, api_key, num_epochs, batch
 
         max_length = 128
         try:
-            tokenized_train_dataset = load_from_disk(f'data/{hub_id.strip()}_train_dataset')
-            tokenized_test_dataset = load_from_disk(f'data/{hub_id.strip()}_test_dataset')
+            tokenized_train_dataset = load_from_disk(f'/data/{hub_id.strip()}_train_dataset')
+            tokenized_test_dataset = load_from_disk(f'/data/{hub_id.strip()}_test_dataset')
             tokenized_datasets = concatenate_datasets([tokenized_train_dataset, tokenized_test_dataset])
         except:
             # Tokenize the dataset
@@ -58,8 +60,8 @@ def fine_tune_model(model_name, dataset_name, hub_id, api_key, num_epochs, batch
 
             tokenized_datasets = dataset.map(tokenize_function, batched=True, batch_size=32)
 
-            tokenized_datasets['train'].save_to_disk(f'data/{hub_id.strip()}_train_dataset')
-            tokenized_datasets['test'].save_to_disk(f'data/{hub_id.strip()}_test_dataset')
+            tokenized_datasets['train'].save_to_disk(f'/data/{hub_id.strip()}_train_dataset')
+            tokenized_datasets['test'].save_to_disk(f'/data/{hub_id.strip()}_test_dataset')
 
 
         # Set training arguments
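
For context, the pattern this commit adjusts is a load-or-tokenize cache kept on the Space's persistent /data volume, with the HF cache pointed there as well. The sketch below is a minimal standalone version of that pattern, not the Space's actual code: the helper name get_tokenized_dataset and the text column passed to the tokenizer are illustrative assumptions, since the body of tokenize_function falls outside the hunks shown above.

import os

# Point the Hugging Face cache at persistent storage *before* importing
# libraries that read HF_HOME at import time.
os.environ['HF_HOME'] = '/data/.huggingface'

from datasets import concatenate_datasets, load_dataset, load_from_disk
from transformers import AutoTokenizer

def get_tokenized_dataset(dataset_name, model_name, hub_id, max_length=128):
    """Reuse tokenized splits cached under /data, or tokenize once and cache them."""
    train_path = f'/data/{hub_id.strip()}_train_dataset'
    test_path = f'/data/{hub_id.strip()}_test_dataset'
    try:
        # Fast path: a previous run already tokenized and saved both splits.
        train = load_from_disk(train_path)
        test = load_from_disk(test_path)
    except FileNotFoundError:
        # Slow path: tokenize from scratch, then persist for the next run.
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        dataset = load_dataset(dataset_name)

        def tokenize_function(examples):
            # Assumed column name; the real app's tokenize_function is not shown.
            return tokenizer(examples['text'], truncation=True,
                             padding='max_length', max_length=max_length)

        tokenized = dataset.map(tokenize_function, batched=True, batch_size=32)
        tokenized['train'].save_to_disk(train_path)
        tokenized['test'].save_to_disk(test_path)
        train, test = tokenized['train'], tokenized['test']
    return concatenate_datasets([train, test])

Two details worth noting: load_from_disk raises FileNotFoundError when the directory is absent, so catching that specific exception is safer than the bare except: in the original; and /data is only durable on Spaces with persistent storage attached, otherwise it behaves like any other ephemeral path.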