Kevin Fink
committed on
Commit
·
81f28e8
1
Parent(s):
0aa217c
dev
Browse files
app.py
CHANGED
@@ -140,6 +140,7 @@ def fine_tune_model(model, dataset_name, hub_id, api_key, num_epochs, batch_size
|
|
140 |
tokenized_train_dataset = dataset['train']
|
141 |
tokenized_test_dataset = dataset['test']
|
142 |
dataset.save_to_disk(f'/data/{hub_id.strip()}_train_dataset3')
|
|
|
143 |
else:
|
144 |
second_third = dataset['train'].select(range(third_size, third_size*2))
|
145 |
dataset['train'] = second_third
|
@@ -147,7 +148,7 @@ def fine_tune_model(model, dataset_name, hub_id, api_key, num_epochs, batch_size
|
|
147 |
tokenized_sh_fq_dataset = dataset.map(tokenize_function, batched=True)
|
148 |
dataset['train'] = concatenate_datasets([saved_dataset['train'], tokenized_sh_fq_dataset['train']])
|
149 |
dataset.save_to_disk(f'/data/{hub_id.strip()}_train_dataset2')
|
150 |
-
return
|
151 |
|
152 |
except:
|
153 |
# Tokenize the dataset
|
|
|
140 |
tokenized_train_dataset = dataset['train']
|
141 |
tokenized_test_dataset = dataset['test']
|
142 |
dataset.save_to_disk(f'/data/{hub_id.strip()}_train_dataset3')
|
143 |
+
return 'THIRD THIRD LOADED'
|
144 |
else:
|
145 |
second_third = dataset['train'].select(range(third_size, third_size*2))
|
146 |
dataset['train'] = second_third
|
|
|
148 |
tokenized_sh_fq_dataset = dataset.map(tokenize_function, batched=True)
|
149 |
dataset['train'] = concatenate_datasets([saved_dataset['train'], tokenized_sh_fq_dataset['train']])
|
150 |
dataset.save_to_disk(f'/data/{hub_id.strip()}_train_dataset2')
|
151 |
+
return 'SECOND THIRD LOADED'
|
152 |
|
153 |
except:
|
154 |
# Tokenize the dataset
|