Kevin Fink
committed on
Commit
·
1744a34
1
Parent(s):
3b756d7
dev
Browse files
app.py
CHANGED
@@ -142,15 +142,14 @@ def fine_tune_model(model, dataset_name, hub_id, api_key, num_epochs, batch_size
|
|
142 |
except:
|
143 |
tokenizer = AutoTokenizer.from_pretrained('google/t5-efficient-tiny-nh8')
|
144 |
# Tokenize the dataset
|
145 |
-
|
146 |
-
dataset['train'] =
|
147 |
del dataset['test']
|
148 |
del dataset['validation']
|
149 |
-
|
150 |
-
tokenized_half = tokenize_function(first_half.to_dict())
|
151 |
|
152 |
-
|
153 |
-
|
154 |
return 'RUN AGAIN TO LOAD REST OF DATA'
|
155 |
|
156 |
# Fine-tune the model
|
|
|
142 |
except:
|
143 |
tokenizer = AutoTokenizer.from_pretrained('google/t5-efficient-tiny-nh8')
|
144 |
# Tokenize the dataset
|
145 |
+
first_third = dataset['train'].select(range(third_size))
|
146 |
+
dataset['train'] = first_third
|
147 |
del dataset['test']
|
148 |
del dataset['validation']
|
149 |
+
tokenized_first_third = dataset.map(tokenize_function, batched=True)
|
|
|
150 |
|
151 |
+
tokenized_first_third.save_to_disk(f'/data/{hub_id.strip()}_train_dataset')
|
152 |
+
print('DONE')
|
153 |
return 'RUN AGAIN TO LOAD REST OF DATA'
|
154 |
|
155 |
# Fine-tune the model
|