Kevin Fink
committed on
Commit
·
4dafb88
1
Parent(s):
ab2f056
dev
Browse files
app.py
CHANGED
@@ -117,7 +117,8 @@ def fine_tune_model(model, dataset_name, hub_id, api_key, num_epochs, batch_size
|
|
117 |
try:
|
118 |
tokenized_first_half = load_from_disk(f'/data/{hub_id.strip()}_train_dataset')
|
119 |
second_half = dataset['train'].select(range(half_size, train_size))
|
120 |
-
|
|
|
121 |
tokenized_train_dataset = concatenate_datasets([tokenized_first_half, tokenized_second_half])
|
122 |
tokenized_test_dataset = tokenize_function(dataset['test'])
|
123 |
|
@@ -133,6 +134,10 @@ def fine_tune_model(model, dataset_name, hub_id, api_key, num_epochs, batch_size
|
|
133 |
tokenizer = AutoTokenizer.from_pretrained('google/t5-efficient-tiny-nh8')
|
134 |
# Tokenize the dataset
|
135 |
first_half = dataset['train'].select(range(half_size))
|
|
|
|
|
|
|
|
|
136 |
tokenized_half = tokenize_function(first_half.to_dict())
|
137 |
|
138 |
tokenized_half.save_to_disk(f'/data/{hub_id.strip()}_train_dataset')
|
@@ -211,5 +216,4 @@ try:
|
|
211 |
# Launch the interface
|
212 |
iface.launch()
|
213 |
except Exception as e:
|
214 |
-
print(f"An error occurred: {str(e)}, TB: {traceback.format_exc()}")
|
215 |
-
|
|
|
117 |
try:
|
118 |
tokenized_first_half = load_from_disk(f'/data/{hub_id.strip()}_train_dataset')
|
119 |
second_half = dataset['train'].select(range(half_size, train_size))
|
120 |
+
dataset['train'] = second_half
|
121 |
+
tokenized_second_half = dataset.map(tokenize_function, batched=True)
|
122 |
tokenized_train_dataset = concatenate_datasets([tokenized_first_half, tokenized_second_half])
|
123 |
tokenized_test_dataset = tokenize_function(dataset['test'])
|
124 |
|
|
|
134 |
tokenizer = AutoTokenizer.from_pretrained('google/t5-efficient-tiny-nh8')
|
135 |
# Tokenize the dataset
|
136 |
first_half = dataset['train'].select(range(half_size))
|
137 |
+
dataset['train'] = first_half
|
138 |
+
del dataset['test']
|
139 |
+
del dataset['validation']
|
140 |
+
tokenized_second_half = dataset.map(tokenize_function, batched=True)
|
141 |
tokenized_half = tokenize_function(first_half.to_dict())
|
142 |
|
143 |
tokenized_half.save_to_disk(f'/data/{hub_id.strip()}_train_dataset')
|
|
|
216 |
# Launch the interface
|
217 |
iface.launch()
|
218 |
except Exception as e:
|
219 |
+
print(f"An error occurred: {str(e)}, TB: {traceback.format_exc()}")
|
|