Kevin Fink committed
Commit da5b30a · 1 Parent(s): 30deac6
Files changed (1)
  1. app.py +7 -4
app.py CHANGED
@@ -146,7 +146,8 @@ def fine_tune_model(model, dataset_name, hub_id, api_key, num_epochs, batch_size
         del dataset['train']
         del dataset['validation']
         test_set = tokenize_function(dataset['test'])
-        test_set.save_to_disk(f'/data/{hub_id.strip()}_test_dataset')
+        dataset['test'] = test_set
+        dataset['test'].save_to_disk(f'/data/{hub_id.strip()}_test_dataset')
         return 'TRAINING DONE'

     elif os.access(f'/data/{hub_id.strip()}_validation_dataset', os.R_OK):
@@ -161,7 +162,7 @@ def fine_tune_model(model, dataset_name, hub_id, api_key, num_epochs, batch_size
         saved_dataset = load_from_disk(f'/data/{hub_id.strip()}_train_dataset2')
         third_third = dataset['train'].select(range(third_size*2, train_size))
         dataset['train'] = third_third
-        train_set_3 = tokenize_function(dataset['train'])
+        train_set_3 = tokenize_function(dataset['train'])
         dataset['train'] = concatenate_datasets([saved_dataset, train_set_3])
         dataset['train'].save_to_disk(f'/data/{hub_id.strip()}_train_dataset3')
         return 'THIRD THIRD LOADED'
@@ -180,9 +181,10 @@ def fine_tune_model(model, dataset_name, hub_id, api_key, num_epochs, batch_size
         del dataset['test']
         train_set_2 = tokenize_function(dataset['train'])
         validation_set = tokenize_function(dataset['validation'])
+        dataset['validation'] = validation_set
         dataset['train'] = concatenate_datasets([saved_dataset['train'], train_set_2])
         dataset['train'].save_to_disk(f'/data/{hub_id.strip()}_train_dataset2')
-        validation_set.save_to_disk(f'/data/{hub_id.strip()}_validation_dataset')
+        dataset['validation'].save_to_disk(f'/data/{hub_id.strip()}_validation_dataset')

         return 'SECOND THIRD LOADED'

@@ -199,7 +201,8 @@ def fine_tune_model(model, dataset_name, hub_id, api_key, num_epochs, batch_size
         del dataset['test']
         del dataset['validation']
         train_set = tokenize_function(dataset['train'])
-        train_set.save_to_disk(f'/data/{hub_id.strip()}_train_dataset')
+        dataset['train'] = train_set
+        dataset['train'].save_to_disk(f'/data/{hub_id.strip()}_train_dataset')
         print('DONE')
         return 'RUN AGAIN TO LOAD REST OF DATA'
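For context on the pattern being fixed: fine_tune_model tokenizes the training split a third at a time, checkpoints each chunk to /data with save_to_disk, and asks the caller to run again so the next invocation can resume via load_from_disk. Every hunk applies the same correction: assign the tokenized split back into the DatasetDict before saving, so the in-memory dict and the on-disk checkpoint stay in sync. Below is a minimal sketch of that resumable flow, assuming tokenize_function returns a datasets.Dataset; the stand-in tokenizer, the 'text' column, and the chunked_tokenize name are illustrative, while the /data paths, os.access checks, and status strings mirror the diff.

import os

from datasets import Dataset, concatenate_datasets, load_from_disk


def tokenize_function(ds: Dataset) -> Dataset:
    # Stand-in for the app's tokenizer; assumed to return a Dataset.
    return ds.map(lambda ex: {'n_chars': len(ex['text'])})


def chunked_tokenize(dataset, hub_id: str) -> str:
    # Tokenize dataset['train'] a third at a time, checkpointing each
    # chunk to /data so an interrupted run can resume where it left off.
    prefix = f'/data/{hub_id.strip()}_train_dataset'
    train_size = len(dataset['train'])
    third_size = train_size // 3

    if os.access(f'{prefix}3', os.R_OK):
        # All three chunks are already tokenized and saved.
        dataset['train'] = load_from_disk(f'{prefix}3')
        return 'TRAINING DONE'
    elif os.access(f'{prefix}2', os.R_OK):
        # Two thirds done: tokenize the remainder and append it.
        saved_dataset = load_from_disk(f'{prefix}2')
        train_set_3 = tokenize_function(
            dataset['train'].select(range(third_size * 2, train_size)))
        dataset['train'] = concatenate_datasets([saved_dataset, train_set_3])
        dataset['train'].save_to_disk(f'{prefix}3')
        return 'THIRD THIRD LOADED'
    elif os.access(prefix, os.R_OK):
        # One third done: tokenize the middle slice and append it.
        saved_dataset = load_from_disk(prefix)
        train_set_2 = tokenize_function(
            dataset['train'].select(range(third_size, third_size * 2)))
        dataset['train'] = concatenate_datasets([saved_dataset, train_set_2])
        dataset['train'].save_to_disk(f'{prefix}2')
        return 'SECOND THIRD LOADED'
    else:
        # First run: tokenize the first third and checkpoint it. Assigning
        # back into the DatasetDict before saving is the commit's fix.
        dataset['train'] = tokenize_function(
            dataset['train'].select(range(third_size)))
        dataset['train'].save_to_disk(prefix)
        return 'RUN AGAIN TO LOAD REST OF DATA'

Keying resumption off os.access(path, os.R_OK) is why each chunk gets its own _train_dataset, _train_dataset2, _train_dataset3 directory: the presence of a readable checkpoint tells the next run which branch to take.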