Kevin Fink committed on
Commit 5a4a1bb · 1 Parent(s): 10e867c
Files changed (1)
  1. app.py +3 -2
app.py CHANGED
@@ -124,10 +124,11 @@ def fine_tune_model(model, dataset_name, hub_id, api_key, num_epochs, batch_size
     print("FOUND VALIDATION")
     saved_dataset = load_from_disk(f'/data/{hub_id.strip()}_train_dataset2')
     third_third = dataset['train'].select(range(third_size*2, train_size))
-    dataset['train'] = third_third
+    #dataset['train'] = third_third
     print(dataset)
     print(dataset.keys())
-    tokenized_second_half = dataset.map(tokenize_function, batched=True)
+    tokenized_second_half = tokenize_function(third_third)
+    #tokenized_second_half = dataset.map(tokenize_function)
     dataset['train'] = concatenate_datasets([saved_dataset['train'], tokenized_second_half['train']])
     dataset['train'].save_to_disk(f'/data/{hub_id.strip()}_train_dataset3')
     return 'THIRD THIRD LOADED'
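For context, the change stops re-mapping the whole DatasetDict and instead tokenizes only the final third of the train split before concatenating it with the previously saved, already-tokenized portion. Below is a minimal sketch of that chunked tokenize-and-concatenate pattern with the datasets library; the model name, dataset, text column, and paths are illustrative assumptions, not values taken from app.py, and it uses Dataset.map on the selected slice rather than the direct tokenize_function call in the diff.

# Minimal sketch, not the repo's code. Assumptions: model name, dataset,
# "text" column, and output path are placeholders. It shows the pattern this
# commit is working toward: tokenize one slice of the train split separately,
# then concatenate it with an already-tokenized slice.
from datasets import load_dataset, concatenate_datasets
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")  # assumed model

def tokenize_function(examples):
    # Batched tokenization; the "text" column name is an assumption.
    return tokenizer(examples["text"], truncation=True, padding="max_length")

dataset = load_dataset("imdb")  # assumed dataset
train_size = len(dataset["train"])
third_size = train_size // 3

# First two thirds, tokenized (in app.py this part is saved under /data/ and
# reloaded with load_from_disk instead of being rebuilt here).
first_two_thirds = dataset["train"].select(range(third_size * 2))
tokenized_first = first_two_thirds.map(tokenize_function, batched=True)

# Final third, tokenized on its own instead of re-mapping the whole DatasetDict.
third_third = dataset["train"].select(range(third_size * 2, train_size))
tokenized_third = third_third.map(tokenize_function, batched=True)

combined = concatenate_datasets([tokenized_first, tokenized_third])
combined.save_to_disk("/data/example_train_dataset3")  # assumed path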