Kevin Fink committed on
Commit
8849792
·
1 Parent(s): ae2e833
Files changed (1) hide show
  1. app.py +5 -3
app.py CHANGED
@@ -142,13 +142,15 @@ def fine_tune_model(model, dataset_name, hub_id, api_key, num_epochs, batch_size
142
  except:
143
  dataset = load_dataset(dataset_name.strip())
144
  train_size = len(dataset['train'])
145
- third_size = train_size // 3
 
 
146
  print("FOUND VALIDATION")
147
  saved_dataset = load_from_disk(f'/data/{hub_id.strip()}_train_dataset2')
148
  third_third = dataset['train'].select(range(third_size*2, train_size))
149
  dataset['train'] = third_third
150
- del dataset['test']
151
- del dataset['validation']
152
  tokenized_second_half = dataset.map(tokenize_function, batched=True)
153
  dataset['train'] = concatenate_datasets([saved_dataset['train'], tokenized_second_half['train']])
154
  dataset['train'].save_to_disk(f'/data/{hub_id.strip()}_train_dataset3')
 
142
  except:
143
  dataset = load_dataset(dataset_name.strip())
144
  train_size = len(dataset['train'])
145
+ third_size = train_size // 3
146
+ del dataset['test']
147
+ del dataset['validation']
148
  print("FOUND VALIDATION")
149
  saved_dataset = load_from_disk(f'/data/{hub_id.strip()}_train_dataset2')
150
  third_third = dataset['train'].select(range(third_size*2, train_size))
151
  dataset['train'] = third_third
152
+ print(dataset)
153
+ print(dataset.keys())
154
  tokenized_second_half = dataset.map(tokenize_function, batched=True)
155
  dataset['train'] = concatenate_datasets([saved_dataset['train'], tokenized_second_half['train']])
156
  dataset['train'].save_to_disk(f'/data/{hub_id.strip()}_train_dataset3')