Kevin Fink committed on
Commit
1744a34
·
1 Parent(s): 3b756d7
Files changed (1) hide show
  1. app.py +5 -6
app.py CHANGED
@@ -142,15 +142,14 @@ def fine_tune_model(model, dataset_name, hub_id, api_key, num_epochs, batch_size
142
  except:
143
  tokenizer = AutoTokenizer.from_pretrained('google/t5-efficient-tiny-nh8')
144
  # Tokenize the dataset
145
- first_half = dataset['train'].select(range(third_size))
146
- dataset['train'] = first_half
147
  del dataset['test']
148
  del dataset['validation']
149
- tokenized_second_half = dataset.map(tokenize_function, batched=True)
150
- tokenized_half = tokenize_function(first_half.to_dict())
151
 
152
- tokenized_half.save_to_disk(f'/data/{hub_id.strip()}_train_dataset')
153
-
154
  return 'RUN AGAIN TO LOAD REST OF DATA'
155
 
156
  # Fine-tune the model
 
142
  except:
143
  tokenizer = AutoTokenizer.from_pretrained('google/t5-efficient-tiny-nh8')
144
  # Tokenize the dataset
145
+ first_third = dataset['train'].select(range(third_size))
146
+ dataset['train'] = first_third
147
  del dataset['test']
148
  del dataset['validation']
149
+ tokenized_first_third = dataset.map(tokenize_function, batched=True)
 
150
 
151
+ tokenized_first_third.save_to_disk(f'/data/{hub_id.strip()}_train_dataset')
152
+ print('DONE')
153
  return 'RUN AGAIN TO LOAD REST OF DATA'
154
 
155
  # Fine-tune the model