Kevin Fink committed on
Commit
d177146
·
1 Parent(s): 27c01f2
Files changed (1) hide show
  1. app.py +0 -6
app.py CHANGED
@@ -134,8 +134,6 @@ def fine_tune_model(model, dataset_name, hub_id, api_key, num_epochs, batch_size
134
 
135
  elif os.access(f'/data/{hub_id.strip()}_train_dataset3', os.R_OK):
136
  dataset = load_dataset(dataset_name.strip())
137
- dataset['test'] = dataset['test'].select(range(800))
138
- dataset['train'] = dataset['train'].select(range(8000))
139
  del dataset['train']
140
  del dataset['validation']
141
  test_set = dataset.map(tokenize_function, batched=True, batch_size=5)
@@ -144,7 +142,6 @@ def fine_tune_model(model, dataset_name, hub_id, api_key, num_epochs, batch_size
144
 
145
  elif os.access(f'/data/{hub_id.strip()}_validation_dataset', os.R_OK):
146
  dataset = load_dataset(dataset_name.strip())
147
- dataset['train'] = dataset['train'].select(range(8000))
148
  train_size = len(dataset['train'])
149
  third_size = train_size // 3
150
  del dataset['test']
@@ -162,8 +159,6 @@ def fine_tune_model(model, dataset_name, hub_id, api_key, num_epochs, batch_size
162
 
163
  if os.access(f'/data/{hub_id.strip()}_train_dataset', os.R_OK) and not os.access(f'/data/{hub_id.strip()}_train_dataset3', os.R_OK):
164
  dataset = load_dataset(dataset_name.strip())
165
- dataset['train'] = dataset['train'].select(range(8000))
166
- dataset['validation'] = dataset['validation'].select(range(200))
167
  train_size = len(dataset['train'])
168
  third_size = train_size // 3
169
  second_third = dataset['train'].select(range(third_size, third_size*2))
@@ -178,7 +173,6 @@ def fine_tune_model(model, dataset_name, hub_id, api_key, num_epochs, batch_size
178
  except Exception as e:
179
  print(f"An error occurred: {str(e)}, TB: {traceback.format_exc()}")
180
  dataset = load_dataset(dataset_name.strip())
181
- dataset['train'] = dataset['train'].select(range(8000))
182
  train_size = len(dataset['train'])
183
  third_size = train_size // 3
184
  # Tokenize the dataset
 
134
 
135
  elif os.access(f'/data/{hub_id.strip()}_train_dataset3', os.R_OK):
136
  dataset = load_dataset(dataset_name.strip())
 
 
137
  del dataset['train']
138
  del dataset['validation']
139
  test_set = dataset.map(tokenize_function, batched=True, batch_size=5)
 
142
 
143
  elif os.access(f'/data/{hub_id.strip()}_validation_dataset', os.R_OK):
144
  dataset = load_dataset(dataset_name.strip())
 
145
  train_size = len(dataset['train'])
146
  third_size = train_size // 3
147
  del dataset['test']
 
159
 
160
  if os.access(f'/data/{hub_id.strip()}_train_dataset', os.R_OK) and not os.access(f'/data/{hub_id.strip()}_train_dataset3', os.R_OK):
161
  dataset = load_dataset(dataset_name.strip())
 
 
162
  train_size = len(dataset['train'])
163
  third_size = train_size // 3
164
  second_third = dataset['train'].select(range(third_size, third_size*2))
 
173
  except Exception as e:
174
  print(f"An error occurred: {str(e)}, TB: {traceback.format_exc()}")
175
  dataset = load_dataset(dataset_name.strip())
 
176
  train_size = len(dataset['train'])
177
  third_size = train_size // 3
178
  # Tokenize the dataset