Kevin Fink committed on
Commit
ba41d27
·
1 Parent(s): 9613a2c
Files changed (1) hide show
  1. app.py +5 -5
app.py CHANGED
@@ -131,8 +131,8 @@ def fine_tune_model(model, dataset_name, hub_id, api_key, num_epochs, batch_size
131
 
132
  elif os.access(f'/data/{hub_id.strip()}_train_dataset3', os.R_OK):
133
  dataset = load_dataset(dataset_name.strip())
134
- dataset['test'] = dataset['test'].select(range(1200))
135
- dataset['train'] = dataset['train'].select(range(12000))
136
  del dataset['train']
137
  del dataset['validation']
138
  test_set = dataset.map(tokenize_function, batched=True)
@@ -141,7 +141,7 @@ def fine_tune_model(model, dataset_name, hub_id, api_key, num_epochs, batch_size
141
 
142
  elif os.access(f'/data/{hub_id.strip()}_validation_dataset', os.R_OK):
143
  dataset = load_dataset(dataset_name.strip())
144
- dataset['train'] = dataset['train'].select(range(12000))
145
  train_size = len(dataset['train'])
146
  third_size = train_size // 3
147
  del dataset['test']
@@ -159,7 +159,7 @@ def fine_tune_model(model, dataset_name, hub_id, api_key, num_epochs, batch_size
159
 
160
  if os.access(f'/data/{hub_id.strip()}_train_dataset', os.R_OK) and not os.access(f'/data/{hub_id.strip()}_train_dataset3', os.R_OK):
161
  dataset = load_dataset(dataset_name.strip())
162
- dataset['train'] = dataset['train'].select(range(12000))
163
  dataset['validation'] = dataset['validation'].select(range(200))
164
  train_size = len(dataset['train'])
165
  third_size = train_size // 3
@@ -175,7 +175,7 @@ def fine_tune_model(model, dataset_name, hub_id, api_key, num_epochs, batch_size
175
  except Exception as e:
176
  print(f"An error occurred: {str(e)}, TB: {traceback.format_exc()}")
177
  dataset = load_dataset(dataset_name.strip())
178
- dataset['train'] = dataset['train'].select(range(12000))
179
  train_size = len(dataset['train'])
180
  third_size = train_size // 3
181
  # Tokenize the dataset
 
131
 
132
  elif os.access(f'/data/{hub_id.strip()}_train_dataset3', os.R_OK):
133
  dataset = load_dataset(dataset_name.strip())
134
+ dataset['test'] = dataset['test'].select(range(800))
135
+ dataset['train'] = dataset['train'].select(range(8000))
136
  del dataset['train']
137
  del dataset['validation']
138
  test_set = dataset.map(tokenize_function, batched=True)
 
141
 
142
  elif os.access(f'/data/{hub_id.strip()}_validation_dataset', os.R_OK):
143
  dataset = load_dataset(dataset_name.strip())
144
+ dataset['train'] = dataset['train'].select(range(8000))
145
  train_size = len(dataset['train'])
146
  third_size = train_size // 3
147
  del dataset['test']
 
159
 
160
  if os.access(f'/data/{hub_id.strip()}_train_dataset', os.R_OK) and not os.access(f'/data/{hub_id.strip()}_train_dataset3', os.R_OK):
161
  dataset = load_dataset(dataset_name.strip())
162
+ dataset['train'] = dataset['train'].select(range(8000))
163
  dataset['validation'] = dataset['validation'].select(range(200))
164
  train_size = len(dataset['train'])
165
  third_size = train_size // 3
 
175
  except Exception as e:
176
  print(f"An error occurred: {str(e)}, TB: {traceback.format_exc()}")
177
  dataset = load_dataset(dataset_name.strip())
178
+ dataset['train'] = dataset['train'].select(range(8000))
179
  train_size = len(dataset['train'])
180
  third_size = train_size // 3
181
  # Tokenize the dataset