Kevin Fink
committed on
Commit
·
ba41d27
1
Parent(s):
9613a2c
dev
Browse files
app.py
CHANGED
@@ -131,8 +131,8 @@ def fine_tune_model(model, dataset_name, hub_id, api_key, num_epochs, batch_size
|
|
131 |
|
132 |
elif os.access(f'/data/{hub_id.strip()}_train_dataset3', os.R_OK):
|
133 |
dataset = load_dataset(dataset_name.strip())
|
134 |
-
dataset['test'] = dataset['test'].select(range(
|
135 |
-
dataset['train'] = dataset['train'].select(range(
|
136 |
del dataset['train']
|
137 |
del dataset['validation']
|
138 |
test_set = dataset.map(tokenize_function, batched=True)
|
@@ -141,7 +141,7 @@ def fine_tune_model(model, dataset_name, hub_id, api_key, num_epochs, batch_size
|
|
141 |
|
142 |
elif os.access(f'/data/{hub_id.strip()}_validation_dataset', os.R_OK):
|
143 |
dataset = load_dataset(dataset_name.strip())
|
144 |
-
dataset['train'] = dataset['train'].select(range(
|
145 |
train_size = len(dataset['train'])
|
146 |
third_size = train_size // 3
|
147 |
del dataset['test']
|
@@ -159,7 +159,7 @@ def fine_tune_model(model, dataset_name, hub_id, api_key, num_epochs, batch_size
|
|
159 |
|
160 |
if os.access(f'/data/{hub_id.strip()}_train_dataset', os.R_OK) and not os.access(f'/data/{hub_id.strip()}_train_dataset3', os.R_OK):
|
161 |
dataset = load_dataset(dataset_name.strip())
|
162 |
-
dataset['train'] = dataset['train'].select(range(
|
163 |
dataset['validation'] = dataset['validation'].select(range(200))
|
164 |
train_size = len(dataset['train'])
|
165 |
third_size = train_size // 3
|
@@ -175,7 +175,7 @@ def fine_tune_model(model, dataset_name, hub_id, api_key, num_epochs, batch_size
|
|
175 |
except Exception as e:
|
176 |
print(f"An error occurred: {str(e)}, TB: {traceback.format_exc()}")
|
177 |
dataset = load_dataset(dataset_name.strip())
|
178 |
-
dataset['train'] = dataset['train'].select(range(
|
179 |
train_size = len(dataset['train'])
|
180 |
third_size = train_size // 3
|
181 |
# Tokenize the dataset
|
|
|
131 |
|
132 |
elif os.access(f'/data/{hub_id.strip()}_train_dataset3', os.R_OK):
|
133 |
dataset = load_dataset(dataset_name.strip())
|
134 |
+
dataset['test'] = dataset['test'].select(range(800))
|
135 |
+
dataset['train'] = dataset['train'].select(range(8000))
|
136 |
del dataset['train']
|
137 |
del dataset['validation']
|
138 |
test_set = dataset.map(tokenize_function, batched=True)
|
|
|
141 |
|
142 |
elif os.access(f'/data/{hub_id.strip()}_validation_dataset', os.R_OK):
|
143 |
dataset = load_dataset(dataset_name.strip())
|
144 |
+
dataset['train'] = dataset['train'].select(range(8000))
|
145 |
train_size = len(dataset['train'])
|
146 |
third_size = train_size // 3
|
147 |
del dataset['test']
|
|
|
159 |
|
160 |
if os.access(f'/data/{hub_id.strip()}_train_dataset', os.R_OK) and not os.access(f'/data/{hub_id.strip()}_train_dataset3', os.R_OK):
|
161 |
dataset = load_dataset(dataset_name.strip())
|
162 |
+
dataset['train'] = dataset['train'].select(range(8000))
|
163 |
dataset['validation'] = dataset['validation'].select(range(200))
|
164 |
train_size = len(dataset['train'])
|
165 |
third_size = train_size // 3
|
|
|
175 |
except Exception as e:
|
176 |
print(f"An error occurred: {str(e)}, TB: {traceback.format_exc()}")
|
177 |
dataset = load_dataset(dataset_name.strip())
|
178 |
+
dataset['train'] = dataset['train'].select(range(8000))
|
179 |
train_size = len(dataset['train'])
|
180 |
third_size = train_size // 3
|
181 |
# Tokenize the dataset
|