Kevin Fink
committed on
Commit
·
d177146
1
Parent(s):
27c01f2
dev
Browse files
app.py
CHANGED
@@ -134,8 +134,6 @@ def fine_tune_model(model, dataset_name, hub_id, api_key, num_epochs, batch_size
|
|
134 |
|
135 |
elif os.access(f'/data/{hub_id.strip()}_train_dataset3', os.R_OK):
|
136 |
dataset = load_dataset(dataset_name.strip())
|
137 |
-
dataset['test'] = dataset['test'].select(range(800))
|
138 |
-
dataset['train'] = dataset['train'].select(range(8000))
|
139 |
del dataset['train']
|
140 |
del dataset['validation']
|
141 |
test_set = dataset.map(tokenize_function, batched=True, batch_size=5)
|
@@ -144,7 +142,6 @@ def fine_tune_model(model, dataset_name, hub_id, api_key, num_epochs, batch_size
|
|
144 |
|
145 |
elif os.access(f'/data/{hub_id.strip()}_validation_dataset', os.R_OK):
|
146 |
dataset = load_dataset(dataset_name.strip())
|
147 |
-
dataset['train'] = dataset['train'].select(range(8000))
|
148 |
train_size = len(dataset['train'])
|
149 |
third_size = train_size // 3
|
150 |
del dataset['test']
|
@@ -162,8 +159,6 @@ def fine_tune_model(model, dataset_name, hub_id, api_key, num_epochs, batch_size
|
|
162 |
|
163 |
if os.access(f'/data/{hub_id.strip()}_train_dataset', os.R_OK) and not os.access(f'/data/{hub_id.strip()}_train_dataset3', os.R_OK):
|
164 |
dataset = load_dataset(dataset_name.strip())
|
165 |
-
dataset['train'] = dataset['train'].select(range(8000))
|
166 |
-
dataset['validation'] = dataset['validation'].select(range(200))
|
167 |
train_size = len(dataset['train'])
|
168 |
third_size = train_size // 3
|
169 |
second_third = dataset['train'].select(range(third_size, third_size*2))
|
@@ -178,7 +173,6 @@ def fine_tune_model(model, dataset_name, hub_id, api_key, num_epochs, batch_size
|
|
178 |
except Exception as e:
|
179 |
print(f"An error occurred: {str(e)}, TB: {traceback.format_exc()}")
|
180 |
dataset = load_dataset(dataset_name.strip())
|
181 |
-
dataset['train'] = dataset['train'].select(range(8000))
|
182 |
train_size = len(dataset['train'])
|
183 |
third_size = train_size // 3
|
184 |
# Tokenize the dataset
|
|
|
134 |
|
135 |
elif os.access(f'/data/{hub_id.strip()}_train_dataset3', os.R_OK):
|
136 |
dataset = load_dataset(dataset_name.strip())
|
|
|
|
|
137 |
del dataset['train']
|
138 |
del dataset['validation']
|
139 |
test_set = dataset.map(tokenize_function, batched=True, batch_size=5)
|
|
|
142 |
|
143 |
elif os.access(f'/data/{hub_id.strip()}_validation_dataset', os.R_OK):
|
144 |
dataset = load_dataset(dataset_name.strip())
|
|
|
145 |
train_size = len(dataset['train'])
|
146 |
third_size = train_size // 3
|
147 |
del dataset['test']
|
|
|
159 |
|
160 |
if os.access(f'/data/{hub_id.strip()}_train_dataset', os.R_OK) and not os.access(f'/data/{hub_id.strip()}_train_dataset3', os.R_OK):
|
161 |
dataset = load_dataset(dataset_name.strip())
|
|
|
|
|
162 |
train_size = len(dataset['train'])
|
163 |
third_size = train_size // 3
|
164 |
second_third = dataset['train'].select(range(third_size, third_size*2))
|
|
|
173 |
except Exception as e:
|
174 |
print(f"An error occurred: {str(e)}, TB: {traceback.format_exc()}")
|
175 |
dataset = load_dataset(dataset_name.strip())
|
|
|
176 |
train_size = len(dataset['train'])
|
177 |
third_size = train_size // 3
|
178 |
# Tokenize the dataset
|