Kevin Fink
committed on
Commit
·
69cfd5f
1
Parent(s):
93bdf45
dev
Browse files
app.py
CHANGED
@@ -111,7 +111,6 @@ def fine_tune_model(model, dataset_name, hub_id, api_key, num_epochs, batch_size
|
|
111 |
#max_length = 512
|
112 |
# Load the dataset
|
113 |
dataset = load_dataset(dataset_name.strip())
|
114 |
-
print(dataset.keys())
|
115 |
train_size = len(dataset['train'])
|
116 |
third_size = train_size // 3
|
117 |
max_length = model.get_input_embeddings().weight.shape[0]
|
@@ -139,7 +138,7 @@ def fine_tune_model(model, dataset_name, hub_id, api_key, num_epochs, batch_size
|
|
139 |
second_third = dataset['train'].select(range(third_size, third_size*2))
|
140 |
dataset['train'] = second_third
|
141 |
del dataset['test']
|
142 |
-
tokenized_sh_fq_dataset = tokenize_function
|
143 |
dataset['train'] = concatenate_datasets([saved_dataset['train'], tokenized_sh_fq_dataset['train']])
|
144 |
dataset.save_to_disk(f'/data/{hub_id.strip()}_train_dataset')
|
145 |
return
|
|
|
111 |
#max_length = 512
|
112 |
# Load the dataset
|
113 |
dataset = load_dataset(dataset_name.strip())
|
|
|
114 |
train_size = len(dataset['train'])
|
115 |
third_size = train_size // 3
|
116 |
max_length = model.get_input_embeddings().weight.shape[0]
|
|
|
138 |
second_third = dataset['train'].select(range(third_size, third_size*2))
|
139 |
dataset['train'] = second_third
|
140 |
del dataset['test']
|
141 |
+
tokenized_sh_fq_dataset = dataset.map(tokenize_function, batched=True)
|
142 |
dataset['train'] = concatenate_datasets([saved_dataset['train'], tokenized_sh_fq_dataset['train']])
|
143 |
dataset.save_to_disk(f'/data/{hub_id.strip()}_train_dataset')
|
144 |
return
|