Spaces:
Runtime error
Runtime error
Add model saving
Browse files- pipelines/train.py +36 -13
pipelines/train.py
CHANGED
@@ -1,5 +1,6 @@
|
|
1 |
import sys
|
2 |
sys.path.append('..')
|
|
|
3 |
|
4 |
# torch
|
5 |
import torch
|
@@ -32,20 +33,20 @@ stub = Stub(
|
|
32 |
)
|
33 |
|
34 |
@stub.function(
|
35 |
-
gpu=
|
36 |
mounts=[
|
37 |
Mount.from_local_file(local_path='dataset.py'),
|
38 |
Mount.from_local_file(local_path='cnn.py'),
|
39 |
],
|
40 |
-
timeout=EPOCHS *
|
41 |
-
secret=Secret.from_name("wandb")
|
42 |
)
|
43 |
def train(
|
44 |
model,
|
45 |
train_dataloader,
|
46 |
loss_fn,
|
47 |
optimizer,
|
48 |
-
|
49 |
epochs=10,
|
50 |
):
|
51 |
import os
|
@@ -57,8 +58,12 @@ def train(
|
|
57 |
print("Begin model training...")
|
58 |
begin = time.time()
|
59 |
|
|
|
|
|
60 |
# set model to cuda
|
61 |
-
|
|
|
|
|
62 |
|
63 |
# metrics
|
64 |
training_acc = []
|
@@ -71,7 +76,7 @@ def train(
|
|
71 |
then = time.time()
|
72 |
|
73 |
# train model
|
74 |
-
train_epoch_loss, train_epoch_acc = train_epoch.call(model, train_dataloader, loss_fn, optimizer,
|
75 |
|
76 |
# training metrics
|
77 |
training_loss.append(train_epoch_loss/len(train_dataloader))
|
@@ -79,18 +84,19 @@ def train(
|
|
79 |
wandb.log({'training_loss': training_loss[i], 'training_acc': training_acc[i]})
|
80 |
|
81 |
now = time.time()
|
82 |
-
print("Training Loss: {:.2f}, Training Accuracy: {:.
|
83 |
|
84 |
-
print ("
|
85 |
|
86 |
end = time.time()
|
87 |
wandb.finish()
|
88 |
-
|
89 |
print("-------- Finished Training --------")
|
90 |
print("-------- Total Time -- {:.2f}s --------".format(end - begin))
|
91 |
|
|
|
|
|
92 |
@stub.function(
|
93 |
-
gpu=
|
94 |
mounts=[
|
95 |
Mount.from_local_file(local_path='dataset.py'),
|
96 |
Mount.from_local_file(local_path='cnn.py'),
|
@@ -124,12 +130,26 @@ def train_epoch(model, train_dataloader, loss_fn, optimizer, device):
|
|
124 |
train_acc += (prediction == target).sum().item()/len(prediction)
|
125 |
total += 1
|
126 |
|
127 |
-
return train_loss, train_acc
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
128 |
|
129 |
@stub.local_entrypoint()
|
130 |
def main():
|
131 |
print("Initiating model training...")
|
132 |
-
device =
|
133 |
|
134 |
# instantiating our dataset object and create data loader
|
135 |
mel_spectrogram = torchaudio.transforms.MelSpectrogram(
|
@@ -151,5 +171,8 @@ def main():
|
|
151 |
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)
|
152 |
|
153 |
# train model
|
154 |
-
train.call(model, train_dataloader, loss_fn, optimizer,
|
|
|
|
|
|
|
155 |
|
|
|
1 |
import sys
|
2 |
sys.path.append('..')
|
3 |
+
import time
|
4 |
|
5 |
# torch
|
6 |
import torch
|
|
|
33 |
)
|
34 |
|
35 |
@stub.function(
|
36 |
+
gpu="any",
|
37 |
mounts=[
|
38 |
Mount.from_local_file(local_path='dataset.py'),
|
39 |
Mount.from_local_file(local_path='cnn.py'),
|
40 |
],
|
41 |
+
timeout=EPOCHS * 200,
|
42 |
+
secret=Secret.from_name("wandb"),
|
43 |
)
|
44 |
def train(
|
45 |
model,
|
46 |
train_dataloader,
|
47 |
loss_fn,
|
48 |
optimizer,
|
49 |
+
origin_device="cuda",
|
50 |
epochs=10,
|
51 |
):
|
52 |
import os
|
|
|
58 |
print("Begin model training...")
|
59 |
begin = time.time()
|
60 |
|
61 |
+
modal_device = origin_device
|
62 |
+
|
63 |
# set model to cuda
|
64 |
+
if torch.cuda.is_available() and modal_device != "cuda":
|
65 |
+
modal_device = "cuda"
|
66 |
+
model = model.to(modal_device)
|
67 |
|
68 |
# metrics
|
69 |
training_acc = []
|
|
|
76 |
then = time.time()
|
77 |
|
78 |
# train model
|
79 |
+
model, train_epoch_loss, train_epoch_acc = train_epoch.call(model, train_dataloader, loss_fn, optimizer, modal_device)
|
80 |
|
81 |
# training metrics
|
82 |
training_loss.append(train_epoch_loss/len(train_dataloader))
|
|
|
84 |
wandb.log({'training_loss': training_loss[i], 'training_acc': training_acc[i]})
|
85 |
|
86 |
now = time.time()
|
87 |
+
print("Training Loss: {:.2f}, Training Accuracy: {:.4f}, Time: {:.2f}s".format(training_loss[i], training_acc[i], now - then))
|
88 |
|
89 |
+
print ("-------------------------------------------------------- \n")
|
90 |
|
91 |
end = time.time()
|
92 |
wandb.finish()
|
|
|
93 |
print("-------- Finished Training --------")
|
94 |
print("-------- Total Time -- {:.2f}s --------".format(end - begin))
|
95 |
|
96 |
+
return model.to(origin_device)
|
97 |
+
|
98 |
@stub.function(
|
99 |
+
gpu="any",
|
100 |
mounts=[
|
101 |
Mount.from_local_file(local_path='dataset.py'),
|
102 |
Mount.from_local_file(local_path='cnn.py'),
|
|
|
130 |
train_acc += (prediction == target).sum().item()/len(prediction)
|
131 |
total += 1
|
132 |
|
133 |
+
return model, train_loss, train_acc
|
134 |
+
|
135 |
+
def save_model(model):
    """Save the model's ``state_dict`` to a timestamped file under ``models/``.

    The file is named ``models/void_<YYYYmmdd_HHMMSS>.pth`` using the local
    time at the moment of the call, and the path is printed once the write
    succeeds.

    Args:
        model: Object exposing ``state_dict()`` (e.g. a ``torch.nn.Module``).
    """
    import os

    now = time.strftime("%Y%m%d_%H%M%S")
    model_filename = f"models/void_{now}.pth"

    # torch.save does not create missing parent directories; without this a
    # fresh checkout would fail here, after training has already completed.
    os.makedirs(os.path.dirname(model_filename), exist_ok=True)

    torch.save(model.state_dict(), model_filename)
    print(f"Trained void model saved at {model_filename}")
|
140 |
+
|
141 |
+
def get_device():
    """Return ``"cuda"`` if torch reports CUDA as available, otherwise ``"cpu"``."""
    return "cuda" if torch.cuda.is_available() else "cpu"
|
148 |
|
149 |
@stub.local_entrypoint()
|
150 |
def main():
|
151 |
print("Initiating model training...")
|
152 |
+
device = get_device()
|
153 |
|
154 |
# instantiating our dataset object and create data loader
|
155 |
mel_spectrogram = torchaudio.transforms.MelSpectrogram(
|
|
|
171 |
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)
|
172 |
|
173 |
# train model
|
174 |
+
model = train.call(model, train_dataloader, loss_fn, optimizer, device, 3)
|
175 |
+
|
176 |
+
# save model
|
177 |
+
save_model(model)
|
178 |
|