Spaces:
Runtime error
Runtime error
Add wandb
Browse files
- pipelines/images.py +1 -0
- pipelines/train.py +12 -4
- train.py +1 -1
pipelines/images.py
CHANGED
@@ -17,5 +17,6 @@ training_image_pip = (
|
|
17 |
"torchaudio==2.0.0",
|
18 |
"pandas",
|
19 |
"tqdm",
|
|
|
20 |
)
|
21 |
)
|
|
|
17 |
"torchaudio==2.0.0",
|
18 |
"pandas",
|
19 |
"tqdm",
|
20 |
+
"wandb",
|
21 |
)
|
22 |
)
|
pipelines/train.py
CHANGED
@@ -8,7 +8,7 @@ from torch import nn
|
|
8 |
from torch.utils.data import DataLoader
|
9 |
|
10 |
# modal
|
11 |
-
from modal import Mount, Stub, gpu, create_package_mounts
|
12 |
|
13 |
# internal
|
14 |
from pipelines.images import training_image_pip
|
@@ -38,28 +38,34 @@ stub = Stub(
|
|
38 |
Mount.from_local_file(local_path='cnn.py'),
|
39 |
],
|
40 |
timeout=EPOCHS * 60,
|
|
|
41 |
)
|
42 |
def train(
|
43 |
model,
|
44 |
train_dataloader,
|
45 |
loss_fn,
|
46 |
optimizer,
|
47 |
-
device,
|
48 |
-
epochs,
|
49 |
):
|
|
|
|
|
50 |
import time
|
51 |
import torch
|
52 |
-
|
53 |
|
54 |
print("Begin model training...")
|
55 |
begin = time.time()
|
56 |
|
|
|
57 |
model = model.to(device)
|
58 |
|
59 |
# metrics
|
60 |
training_acc = []
|
61 |
training_loss = []
|
62 |
|
|
|
|
|
63 |
for i in range(epochs):
|
64 |
print(f"Epoch {i + 1}/{epochs}")
|
65 |
then = time.time()
|
@@ -70,6 +76,7 @@ def train(
|
|
70 |
# training metrics
|
71 |
training_loss.append(train_epoch_loss/len(train_dataloader))
|
72 |
training_acc.append(train_epoch_acc/len(train_dataloader))
|
|
|
73 |
|
74 |
now = time.time()
|
75 |
print("Training Loss: {:.2f}, Training Accuracy: {:.2f}, Time: {:.2f}s".format(training_loss[i], training_acc[i], now - then))
|
@@ -77,6 +84,7 @@ def train(
|
|
77 |
print ("-------------------------------------------- \n")
|
78 |
|
79 |
end = time.time()
|
|
|
80 |
|
81 |
print("-------- Finished Training --------")
|
82 |
print("-------- Total Time -- {:.2f}s --------".format(end - begin))
|
|
|
8 |
from torch.utils.data import DataLoader
|
9 |
|
10 |
# modal
|
11 |
+
from modal import Mount, Secret, Stub, gpu, create_package_mounts
|
12 |
|
13 |
# internal
|
14 |
from pipelines.images import training_image_pip
|
|
|
38 |
Mount.from_local_file(local_path='cnn.py'),
|
39 |
],
|
40 |
timeout=EPOCHS * 60,
|
41 |
+
secret=Secret.from_name("wandb")
|
42 |
)
|
43 |
def train(
|
44 |
model,
|
45 |
train_dataloader,
|
46 |
loss_fn,
|
47 |
optimizer,
|
48 |
+
device="cuda",
|
49 |
+
epochs=10,
|
50 |
):
|
51 |
+
import os
|
52 |
+
|
53 |
import time
|
54 |
import torch
|
55 |
+
import wandb
|
56 |
|
57 |
print("Begin model training...")
|
58 |
begin = time.time()
|
59 |
|
60 |
+
# set model to cuda
|
61 |
model = model.to(device)
|
62 |
|
63 |
# metrics
|
64 |
training_acc = []
|
65 |
training_loss = []
|
66 |
|
67 |
+
wandb.init(project="void-training")
|
68 |
+
|
69 |
for i in range(epochs):
|
70 |
print(f"Epoch {i + 1}/{epochs}")
|
71 |
then = time.time()
|
|
|
76 |
# training metrics
|
77 |
training_loss.append(train_epoch_loss/len(train_dataloader))
|
78 |
training_acc.append(train_epoch_acc/len(train_dataloader))
|
79 |
+
wandb.log({'training_loss': training_loss[i], 'training_acc': training_acc[i]})
|
80 |
|
81 |
now = time.time()
|
82 |
print("Training Loss: {:.2f}, Training Accuracy: {:.2f}, Time: {:.2f}s".format(training_loss[i], training_acc[i], now - then))
|
|
|
84 |
print ("-------------------------------------------- \n")
|
85 |
|
86 |
end = time.time()
|
87 |
+
wandb.finish()
|
88 |
|
89 |
print("-------- Finished Training --------")
|
90 |
print("-------- Total Time -- {:.2f}s --------".format(end - begin))
|
train.py
CHANGED
@@ -49,7 +49,7 @@ def train(model, train_dataloader, loss_fn, optimizer, device, epochs, test_data
|
|
49 |
testing_acc.append(test_epoch_acc/len(test_dataloader))
|
50 |
|
51 |
print("Testing Loss: {:.2f}, Testing Accuracy {}".format(testing_loss[i], testing_acc[i]))
|
52 |
-
wandb.log({'testing_loss': testing_loss[i], '
|
53 |
|
54 |
print ("-------------------------------------------- \n")
|
55 |
|
|
|
49 |
testing_acc.append(test_epoch_acc/len(test_dataloader))
|
50 |
|
51 |
print("Testing Loss: {:.2f}, Testing Accuracy {}".format(testing_loss[i], testing_acc[i]))
|
52 |
+
wandb.log({'testing_loss': testing_loss[i], 'testing_acc': testing_acc[i]})
|
53 |
|
54 |
print ("-------------------------------------------- \n")
|
55 |
|