amanmibra committed on
Commit
cf26dbd
·
1 Parent(s): be72719
Files changed (3) hide show
  1. pipelines/images.py +1 -0
  2. pipelines/train.py +12 -4
  3. train.py +1 -1
pipelines/images.py CHANGED
@@ -17,5 +17,6 @@ training_image_pip = (
17
  "torchaudio==2.0.0",
18
  "pandas",
19
  "tqdm",
 
20
  )
21
  )
 
17
  "torchaudio==2.0.0",
18
  "pandas",
19
  "tqdm",
20
+ "wandb",
21
  )
22
  )
pipelines/train.py CHANGED
@@ -8,7 +8,7 @@ from torch import nn
8
  from torch.utils.data import DataLoader
9
 
10
  # modal
11
- from modal import Mount, Stub, gpu, create_package_mounts
12
 
13
  # internal
14
  from pipelines.images import training_image_pip
@@ -38,28 +38,34 @@ stub = Stub(
38
  Mount.from_local_file(local_path='cnn.py'),
39
  ],
40
  timeout=EPOCHS * 60,
 
41
  )
42
  def train(
43
  model,
44
  train_dataloader,
45
  loss_fn,
46
  optimizer,
47
- device,
48
- epochs,
49
  ):
 
 
50
  import time
51
  import torch
52
-
53
 
54
  print("Begin model training...")
55
  begin = time.time()
56
 
 
57
  model = model.to(device)
58
 
59
  # metrics
60
  training_acc = []
61
  training_loss = []
62
 
 
 
63
  for i in range(epochs):
64
  print(f"Epoch {i + 1}/{epochs}")
65
  then = time.time()
@@ -70,6 +76,7 @@ def train(
70
  # training metrics
71
  training_loss.append(train_epoch_loss/len(train_dataloader))
72
  training_acc.append(train_epoch_acc/len(train_dataloader))
 
73
 
74
  now = time.time()
75
  print("Training Loss: {:.2f}, Training Accuracy: {:.2f}, Time: {:.2f}s".format(training_loss[i], training_acc[i], now - then))
@@ -77,6 +84,7 @@ def train(
77
  print ("-------------------------------------------- \n")
78
 
79
  end = time.time()
 
80
 
81
  print("-------- Finished Training --------")
82
  print("-------- Total Time -- {:.2f}s --------".format(end - begin))
 
8
  from torch.utils.data import DataLoader
9
 
10
  # modal
11
+ from modal import Mount, Secret, Stub, gpu, create_package_mounts
12
 
13
  # internal
14
  from pipelines.images import training_image_pip
 
38
  Mount.from_local_file(local_path='cnn.py'),
39
  ],
40
  timeout=EPOCHS * 60,
41
+ secret=Secret.from_name("wandb")
42
  )
43
  def train(
44
  model,
45
  train_dataloader,
46
  loss_fn,
47
  optimizer,
48
+ device="cuda",
49
+ epochs=10,
50
  ):
51
+ import os
52
+
53
  import time
54
  import torch
55
+ import wandb
56
 
57
  print("Begin model training...")
58
  begin = time.time()
59
 
60
+ # set model to cuda
61
  model = model.to(device)
62
 
63
  # metrics
64
  training_acc = []
65
  training_loss = []
66
 
67
+ wandb.init(project="void-training")
68
+
69
  for i in range(epochs):
70
  print(f"Epoch {i + 1}/{epochs}")
71
  then = time.time()
 
76
  # training metrics
77
  training_loss.append(train_epoch_loss/len(train_dataloader))
78
  training_acc.append(train_epoch_acc/len(train_dataloader))
79
+ wandb.log({'training_loss': training_loss[i], 'training_acc': training_acc[i]})
80
 
81
  now = time.time()
82
  print("Training Loss: {:.2f}, Training Accuracy: {:.2f}, Time: {:.2f}s".format(training_loss[i], training_acc[i], now - then))
 
84
  print ("-------------------------------------------- \n")
85
 
86
  end = time.time()
87
+ wandb.finish()
88
 
89
  print("-------- Finished Training --------")
90
  print("-------- Total Time -- {:.2f}s --------".format(end - begin))
train.py CHANGED
@@ -49,7 +49,7 @@ def train(model, train_dataloader, loss_fn, optimizer, device, epochs, test_data
49
  testing_acc.append(test_epoch_acc/len(test_dataloader))
50
 
51
  print("Testing Loss: {:.2f}, Testing Accuracy {}".format(testing_loss[i], testing_acc[i]))
52
- wandb.log({'testing_loss': testing_loss[i], 'training_acc': training_acc[i]})
53
 
54
  print ("-------------------------------------------- \n")
55
 
 
49
  testing_acc.append(test_epoch_acc/len(test_dataloader))
50
 
51
  print("Testing Loss: {:.2f}, Testing Accuracy {}".format(testing_loss[i], testing_acc[i]))
52
+ wandb.log({'testing_loss': testing_loss[i], 'testing_acc': testing_acc[i]})
53
 
54
  print ("-------------------------------------------- \n")
55