Sreekanth Tangirala committed on
Commit 1fecae5 · 1 Parent(s): 3518e5d

adding augmentation and different datasets for test and train

Files changed (1): train.py (+62 -25)
train.py CHANGED
@@ -6,26 +6,40 @@ import torchvision.transforms as transforms
 from torch.utils.data import DataLoader, Subset
 from model import get_model, save_model
 from tqdm import tqdm
+import os
+from datetime import datetime
 
 def get_transforms():
     """
-    Define the image transformations
+    Define the image transformations with augmentation for training
     """
-    return transforms.Compose([
+    train_transform = transforms.Compose([
         transforms.Resize(224),
+        transforms.RandomHorizontalFlip(),
+        transforms.RandomRotation(15),
+        transforms.RandomAffine(degrees=0, translate=(0.1, 0.1)),
+        transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
         transforms.ToTensor(),
         transforms.Normalize(mean=[0.485, 0.456, 0.406],
                              std=[0.229, 0.224, 0.225])
     ])
+
+    test_transform = transforms.Compose([
+        transforms.Resize(224),
+        transforms.ToTensor(),
+        transforms.Normalize(mean=[0.485, 0.456, 0.406],
+                             std=[0.229, 0.224, 0.225])
+    ])
+
+    return train_transform, test_transform
 
 def get_data(subset_size=None, train=True):
     """
     Load and prepare the dataset
-    Args:
-        subset_size (int): If provided, return only a subset of data
-        train (bool): If True, return training data, else test data
     """
-    transform = get_transforms()
+    train_transform, test_transform = get_transforms()
+    transform = train_transform if train else test_transform
+
     dataset = torchvision.datasets.CIFAR10(
         root='./data',
         train=train,
@@ -66,26 +80,34 @@ def evaluate_model(model, testloader, device):
 
 def train_model(model, trainloader, testloader, epochs=100, device='cuda'):
     """
-    Train the model
-    Args:
-        model: The ResNet50 model
-        trainloader: DataLoader for training data
-        testloader: DataLoader for test data
-        epochs (int): Number of epochs to train
-        device (str): Device to train on ('cuda' or 'cpu')
+    Train the model with improved hyperparameters and markdown logging
     """
     model = model.to(device)
     criterion = nn.CrossEntropyLoss()
-    optimizer = optim.Adam(model.parameters(), lr=0.001)
-    scheduler = optim.lr_scheduler.ReduceLROnPlateau(
-        optimizer,
-        'max',
-        patience=5
+
+    # Add weight decay and reduce initial learning rate
+    optimizer = optim.AdamW(model.parameters(), lr=0.0001, weight_decay=0.01)
+
+    # Modify scheduler for better learning rate adjustment
+    scheduler = optim.lr_scheduler.OneCycleLR(
+        optimizer,
+        max_lr=0.001,
+        epochs=epochs,
+        steps_per_epoch=len(trainloader),
+        pct_start=0.2  # Warm up for first 20% of training
     )
 
-    best_acc = 0.0
+    # Create a markdown file for logging
+    log_dir = 'logs'
+    os.makedirs(log_dir, exist_ok=True)
+    log_file = os.path.join(log_dir, f'training_log_{datetime.now().strftime("%Y%m%d_%H%M%S")}.md')
 
-    # Create epoch progress bar without a description (we'll use it for stats only)
+    with open(log_file, 'w') as f:
+        f.write("# Training Log\n\n")
+        f.write("| Epoch | Train Loss | Train Acc | Test Acc | Best Acc |\n")
+        f.write("|-------|------------|-----------|-----------|----------|\n")
+
+    best_acc = 0.0
     epoch_pbar = tqdm(range(epochs), desc='Training Progress', position=0)
 
     for epoch in epoch_pbar:
@@ -108,6 +130,7 @@ def train_model(model, trainloader, testloader, epochs=100, device='cuda'):
             loss = criterion(outputs, labels)
             loss.backward()
             optimizer.step()
+            scheduler.step()  # Step the scheduler every batch
 
             running_loss += loss.item()
             _, predicted = outputs.max(1)
@@ -124,25 +147,39 @@ def train_model(model, trainloader, testloader, epochs=100, device='cuda'):
         test_acc = evaluate_model(model, testloader, device)
         epoch_pbar.write(f'Epoch {epoch+1}: Train Loss: {avg_loss:.3f} | Train Acc: {epoch_acc:.2f}% | Test Acc: {test_acc:.2f}%')
 
-        scheduler.step(test_acc)  # Using test accuracy for scheduler
-
+        # After computing metrics, log to markdown file
+        with open(log_file, 'a') as f:
+            f.write(f"| {epoch+1:5d} | {avg_loss:.3f} | {epoch_acc:.2f}% | {test_acc:.2f}% | {best_acc:.2f}% |\n")
+
         if test_acc > best_acc:
             best_acc = test_acc
             save_model(model, 'best_model.pth')
             epoch_pbar.write(f'New best test accuracy: {test_acc:.2f}%')
+            # Add a marker for best accuracy in the markdown
+            with open(log_file, 'a') as f:
+                f.write(f"**New best accuracy achieved at epoch {epoch+1}**\n\n")
 
         if test_acc > 70:
             epoch_pbar.write(f"\nReached target accuracy of 70% on test data!")
+            with open(log_file, 'a') as f:
+                f.write(f"\n**Training stopped at epoch {epoch+1} after reaching target accuracy of 70%**\n")
             break
 
+    # Add final summary to markdown
+    with open(log_file, 'a') as f:
+        f.write(f"\n## Training Summary\n")
+        f.write(f"- Final Test Accuracy: {test_acc:.2f}%\n")
+        f.write(f"- Best Test Accuracy: {best_acc:.2f}%\n")
+        f.write(f"- Total Epochs: {epoch+1}\n")
+
 if __name__ == "__main__":
     # Set device
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
     print(f"Using device: {device}")
 
-    # Get train and test data
-    trainloader = get_data(subset_size=5000, train=True)
-    testloader = get_data(subset_size=1000, train=False)
+    # Get train and test data with larger batch size
+    trainloader = get_data(subset_size=10000, train=True)  # Increased from 5000
+    testloader = get_data(subset_size=2000, train=False)  # Increased from 1000
 
     # Initialize model
     model = get_model(num_classes=10)
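
For reference, two short sketches (not part of this commit) that exercise the pieces the change introduces. They assume train.py is importable from the repo root and use hypothetical stand-in values for anything the diff does not pin down.

The first checks that the new train/test transform split behaves as intended: the training pipeline is stochastic (augmented), the test pipeline is deterministic, and both produce 3x224x224 tensors matching the ImageNet-normalized input the model expects. The random 32x32 image is a made-up stand-in for a CIFAR-10 sample.

import numpy as np
import torch
from PIL import Image
from train import get_transforms  # assumes train.py is on the import path

# Random 32x32 RGB image standing in for a CIFAR-10 sample
img = Image.fromarray((np.random.rand(32, 32, 3) * 255).astype('uint8'))

train_transform, test_transform = get_transforms()
a, b = train_transform(img), train_transform(img)   # augmented views of the same image
c, d = test_transform(img), test_transform(img)     # deterministic preprocessing
print(a.shape)                  # torch.Size([3, 224, 224])
print(torch.equal(a, b))        # usually False (random flip/rotation/translation/jitter)
print(torch.equal(c, d))        # True

The second shows how the OneCycleLR schedule configured in train_model behaves when it is stepped once per batch, as the new training loop does. The tiny linear model and the epoch/step counts are hypothetical; the optimizer and scheduler arguments mirror the diff.

import torch.nn as nn
import torch.optim as optim

model = nn.Linear(10, 2)                            # hypothetical stand-in model
optimizer = optim.AdamW(model.parameters(), lr=0.0001, weight_decay=0.01)

epochs, steps_per_epoch = 5, 20                     # assumed values for illustration
scheduler = optim.lr_scheduler.OneCycleLR(
    optimizer, max_lr=0.001, epochs=epochs,
    steps_per_epoch=steps_per_epoch, pct_start=0.2,  # warm up over the first 20% of steps
)

lrs = []
for _ in range(epochs * steps_per_epoch):
    optimizer.step()            # optimizer step first, then the scheduler
    scheduler.step()            # stepped every batch, matching the new loop
    lrs.append(optimizer.param_groups[0]['lr'])

print(f"start {lrs[0]:.2e}, peak {max(lrs):.2e}, end {lrs[-1]:.2e}")

Because OneCycleLR sizes its schedule as epochs * steps_per_epoch total steps, stepping it per batch rather than per epoch is what makes pct_start=0.2 line up with the first 20% of training.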