eaglelandsonce committed on
Commit
3494d39
·
verified ·
1 Parent(s): 71dcae9

Update pages/1_TensorIntro.py

Browse files
Files changed (1) hide show
  1. pages/1_TensorIntro.py +70 -71
pages/1_TensorIntro.py CHANGED
@@ -202,88 +202,87 @@ print("Normalized data:", normalized_data)
202
  '''
203
  },
204
 
205
- "Final Project: Sentiment Analysis with LSTM": {
206
- "description": "In this project, you will build and train a simple Long Short-Term Memory (LSTM) network for sentiment analysis on a text dataset. This involves preprocessing text data, defining the LSTM model, and training the model to classify text as positive or negative sentiment.",
207
  "code": '''import torch
208
  import torch.nn as nn
209
  import torch.optim as optim
210
- from torchtext.datasets import IMDB
211
- from torchtext.data.utils import get_tokenizer
212
- from torchtext.vocab import build_vocab_from_iterator
213
- from torch.utils.data import DataLoader
214
- from torch.nn.utils.rnn import pad_sequence
215
-
216
- # Define the tokenizer and vocabulary
217
- tokenizer = get_tokenizer('basic_english')
218
- train_iter = IMDB(split='train')
219
-
220
- def yield_tokens(data_iter):
221
- for _, text in data_iter:
222
- yield tokenizer(text)
223
-
224
- vocab = build_vocab_from_iterator(yield_tokens(train_iter), specials=["<unk>"])
225
- vocab.set_default_index(vocab["<unk>"])
226
-
227
- # Define the text and label preprocessing pipeline
228
- text_pipeline = lambda x: vocab(tokenizer(x))
229
- label_pipeline = lambda x: 1 if x == 'pos' else 0
230
-
231
- # Define the collate function for the DataLoader
232
- def collate_batch(batch):
233
- label_list, text_list, lengths = [], [], []
234
- for _label, _text in batch:
235
- label_list.append(label_pipeline(_label))
236
- processed_text = torch.tensor(text_pipeline(_text), dtype=torch.int64)
237
- text_list.append(processed_text)
238
- lengths.append(processed_text.size(0))
239
- label_list = torch.tensor(label_list, dtype=torch.float)
240
- text_list = pad_sequence(text_list, batch_first=True)
241
- lengths = torch.tensor(lengths, dtype=torch.int64)
242
- return label_list, text_list, lengths
243
-
244
- # Create DataLoaders for training and testing
245
- train_iter, test_iter = IMDB()
246
- train_dataloader = DataLoader(list(train_iter), batch_size=8, shuffle=True, collate_fn=collate_batch)
247
- test_dataloader = DataLoader(list(test_iter), batch_size=8, shuffle=False, collate_fn=collate_batch)
248
-
249
- # Define the LSTM model
250
- class LSTM(nn.Module):
251
- def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim):
252
- super().__init__()
253
- self.embedding = nn.Embedding(vocab_size, embedding_dim)
254
- self.lstm = nn.LSTM(embedding_dim, hidden_dim)
255
- self.fc = nn.Linear(hidden_dim, output_dim)
256
-
257
- def forward(self, text, text_lengths):
258
- embedded = self.embedding(text)
259
- packed_embedded = nn.utils.rnn.pack_padded_sequence(embedded, text_lengths, batch_first=True, enforce_sorted=False)
260
- packed_output, (hidden, cell) = self.lstm(packed_embedded)
261
- return self.fc(hidden.squeeze(0))
262
-
263
- # Instantiate the model
264
- INPUT_DIM = len(vocab)
265
- EMBEDDING_DIM = 100
266
- HIDDEN_DIM = 256
267
- OUTPUT_DIM = 1
268
- model = LSTM(INPUT_DIM, EMBEDDING_DIM, HIDDEN_DIM, OUTPUT_DIM)
269
-
270
- # Define the loss and optimizer
271
- criterion = nn.BCEWithLogitsLoss()
272
- optimizer = optim.Adam(model.parameters())
273
 
274
  # Training loop
275
- N_EPOCHS = 5
276
- for epoch in range(N_EPOCHS):
277
- model.train()
278
- for labels, text, text_lengths in train_dataloader:
 
279
  optimizer.zero_grad()
280
- predictions = model(text, text_lengths).squeeze(1)
281
- loss = criterion(predictions, labels)
 
282
  loss.backward()
283
  optimizer.step()
284
 
 
 
 
 
 
285
  print('Finished Training')
286
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
287
 
288
  '''
289
  },
 
202
  '''
203
  },
204
 
205
+ "Final Project: Image Classification with a Simple CNN": {
206
+ "description": "In this project, you will build and train a simple Convolutional Neural Network (CNN) for image classification using the CIFAR-10 dataset. This involves loading the dataset, defining the CNN model, and training the model to classify images into one of the 10 classes.",
207
  "code": '''import torch
208
  import torch.nn as nn
209
  import torch.optim as optim
210
+ import torchvision
211
+ import torchvision.transforms as transforms
212
+
213
+ # Define the transformation for the dataset
214
+ transform = transforms.Compose(
215
+ [transforms.ToTensor(),
216
+ transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
217
+
218
+ # Load the CIFAR-10 dataset
219
+ trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
220
+ trainloader = torch.utils.data.DataLoader(trainset, batch_size=32, shuffle=True, num_workers=2)
221
+
222
+ testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
223
+ testloader = torch.utils.data.DataLoader(testset, batch_size=32, shuffle=False, num_workers=2)
224
+
225
+ # Define the CNN model
226
+ class SimpleCNN(nn.Module):
227
+ def __init__(self):
228
+ super(SimpleCNN, self).__init__()
229
+ self.conv1 = nn.Conv2d(3, 6, 5)
230
+ self.pool = nn.MaxPool2d(2, 2)
231
+ self.conv2 = nn.Conv2d(6, 16, 5)
232
+ self.fc1 = nn.Linear(16 * 5 * 5, 120)
233
+ self.fc2 = nn.Linear(120, 84)
234
+ self.fc3 = nn.Linear(84, 10)
235
+
236
+ def forward(self, x):
237
+ x = self.pool(nn.functional.relu(self.conv1(x)))
238
+ x = self.pool(nn.functional.relu(self.conv2(x)))
239
+ x = x.view(-1, 16 * 5 * 5)
240
+ x = nn.functional.relu(self.fc1(x))
241
+ x = nn.functional.relu(self.fc2(x))
242
+ x = self.fc3(x)
243
+ return x
244
+
245
+ # Instantiate the model, loss function, and optimizer
246
+ net = SimpleCNN()
247
+ criterion = nn.CrossEntropyLoss()
248
+ optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
249
 
250
  # Training loop
251
+ for epoch in range(5): # loop over the dataset multiple times
252
+ running_loss = 0.0
253
+ for i, data in enumerate(trainloader, 0):
254
+ inputs, labels = data
255
+
256
  optimizer.zero_grad()
257
+
258
+ outputs = net(inputs)
259
+ loss = criterion(outputs, labels)
260
  loss.backward()
261
  optimizer.step()
262
 
263
+ running_loss += loss.item()
264
+ if i % 200 == 199: # print every 200 mini-batches
265
+ print(f'[{epoch + 1}, {i + 1}] loss: {running_loss / 200:.3f}')
266
+ running_loss = 0.0
267
+
268
  print('Finished Training')
269
 
270
+ # Save the trained model
271
+ torch.save(net.state_dict(), 'simple_cnn.pth')
272
+
273
+ # Testing the model
274
+ correct = 0
275
+ total = 0
276
+ with torch.no_grad():
277
+ for data in testloader:
278
+ images, labels = data
279
+ outputs = net(images)
280
+ _, predicted = torch.max(outputs.data, 1)
281
+ total += labels.size(0)
282
+ correct += (predicted == labels).sum().item()
283
+
284
+ print(f'Accuracy of the network on the 10000 test images: {100 * correct / total}%')
285
+
286
 
287
  '''
288
  },