Update pages/1_TensorIntro.py
pages/1_TensorIntro.py  +57 -33  CHANGED
@@ -201,49 +201,73 @@ normalized_data = min_max_normalize(data)
     print("Normalized data:", normalized_data)
 '''
     },
+
+    "Final Project: Sentiment Analysis with LSTM": {
+        "description": "In this project, you will build and train a simple Long Short-Term Memory (LSTM) network for sentiment analysis on a text dataset. This involves preprocessing text data, defining the LSTM model, and training the model to classify text as positive or negative sentiment.",
         "code": '''import torch
 import torch.nn as nn
 import torch.optim as optim
+from torchtext.legacy import data, datasets
+
+# Define the fields for the dataset
+TEXT = data.Field(tokenize='spacy', include_lengths=True)
+LABEL = data.LabelField(dtype=torch.float)
+
+# Load the IMDb dataset
+train_data, test_data = datasets.IMDB.splits(TEXT, LABEL)
+
+# Build the vocabulary
+TEXT.build_vocab(train_data, max_size=25000)
+LABEL.build_vocab(train_data)
+
+# Create the iterators
+BATCH_SIZE = 64
+train_iterator, test_iterator = data.BucketIterator.splits(
+    (train_data, test_data),
+    batch_size=BATCH_SIZE,
+    sort_within_batch=True,
+    device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+)
+
+# Define the LSTM model
+class LSTM(nn.Module):
+    def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim):
+        super().__init__()
+        self.embedding = nn.Embedding(vocab_size, embedding_dim)
+        self.lstm = nn.LSTM(embedding_dim, hidden_dim)
+        self.fc = nn.Linear(hidden_dim, output_dim)
+
+    def forward(self, text, text_lengths):
+        embedded = self.embedding(text)
+        packed_embedded = nn.utils.rnn.pack_padded_sequence(embedded, text_lengths)
+        packed_output, (hidden, cell) = self.lstm(packed_embedded)
+        return self.fc(hidden.squeeze(0))
+
+# Instantiate the model
+INPUT_DIM = len(TEXT.vocab)
+EMBEDDING_DIM = 100
+HIDDEN_DIM = 256
+OUTPUT_DIM = 1
+model = LSTM(INPUT_DIM, EMBEDDING_DIM, HIDDEN_DIM, OUTPUT_DIM)
+
+# Define the loss and optimizer
+criterion = nn.BCEWithLogitsLoss()
+optimizer = optim.Adam(model.parameters())

 # Training loop
+N_EPOCHS = 5
+for epoch in range(N_EPOCHS):
+    model.train()
+    for batch in train_iterator:
         optimizer.zero_grad()
+        text, text_lengths = batch.text
+        predictions = model(text, text_lengths).squeeze(1)
+        loss = criterion(predictions, batch.label)
         loss.backward()
         optimizer.step()

 print('Finished Training')
+
 '''
     },
 }
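Outside the diff itself, the new snippet can be exercised with a short evaluation pass. The sketch below is a hedged companion and not part of the commit: it assumes the legacy torchtext API used above is installed (torchtext releases before 0.12, which still ship torchtext.legacy), that a spaCy English tokenizer model is available for tokenize='spacy', and that model and the batches produced by test_iterator end up on the same device.

# Hypothetical evaluation pass (not in the commit): reuses model, criterion
# and test_iterator exactly as defined in the snippet added above.
import torch

model.eval()
test_loss, correct, total = 0.0, 0, 0
with torch.no_grad():
    for batch in test_iterator:
        text, text_lengths = batch.text
        predictions = model(text, text_lengths).squeeze(1)
        test_loss += criterion(predictions, batch.label).item()
        # BCEWithLogitsLoss consumes raw logits, so threshold through a sigmoid here
        predicted = torch.round(torch.sigmoid(predictions))
        correct += (predicted == batch.label).sum().item()
        total += batch.label.size(0)

print(f'Test loss: {test_loss / len(test_iterator):.3f} | Test accuracy: {correct / total:.3f}')

Rounding the sigmoid of the logits mirrors the binary LABEL field declared with dtype=torch.float, so accuracy is simple elementwise agreement with batch.label.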