Update pages/17_RNN.py
pages/17_RNN.py (+13 -10)
@@ -3,7 +3,7 @@ import torch
 import torch.nn as nn
 import torch.optim as optim
 from torchtext.data.utils import get_tokenizer
-from torchtext.vocab import build_vocab_from_iterator
+from torchtext.vocab import build_vocab_from_iterator
 from torchtext.datasets import IMDB
 from torch.utils.data import DataLoader, random_split
 import matplotlib.pyplot as plt
@@ -29,6 +29,13 @@ class RNN(nn.Module):
         out = self.fc(out[:, -1, :])
         return out
 
+# Create a custom collate function to pad sequences
+def collate_batch(batch):
+    texts, labels = zip(*batch)
+    text_lengths = [len(text) for text in texts]
+    texts_padded = pad_sequence(texts, batch_first=True, padding_value=vocab["<pad>"])
+    return texts_padded, torch.tensor(labels, dtype=torch.float), text_lengths
+
 # Function to load the data
 @st.cache_data
 def load_data():
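This hunk hoists collate_batch out of load_data (the removal appears below), likely so the DataLoaders returned by the cached load_data no longer capture a locally defined closure. Note the hoisted function now reads the module-level vocab, which only exists after load_data() returns; that works because DataLoaders call collate_fn lazily, during iteration. A self-contained sketch of how the collate function behaves on a toy batch (PAD_IDX, the tensors, and the labels are all made up; PAD_IDX stands in for vocab["<pad>"]):

import torch
from torch.nn.utils.rnn import pad_sequence
from torch.utils.data import DataLoader

PAD_IDX = 1  # stand-in for vocab["<pad>"]

def collate_batch(batch):
    texts, labels = zip(*batch)
    text_lengths = [len(text) for text in texts]
    # Pad every sequence in the batch to the longest one
    texts_padded = pad_sequence(texts, batch_first=True, padding_value=PAD_IDX)
    return texts_padded, torch.tensor(labels, dtype=torch.float), text_lengths

toy_dataset = [
    (torch.tensor([4, 9, 2]), 1.0),  # length-3 "review", positive label
    (torch.tensor([7, 3]), 0.0),     # length-2 "review", negative label
]
loader = DataLoader(toy_dataset, batch_size=2, collate_fn=collate_batch)
texts, labels, lengths = next(iter(loader))
print(texts)    # second row padded with PAD_IDX up to length 3
print(lengths)  # [3, 2]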
@@ -39,7 +46,7 @@ def load_data():
         for _, text in data_iter:
             yield tokenizer(text)
 
-    vocab = build_vocab_from_iterator(yield_tokens(train_iter), specials=["<unk>"])
+    vocab = build_vocab_from_iterator(yield_tokens(train_iter), specials=["<unk>", "<pad>"])
     vocab.set_default_index(vocab["<unk>"])
 
     # Define the text and label processing pipelines
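Adding "<pad>" to specials is what makes vocab["<pad>"] in collate_batch resolve to a dedicated index. Before this change, with the default index set to "<unk>", that lookup silently returned the "<unk>" index, so padding positions and unknown words shared an id. A minimal sketch of the ordering guarantee (the toy corpus stands in for the tokenized IMDB stream):

from torchtext.vocab import build_vocab_from_iterator

tokens = [["great", "movie"], ["terrible", "movie"]]

vocab = build_vocab_from_iterator(tokens, specials=["<unk>", "<pad>"])
vocab.set_default_index(vocab["<unk>"])

print(vocab["<unk>"])   # 0 -- specials come first, in the order given
print(vocab["<pad>"])   # 1
print(vocab["unseen"])  # 0, falls back to the default index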
@@ -57,13 +64,6 @@ def load_data():
     train_texts, train_labels = process_data(train_iter)
     test_texts, test_labels = process_data(test_iter)
 
-    # Create a custom collate function to pad sequences
-    def collate_batch(batch):
-        texts, labels = zip(*batch)
-        text_lengths = [len(text) for text in texts]
-        texts_padded = pad_sequence(texts, batch_first=True, padding_value=vocab["<pad>"])
-        return texts_padded, torch.tensor(labels, dtype=torch.float), text_lengths
-
     # Create DataLoaders
     train_dataset = list(zip(train_texts, train_labels))
     test_dataset = list(zip(test_texts, test_labels))
@@ -124,6 +124,9 @@ def evaluate_network(net, iterator, criterion):
 
 # Load the data
 device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+
+# Display a loading message with some vertical space
+st.markdown("<div style='margin-top: 50px;'><b>Loading data...</b></div>", unsafe_allow_html=True)
 vocab, train_loader, valid_loader, test_loader = load_data()
 
 # Streamlit interface
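The raw-HTML spacer works, but it stays on the page after loading finishes. If that ever becomes a nuisance, st.empty() gives a placeholder that can be cleared; a sketch of that alternative (assuming load_data as defined above, with st already imported):

placeholder = st.empty()                     # reserve a slot in the page
placeholder.markdown('**Loading data...**')  # show the message
vocab, train_loader, valid_loader, test_loader = load_data()
placeholder.empty()                          # clear it once the data is ready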
@@ -198,5 +201,5 @@ if 'trained_model' in st.session_state and st.sidebar.button('Show Test Results'):
     st.write('Ground Truth vs Predicted for Sample Texts')
     for i, (text, true_label, predicted) in enumerate(samples):
         st.write(f'Sample {i+1}')
-        st.text(' '.join([vocab.
+        st.text(' '.join([vocab.get_itos()[token] for token in text]))
         st.write(f'Ground Truth: {true_label.item()}, Predicted: {predicted.item()}')
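The restored line decodes token ids through the full index-to-string list from vocab.get_itos(); indexing that list with the 0-d integer tensors yielded by iterating text works because they implement __index__. vocab.lookup_tokens does the same mapping in one call, at the cost of a .tolist(). Both forms, assuming text is a 1-D LongTensor of token ids as produced by the text pipeline:

itos = vocab.get_itos()                                 # index -> token string
decoded = ' '.join(itos[token] for token in text)       # what the commit does

decoded = ' '.join(vocab.lookup_tokens(text.tolist()))  # equivalent Vocab API call
st.text(decoded)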