Spaces:
Running
Running
Update pages/21_NLP_Transformer.py
Browse files- pages/21_NLP_Transformer.py +8 -15
pages/21_NLP_Transformer.py
CHANGED
@@ -1,22 +1,19 @@
|
|
1 |
-
import pandas as pd
|
2 |
-
from sklearn.model_selection import train_test_split
|
3 |
import torch
|
4 |
from torch.utils.data import DataLoader, Dataset
|
5 |
from transformers import BertTokenizer, BertForSequenceClassification, AdamW
|
6 |
from transformers import get_linear_schedule_with_warmup
|
7 |
import numpy as np
|
8 |
-
from
|
9 |
import streamlit as st
|
10 |
|
11 |
-
# Load
|
12 |
-
|
13 |
-
|
|
|
14 |
|
15 |
-
|
16 |
-
train_df
|
17 |
-
|
18 |
-
train_df.to_csv('train.csv', index=False)
|
19 |
-
test_df.to_csv('test.csv', index=False)
|
20 |
|
21 |
class SentimentDataset(Dataset):
|
22 |
def __init__(self, dataframe, tokenizer, max_len):
|
@@ -113,10 +110,6 @@ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
|
113 |
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
|
114 |
model = BertForSequenceClassification.from_pretrained('bert-base-uncased')
|
115 |
|
116 |
-
# Load data
|
117 |
-
train_df = pd.read_csv('train.csv')
|
118 |
-
test_df = pd.read_csv('test.csv')
|
119 |
-
|
120 |
# Create data loaders
|
121 |
BATCH_SIZE = 16
|
122 |
MAX_LEN = 128
|
|
|
|
|
|
|
1 |
import torch
|
2 |
from torch.utils.data import DataLoader, Dataset
|
3 |
from transformers import BertTokenizer, BertForSequenceClassification, AdamW
|
4 |
from transformers import get_linear_schedule_with_warmup
|
5 |
import numpy as np
|
6 |
+
from datasets import load_dataset
|
7 |
import streamlit as st
|
8 |
|
9 |
+
# Load IMDb dataset
|
10 |
+
dataset = load_dataset('imdb')
|
11 |
+
train_df = dataset['train'].to_pandas()
|
12 |
+
test_df = dataset['test'].to_pandas()
|
13 |
|
14 |
+
# Preprocess the data
|
15 |
+
train_df = train_df[['text', 'label']]
|
16 |
+
test_df = test_df[['text', 'label']]
|
|
|
|
|
17 |
|
18 |
class SentimentDataset(Dataset):
|
19 |
def __init__(self, dataframe, tokenizer, max_len):
|
|
|
110 |
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
|
111 |
model = BertForSequenceClassification.from_pretrained('bert-base-uncased')
|
112 |
|
|
|
|
|
|
|
|
|
113 |
# Create data loaders
|
114 |
BATCH_SIZE = 16
|
115 |
MAX_LEN = 128
|