File size: 3,865 Bytes
74b29ba |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 |
import os
import torch
import multiprocessing
import pandas as pd
import pytorch_lightning as pl
from sklearn.model_selection import train_test_split
from torch.utils.data import TensorDataset, DataLoader
from imblearn.over_sampling import SMOTE
class Preprocessor(pl.LightningDataModule):
    """LightningDataModule for the simulated electrical grid CSV.

    The module reads ``dataset/simulated_electrical_grid.csv``, encodes the
    ``stabf`` label (``'unstable'`` -> 0, ``'stable'`` -> 1), splits the rows
    into train / validation / test sets and caches the resulting tensors under
    ``dataset/*.pt`` so later runs can skip preprocessing.

    SMOTE oversampling is applied to the *training* split only; validation
    and test data keep their original, un-resampled rows so that evaluation
    is not contaminated by synthetic samples derived from training rows.

    NOTE: cache files written by an earlier version of this class (whole
    ``TensorDataset`` objects, oversampled before splitting) are still
    accepted when they can be unpickled. Delete ``dataset/*_set.pt`` to
    force regeneration with the current pipeline.
    """

    # Input feature columns, in the order they appear in the feature tensor.
    FEATURE_COLUMNS = (
        'tau1', 'tau2', 'tau3', 'tau4',
        'p1', 'p2', 'p3', 'p4',
        'g1', 'g2', 'g3', 'g4',
    )
    # Name of the label column in the CSV.
    TARGET_COLUMN = 'stabf'

    # On-disk cache locations for the three splits.
    TRAIN_PATH = "dataset/train_set.pt"
    VALID_PATH = "dataset/valid_set.pt"
    TEST_PATH = "dataset/test_set.pt"

    def __init__(self, batch_size):
        """Load the raw CSV and store the loader configuration.

        Args:
            batch_size: Batch size used by all three dataloaders.
        """
        super().__init__()
        self.dataset = pd.read_csv('dataset/simulated_electrical_grid.csv')
        self.batch_size = batch_size
        self.oversampling = SMOTE(random_state=42)

    def setup(self, stage=None):
        """Populate the split attributes needed for ``stage``.

        Args:
            stage: ``"fit"`` / ``"validate"`` set ``train_set`` and
                ``valid_set``; ``"test"`` sets ``test_set``; ``None`` sets
                all three. Any other value sets nothing.
        """
        train_set, valid_set, test_set = self.preprocessor()

        # ``None`` means "prepare everything"; "validate" needs the same
        # attributes as "fit" because val_dataloader reads self.valid_set.
        if stage in (None, "fit", "validate"):
            self.train_set = train_set
            self.valid_set = valid_set
        if stage in (None, "test"):
            self.test_set = test_set

    def _make_loader(self, dataset, shuffle):
        """Build a DataLoader with the settings shared by every split."""
        return DataLoader(
            dataset=dataset,
            batch_size=self.batch_size,
            shuffle=shuffle,
            num_workers=multiprocessing.cpu_count()
        )

    def train_dataloader(self):
        """Return the shuffled training DataLoader."""
        return self._make_loader(self.train_set, shuffle=True)

    def val_dataloader(self):
        """Return the validation DataLoader (no shuffling)."""
        return self._make_loader(self.valid_set, shuffle=False)

    def test_dataloader(self):
        """Return the test DataLoader (no shuffling)."""
        return self._make_loader(self.test_set, shuffle=False)

    def preprocessor(self):
        """Return ``(train_set, valid_set, test_set)``.

        The splits are loaded from the cache files when all three exist;
        otherwise they are built from ``self.dataset`` and written to disk.
        """
        cache_paths = (self.TRAIN_PATH, self.VALID_PATH, self.TEST_PATH)
        if all(os.path.exists(path) for path in cache_paths):
            print("\nLoading Data...")
            train_set = self._load_split(self.TRAIN_PATH)
            valid_set = self._load_split(self.VALID_PATH)
            test_set = self._load_split(self.TEST_PATH)
            print('[ Loading Completed ]\n')
        else:
            print("\nPreprocessing Data...")
            train_set, valid_set, test_set = self.preprocessing_data(self.dataset)
            print('[ Preprocessing Completed ]\n')
        return train_set, valid_set, test_set

    @staticmethod
    def _load_split(path):
        """Load one cached split and return it as a TensorDataset.

        Accepts both the current format (a tuple of tensors) and the legacy
        format (a pickled ``TensorDataset``).
        """
        loaded = torch.load(path)
        if isinstance(loaded, TensorDataset):
            return loaded
        return TensorDataset(*loaded)

    @staticmethod
    def _save_split(split, path):
        """Cache one split as a plain tuple of tensors.

        Plain tensors are stored instead of the TensorDataset object so the
        file does not depend on unpickling arbitrary Python classes.
        """
        torch.save(split.tensors, path)

    @staticmethod
    def _to_tensor_dataset(features, labels):
        """Convert a feature frame and label series into a TensorDataset."""
        feature_tensor = torch.tensor(features.to_numpy(), dtype=torch.float32)
        label_tensor = torch.tensor(labels.to_numpy(), dtype=torch.long)
        return TensorDataset(feature_tensor, label_tensor)

    def preprocessing_data(self, dataset):
        """Build, cache and return the train / validation / test splits.

        Args:
            dataset: DataFrame containing ``FEATURE_COLUMNS`` and
                ``TARGET_COLUMN``.

        Returns:
            Tuple ``(train_set, valid_set, test_set)`` of TensorDatasets.

        Raises:
            ValueError: If the label column contains a value other than
                ``'unstable'`` or ``'stable'``.
        """
        X = dataset[list(self.FEATURE_COLUMNS)]
        y = self.label_encoding(dataset[self.TARGET_COLUMN])

        # Split BEFORE oversampling: 20% test, then 10% of the remainder
        # for validation.
        X_train_valid, X_test, y_train_valid, y_test = train_test_split(
            X, y, test_size=0.2, random_state=42)
        X_train, X_valid, y_train, y_valid = train_test_split(
            X_train_valid, y_train_valid, test_size=0.1, random_state=42)

        # Oversample the training split only, so no synthetic sample built
        # from training rows can end up in validation or test data.
        X_train, y_train = self.oversampling.fit_resample(X_train, y_train)

        train_set = self._to_tensor_dataset(X_train, y_train)
        valid_set = self._to_tensor_dataset(X_valid, y_valid)
        test_set = self._to_tensor_dataset(X_test, y_test)

        self._save_split(train_set, self.TRAIN_PATH)
        self._save_split(valid_set, self.VALID_PATH)
        self._save_split(test_set, self.TEST_PATH)

        return train_set, valid_set, test_set

    def label_encoding(self, y_train):
        """Map string labels to integers (``'unstable'`` -> 0, ``'stable'`` -> 1).

        Args:
            y_train: pandas Series of labels.

        Returns:
            Series of int64 class indices with the same index as ``y_train``.

        Raises:
            ValueError: If any label is not ``'unstable'`` or ``'stable'``.
        """
        encoder = {'unstable': 0, 'stable': 1}
        as_text = y_train.astype('str')
        encoded = as_text.map(encoder)

        # Series.map yields NaN for keys missing from the encoder; fail
        # loudly instead of passing NaN labels further down the pipeline.
        unmapped = encoded.isna()
        if unmapped.any():
            unknown = sorted(set(as_text[unmapped]))
            raise ValueError(f"Unknown label value(s): {unknown}")
        return encoded.astype('int64')

    def get_feature_size(self):
        """Return ``(1, number_of_feature_columns)``."""
        return 1, len(self.FEATURE_COLUMNS)
|