# Class for loading data from the dataset
import os
import logging

import numpy as np
import pandas as pd
import torch
from torch.utils.data import TensorDataset, DataLoader

from path_config import (
    RAW_DATA_PATH,
    TRAIN_FEATURES_PATH,
    TRAIN_LABELS_PATH,
    VAL_FEATURES_PATH,
    VAL_LABELS_PATH,
    TEST_FEATURES_PATH,
    TEST_LABELS_PATH,
)


class NYCDataLoader:
    """Load the preprocessed train/val/test arrays and expose them as loaders.

    The arrays are read with ``np.load`` from the paths declared in
    ``path_config`` and converted to float tensors. The data loaders built by
    :meth:`load_data` iterate over the *feature* tensors only; the label
    tensors are kept on the instance (see :meth:`get_true_anomalies`).
    """

    _MISSING_DATA_MESSAGE = (
        "Preprocessed data not found. Please run the preprocessing script first."
    )

    def __init__(self, batch_size):
        """Store the batch size; tensors stay ``None`` until they are loaded.

        Args:
            batch_size: Batch size handed to every ``DataLoader``.
        """
        self.batch_size = batch_size
        self.train_features = None
        self.train_labels = None
        self.val_features = None
        self.val_labels = None
        self.test_features = None
        self.test_labels = None
        self.logger = logging.getLogger(__name__)

    def create_tensor(self):
        """Load the preprocessed data and convert them to float tensors.

        If any file is missing, a message is printed (and logged) and the
        instance attributes are left untouched; no exception is raised.
        """
        sources = (
            ("train_features", TRAIN_FEATURES_PATH),
            ("train_labels", TRAIN_LABELS_PATH),
            ("val_features", VAL_FEATURES_PATH),
            ("val_labels", VAL_LABELS_PATH),
            ("test_features", TEST_FEATURES_PATH),
            ("test_labels", TEST_LABELS_PATH),
        )
        try:
            # Load everything before assigning anything so a missing file
            # cannot leave the instance with only some of the splits set.
            loaded = {
                attr: torch.tensor(np.load(path)).float()
                for attr, path in sources
            }
        except FileNotFoundError as err:
            self.logger.error(
                "Preprocessed data file missing: %s", err.filename
            )
            print(self._MISSING_DATA_MESSAGE)
            return

        for attr, tensor in loaded.items():
            setattr(self, attr, tensor)

    def load_data(self):
        """Create data loaders for training, validation, and testing.

        Returns:
            A ``(train_loader, val_loader, test_loader)`` tuple. Each loader
            yields batches of feature tensors only.

        Raises:
            FileNotFoundError: If the preprocessed features could not be
                loaded (previously this surfaced as an unrelated error from
                ``DataLoader`` being handed ``None``).
        """
        self.create_tensor()

        feature_sets = (
            self.train_features,
            self.val_features,
            self.test_features,
        )
        if any(features is None for features in feature_sets):
            raise FileNotFoundError(self._MISSING_DATA_MESSAGE)

        train_loader = DataLoader(
            self.train_features, batch_size=self.batch_size, shuffle=True
        )
        # Evaluation loaders keep the stored order: get_true_anomalies()
        # returns the test labels unshuffled, so a shuffled test loader would
        # make per-sample outputs impossible to match to their labels.
        # NOTE(review): assumes test features and labels are row-aligned on
        # disk -- confirm against the preprocessing script.
        val_loader = DataLoader(
            self.val_features, batch_size=self.batch_size, shuffle=False
        )
        test_loader = DataLoader(
            self.test_features, batch_size=self.batch_size, shuffle=False
        )
        print("Data loaded successfully.")
        return train_loader, val_loader, test_loader

    def get_true_anomalies(self):
        """Return the test label tensor (``None`` if data was never loaded)."""
        return self.test_labels