# Origin: neuro-orion-v1/src/pipeline/dataloader.py
# Repository page metadata: uploaded by tgd1115 ("Upload 12 files"), commit 8474315 (verified).
# Class for loading data from the dataset
import os
import logging
import pandas as pd
import torch
import numpy as np
import pandas as pd
from torch.utils.data import TensorDataset, DataLoader
from path_config import (
RAW_DATA_PATH,
TRAIN_FEATURES_PATH,
TRAIN_LABELS_PATH,
VAL_FEATURES_PATH,
VAL_LABELS_PATH,
TEST_FEATURES_PATH,
TEST_LABELS_PATH,
)
class NYCDataLoader:
    """Load the preprocessed train/val/test arrays and serve them as DataLoaders.

    Features and labels are read from the ``.npy`` files named by the
    ``*_FEATURES_PATH`` / ``*_LABELS_PATH`` constants and stored on the
    instance as float32 tensors. The loaders returned by :meth:`load_data`
    iterate over the *feature* tensors only; labels are kept on the instance
    (see :meth:`get_true_anomalies`).
    """

    def __init__(self, batch_size):
        """Store the batch size; no data is read until ``create_tensor`` runs.

        Args:
            batch_size: Number of samples per batch for every loader.
        """
        self.batch_size = batch_size
        # All tensors stay ``None`` until ``create_tensor`` succeeds.
        self.train_features = None
        self.train_labels = None
        self.val_features = None
        self.val_labels = None
        self.test_features = None
        self.test_labels = None
        self.logger = logging.getLogger(__name__)

    @staticmethod
    def _load_tensor(path):
        """Read one ``.npy`` file and return its contents as a float32 tensor."""
        return torch.tensor(np.load(path)).float()

    def create_tensor(self):
        """Load the preprocessed data and convert them to tensors.

        All six arrays are loaded before any attribute is assigned, so a
        missing file never leaves the instance half-populated.

        Raises:
            FileNotFoundError: If any of the preprocessed files is missing.
                The error is logged and re-raised so callers fail here, at
                the real cause, instead of later on a ``None`` dataset.
        """
        try:
            tensors = [
                self._load_tensor(path)
                for path in (
                    TRAIN_FEATURES_PATH,
                    TRAIN_LABELS_PATH,
                    VAL_FEATURES_PATH,
                    VAL_LABELS_PATH,
                    TEST_FEATURES_PATH,
                    TEST_LABELS_PATH,
                )
            ]
        except FileNotFoundError:
            self.logger.error(
                "Preprocessed data not found. Please run the preprocessing script first."
            )
            raise

        (
            self.train_features,
            self.train_labels,
            self.val_features,
            self.val_labels,
            self.test_features,
            self.test_labels,
        ) = tensors

    def load_data(self):
        """Create data loaders for training, validation, and testing.

        Only the training loader shuffles. Validation and test loaders keep
        the on-disk sample order so that per-sample outputs line up with
        ``val_labels`` / ``test_labels`` (and with ``get_true_anomalies``).

        Returns:
            A ``(train_loader, val_loader, test_loader)`` tuple of
            ``DataLoader`` objects, each yielding batches of features.

        Raises:
            FileNotFoundError: Propagated from ``create_tensor``.
        """
        self.create_tensor()
        train_loader = DataLoader(
            self.train_features, batch_size=self.batch_size, shuffle=True
        )
        val_loader = DataLoader(
            self.val_features, batch_size=self.batch_size, shuffle=False
        )
        test_loader = DataLoader(
            self.test_features, batch_size=self.batch_size, shuffle=False
        )
        self.logger.info("Data loaded successfully.")
        return train_loader, val_loader, test_loader

    def get_true_anomalies(self):
        """Get the true anomalies from the test data.

        Returns:
            The test label tensor, or ``None`` if the data has not been
            loaded yet (``create_tensor`` / ``load_data`` not called).
        """
        return self.test_labels