import pandas as pd

from utils.read_config import get_args


def check_csv(upload_file):
    """Read ``upload_file`` as CSV and return the resulting DataFrame.

    No validation is performed here; any exception raised by
    ``pd.read_csv`` propagates to the caller.
    """
    return pd.read_csv(upload_file)


def load_sample(num_sample_records, sample_method, df, col_name):
    """Return a sample of a single column of ``df``.

    Args:
        num_sample_records: Requested number of rows. It is capped at the
            value returned by ``get_args("first_records")``.
        sample_method: ``"First"``, ``"Last"`` or ``"Random"``. Any other
            value returns the column-restricted frame unsampled and without
            an index reset.
        df: Source DataFrame.
        col_name: Name of the only column to keep.

    Returns:
        A new DataFrame holding the sampled rows. For the three known
        methods the index is reset, so the previous index is kept as an
        extra column.

    Raises:
        KeyError: If ``col_name`` is not a column of ``df``.
    """
    # Both settings are read up front, regardless of the sampling method.
    max_records = get_args("first_records")
    random_seed = get_args("random_seed")

    sample_size = min(num_sample_records, max_records)

    # Keep only the required column.
    df = df[[col_name]]

    if sample_method == "First":
        df = df.head(sample_size).copy().reset_index()
    elif sample_method == "Last":
        # tail() is used instead of iloc[-n:] because a size of 0 would turn
        # the slice into iloc[0:] and return every row instead of none.
        df = df.tail(sample_size).copy().reset_index()
    elif sample_method == "Random":
        # Sampling without replacement cannot exceed the population size;
        # clamp so "Random" truncates like "First"/"Last" instead of raising.
        sample_size = min(sample_size, len(df))
        df = df.sample(sample_size, random_state=random_seed).copy().reset_index()

    return df