biasaware / utils /load_csv.py
freyam's picture
Update the UI and Modularise the methodologies
0321f34
raw
history blame
802 Bytes
import pandas as pd
from utils.read_config import get_args
# Load a sample of the dataset.
def load_sample(num_sample_records, sample_method, df, col_name):
    """Return a sample of a single column of ``df``.

    The requested number of records is capped at the value that
    ``get_args("first_records")`` returns.

    Args:
        num_sample_records: Number of rows requested.
        sample_method: ``"First"``, ``"Last"`` or ``"Random"``.
        df: Source DataFrame.
        col_name: Name of the column to keep.

    Returns:
        For a recognised ``sample_method``, a new DataFrame holding the
        sampled rows of ``col_name`` after ``reset_index()`` (so the original
        index is kept as an ``index`` column). For any other value, the
        single-column frame is returned unsampled and without an index reset.
    """
    sample_first_records = get_args("first_records")
    sample_random_seed = get_args("random_seed")

    # Never hand back more rows than the configured maximum.
    num_sample_records = min(num_sample_records, sample_first_records)

    # Keep only the required column.
    df = df[[col_name]]

    if sample_method == "First":
        sampled = df.head(num_sample_records)
    elif sample_method == "Last":
        # tail() special-cases 0, whereas iloc[-0:] is iloc[0:] and would
        # return every row instead of none.
        sampled = df.tail(num_sample_records)
    elif sample_method == "Random":
        # sample() raises when asked for more rows than exist (without
        # replacement); cap it so "Random" degrades like "First"/"Last".
        sampled = df.sample(
            min(num_sample_records, len(df)),
            random_state=sample_random_seed,
        )
    else:
        # Unrecognised method: preserve the original pass-through behaviour.
        return df

    # reset_index() returns a new frame, so no explicit copy is needed.
    return sampled.reset_index()