File size: 856 Bytes
b3ce2b2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
import pandas as pd
from utils.read_config import get_args
def check_csv(upload_file):
    """Parse ``upload_file`` as CSV and return the resulting DataFrame.

    ``upload_file`` is handed to ``pandas.read_csv`` unchanged, so it may be
    anything that function accepts (a path or a file-like object). Parsing
    errors raised by pandas propagate to the caller.
    """
    return pd.read_csv(upload_file)

# Function to load sample of dataset
def load_sample(num_sample_records, sample_method, df, col_name):
    """Return a sample of a single column of ``df``.

    Args:
        num_sample_records: Requested number of rows. It is capped at the
            value returned by ``get_args("first_records")``, and the result
            never holds more rows than ``df`` has.
        sample_method: ``"First"``, ``"Last"`` or ``"Random"``. Any other
            value returns the single-column frame unsampled, with its
            original index left in place.
        df: Source DataFrame.
        col_name: Name of the column to keep.

    Returns:
        A new DataFrame. For the three supported methods the index is reset,
        so the original row labels are kept as an extra column (named
        ``index`` when the source index is unnamed) next to ``col_name``.

    Raises:
        KeyError: If ``col_name`` is not a column of ``df``.
    """
    max_records = get_args("first_records")
    random_seed = get_args("random_seed")

    sample_size = min(num_sample_records, max_records)

    # Keep only the required column.
    column = df[[col_name]]

    if sample_method == "First":
        return column.iloc[:sample_size].reset_index()
    if sample_method == "Last":
        # tail() instead of iloc[-n:]: for n == 0 the slice iloc[-0:] is
        # iloc[0:], which would return every row rather than none.
        return column.tail(sample_size).reset_index()
    if sample_method == "Random":
        # DataFrame.sample raises when asked for more rows than exist; clamp
        # so "Random" tolerates short inputs the same way slicing does.
        sample_size = min(sample_size, len(column))
        return column.sample(sample_size, random_state=random_seed).reset_index()
    return column