Spaces:

avid-ml
/

biasaware

Running

sudipta002 committed on Sep 9, 2023

Commit

b3ce2b2

1 Parent(s): a365da6

Add sample files

Files changed (4) hide show

data/z_animal.csv ADDED Viewed

+AnimalID,CommonName,ScientificName,Class,Order,Family,Habitat,ConservationStatus
+1,Lion,Panthera leo,Mammalia,Carnivora,Felidae,Savanna,Vulnerable
+2,Eagle,Aquila chrysaetos,Aves,Accipitriformes,Accipitridae,Mountains,Least Concern
+3,Dolphin,Tursiops truncatus,Mammalia,Cetacea,Delphinidae,Ocean,Least Concern
+4,Elephant,Loxodonta africana,Mammalia,Proboscidea,Elephantidae,Grassland,Vulnerable
+5,Tiger,Panthera tigris,Mammalia,Carnivora,Felidae,Forest,Endangered
+6,Penguin,Spheniscidae,Aves,Sphenisciformes,Spheniscidae,Antarctica,Least Concern
+7,Giraffe,Giraffa camelopardalis,Mammalia,Artiodactyla,Giraffidae,Savanna,Vulnerable
+8,Cheetah,Acinonyx jubatus,Mammalia,Carnivora,Felidae,Grassland,Vulnerable
+9,Panda,Ailuropoda melanoleuca,Mammalia,Carnivora,Ursidae,Forest,Endangered
+10,Kangaroo,Macropus rufus,Mammalia,Diprotodontia,Macropodidae,Grassland,Least Concern

data/z_employee.csv ADDED Viewed

+EmployeeID,FirstName,LastName,Email,Department,Salary
+101,John,Smith,[email protected],Finance,60000
+102,Emily,Johnson,[email protected],Marketing,55000
+103,Michael,Williams,[email protected],HR,50000
+104,Susan,Anderson,[email protected],IT,65000
+105,David,Martin,[email protected],Sales,58000
+106,Linda,Davis,[email protected],Finance,62000
+107,William,Miller,[email protected],Marketing,56000
+108,Sarah,Anderson,[email protected],HR,51000
+109,Robert,Clark,[email protected],IT,67000
+110,Karen,Wilson,[email protected],Sales,59000
+111,James,Brown,[email protected],Finance,61000
+112,Anna,Johnson,[email protected],Marketing,57000
+113,Christopher,Moore,[email protected],HR,52000
+114,Laura,White,[email protected],IT,68000
+115,Mark,Davis,[email protected],Sales,60000
+116,Patricia,Jones,[email protected],Finance,63000
+117,Matthew,Taylor,[email protected],Marketing,58000
+118,Jennifer,Young,[email protected],HR,53000
+119,Steven,Anderson,[email protected],IT,69000
+120,Elizabeth,Thomas,[email protected],Sales,61000
+121,Kevin,Harris,[email protected],Finance,64000
+122,Deborah,Smith,[email protected],Marketing,59000
+123,Joseph,Walker,[email protected],HR,54000
+124,Cynthia,Jackson,[email protected],IT,70000
+125,Daniel,Hall,[email protected],Sales,62000

data/z_house.csv ADDED Viewed

+PropertyID,StreetAddress,City,State,ZipCode,NumberOfBedrooms,NumberOfBathrooms,SquareFootage,Price
+1,123 Main St,Los Angeles,CA,90001,3,2,1800,550000
+2,456 Elm St,New York,NY,10001,2,1,1200,750000
+3,789 Oak St,San Francisco,CA,94101,4,3,2500,950000
+4,101 Maple St,Boston,MA,02101,3,2.5,2000,680000
+5,202 Pine St,Miami,FL,33101,4,3.5,2700,820000
+6,303 Cedar St,Chicago,IL,60601,2,1,1100,450000

utils/load_csv.py ADDED Viewed

+import pandas as pd
+from utils.read_config import get_args
def check_csv(upload_file):
    """Read ``upload_file`` as CSV and return the parsed DataFrame.

    Args:
        upload_file: Anything ``pandas.read_csv`` accepts (a path or a
            file-like object).

    Returns:
        The DataFrame produced by ``pandas.read_csv``. Parsing errors raised
        by pandas are not caught here.
    """
    return pd.read_csv(upload_file)
def load_sample(num_sample_records, sample_method, df, col_name):
    """Return a sample of a single column of ``df``.

    Args:
        num_sample_records: Requested number of rows. Capped at the
            ``first_records`` value obtained from ``get_args``.
        sample_method: ``"First"``, ``"Last"`` or ``"Random"``.
        df: Source DataFrame.
        col_name: Name of the only column to keep.

    Returns:
        For a recognised method, a new DataFrame with the sampled rows. The
        index is reset without ``drop``, so the previous index is kept as an
        extra column. For any other ``sample_method`` the single-column frame
        is returned unsampled, with its index untouched.
    """
    max_records = get_args("first_records")
    random_seed = get_args("random_seed")
    # Never hand out more rows than the configured limit allows.
    num_sample_records = min(num_sample_records, max_records)

    # Keep only the required column.
    df = df[[col_name]]

    if sample_method == "First":
        return df.head(num_sample_records).reset_index()
    if sample_method == "Last":
        # tail() equals iloc[-n:] for every n except 0, where the slice
        # would turn into iloc[0:] and return the whole frame.
        return df.tail(num_sample_records).reset_index()
    if sample_method == "Random":
        # sample() raises when asked for more rows than exist; cap the
        # request so it truncates like "First" and "Last" do.
        sample_size = min(num_sample_records, len(df))
        return df.sample(sample_size, random_state=random_seed).reset_index()
    return df