sudipta002 committed on
Commit
b3ce2b2
·
1 Parent(s): a365da6

Add sample files

Browse files
Files changed (4) hide show
  1. data/z_animal.csv +11 -0
  2. data/z_employee.csv +26 -0
  3. data/z_house.csv +7 -0
  4. utils/load_csv.py +23 -0
data/z_animal.csv ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ AnimalID,CommonName,ScientificName,Class,Order,Family,Habitat,ConservationStatus
2
+ 1,Lion,Panthera leo,Mammalia,Carnivora,Felidae,Savanna,Vulnerable
3
+ 2,Eagle,Aquila chrysaetos,Aves,Accipitriformes,Accipitridae,Mountains,Least Concern
4
+ 3,Dolphin,Tursiops truncatus,Mammalia,Cetacea,Delphinidae,Ocean,Least Concern
5
+ 4,Elephant,Loxodonta africana,Mammalia,Proboscidea,Elephantidae,Grassland,Vulnerable
6
+ 5,Tiger,Panthera tigris,Mammalia,Carnivora,Felidae,Forest,Endangered
7
+ 6,Penguin,Spheniscidae,Aves,Sphenisciformes,Spheniscidae,Antarctica,Least Concern
8
+ 7,Giraffe,Giraffa camelopardalis,Mammalia,Artiodactyla,Giraffidae,Savanna,Vulnerable
9
+ 8,Cheetah,Acinonyx jubatus,Mammalia,Carnivora,Felidae,Grassland,Vulnerable
10
+ 9,Panda,Ailuropoda melanoleuca,Mammalia,Carnivora,Ursidae,Forest,Endangered
11
+ 10,Kangaroo,Macropus rufus,Mammalia,Diprotodontia,Macropodidae,Grassland,Least Concern
data/z_employee.csv ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ EmployeeID,FirstName,LastName,Email,Department,Salary
2
+ 101,John,Smith,[email protected],Finance,60000
3
+ 102,Emily,Johnson,[email protected],Marketing,55000
4
+ 103,Michael,Williams,[email protected],HR,50000
5
+ 104,Susan,Anderson,[email protected],IT,65000
6
+ 105,David,Martin,[email protected],Sales,58000
7
+ 106,Linda,Davis,[email protected],Finance,62000
8
+ 107,William,Miller,[email protected],Marketing,56000
9
+ 108,Sarah,Anderson,[email protected],HR,51000
10
+ 109,Robert,Clark,[email protected],IT,67000
11
+ 110,Karen,Wilson,[email protected],Sales,59000
12
+ 111,James,Brown,[email protected],Finance,61000
13
+ 112,Anna,Johnson,[email protected],Marketing,57000
14
+ 113,Christopher,Moore,[email protected],HR,52000
15
+ 114,Laura,White,[email protected],IT,68000
16
+ 115,Mark,Davis,[email protected],Sales,60000
17
+ 116,Patricia,Jones,[email protected],Finance,63000
18
+ 117,Matthew,Taylor,[email protected],Marketing,58000
19
+ 118,Jennifer,Young,[email protected],HR,53000
20
+ 119,Steven,Anderson,[email protected],IT,69000
21
+ 120,Elizabeth,Thomas,[email protected],Sales,61000
22
+ 121,Kevin,Harris,[email protected],Finance,64000
23
+ 122,Deborah,Smith,[email protected],Marketing,59000
24
+ 123,Joseph,Walker,[email protected],HR,54000
25
+ 124,Cynthia,Jackson,[email protected],IT,70000
26
+ 125,Daniel,Hall,[email protected],Sales,62000
data/z_house.csv ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ PropertyID,StreetAddress,City,State,ZipCode,NumberOfBedrooms,NumberOfBathrooms,SquareFootage,Price
2
+ 1,123 Main St,Los Angeles,CA,90001,3,2,1800,550000
3
+ 2,456 Elm St,New York,NY,10001,2,1,1200,750000
4
+ 3,789 Oak St,San Francisco,CA,94101,4,3,2500,950000
5
+ 4,101 Maple St,Boston,MA,02101,3,2.5,2000,680000
6
+ 5,202 Pine St,Miami,FL,33101,4,3.5,2700,820000
7
+ 6,303 Cedar St,Chicago,IL,60601,2,1,1100,450000
utils/load_csv.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ from utils.read_config import get_args
3
def check_csv(upload_file):
    """Read an uploaded CSV file into a pandas DataFrame.

    Despite its name, this function performs no validation of its own: it
    hands ``upload_file`` straight to ``pandas.read_csv`` with default
    options and returns the result.

    Args:
        upload_file: Any input accepted by ``pandas.read_csv`` (a path or a
            file-like object).

    Returns:
        The parsed DataFrame.

    Raises:
        Any exception raised by ``pandas.read_csv`` propagates to the
        caller unchanged.
    """
    return pd.read_csv(upload_file)
6
+
7
+ # Function to load sample of dataset
8
def load_sample(num_sample_records, sample_method, df, col_name):
    """Return a sample of a single column of ``df``.

    The requested sample size is capped at the configured ``first_records``
    value and at the number of rows actually available, and is never
    negative.

    Args:
        num_sample_records: Requested number of rows.
        sample_method: ``"First"``, ``"Last"`` or ``"Random"``. Any other
            value returns the whole selected column without sampling and
            without resetting the index.
        df: Source DataFrame.
        col_name: Name of the only column to keep.

    Returns:
        For a recognised method, a new DataFrame holding the sampled rows.
        Its index is reset, so the original row labels are kept in an
        added ``index`` column next to ``col_name``.

    Raises:
        KeyError: If ``col_name`` is not a column of ``df``.
    """
    # NOTE(review): both config values are presumably integers; confirm
    # against utils.read_config.
    sample_first_records = get_args("first_records")
    sample_random_seed = get_args("random_seed")

    # Keep only required column
    df = df[[col_name]]

    # Cap the request at the configured maximum and at the rows available:
    # DataFrame.sample raises ValueError when asked for more rows than exist
    # (without replacement). Floor at zero so a non-positive request yields
    # an empty sample for every method.
    num_sample_records = max(
        0, min(num_sample_records, sample_first_records, len(df))
    )

    if sample_method == "First":
        return df.head(num_sample_records).reset_index()
    if sample_method == "Last":
        # tail(0) is empty, whereas iloc[-0:] would return every row.
        return df.tail(num_sample_records).reset_index()
    if sample_method == "Random":
        return df.sample(
            num_sample_records, random_state=sample_random_seed
        ).reset_index()
    return df