sudipta002
committed on
Commit
·
b3ce2b2
1
Parent(s):
a365da6
Add sample files
Browse files- data/z_animal.csv +11 -0
- data/z_employee.csv +26 -0
- data/z_house.csv +7 -0
- utils/load_csv.py +23 -0
data/z_animal.csv
ADDED
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
AnimalID,CommonName,ScientificName,Class,Order,Family,Habitat,ConservationStatus
|
2 |
+
1,Lion,Panthera leo,Mammalia,Carnivora,Felidae,Savanna,Vulnerable
|
3 |
+
2,Eagle,Aquila chrysaetos,Aves,Accipitriformes,Accipitridae,Mountains,Least Concern
|
4 |
+
3,Dolphin,Tursiops truncatus,Mammalia,Cetacea,Delphinidae,Ocean,Least Concern
|
5 |
+
4,Elephant,Loxodonta africana,Mammalia,Proboscidea,Elephantidae,Grassland,Vulnerable
|
6 |
+
5,Tiger,Panthera tigris,Mammalia,Carnivora,Felidae,Forest,Endangered
|
7 |
+
6,Penguin,Spheniscidae,Aves,Sphenisciformes,Spheniscidae,Antarctica,Least Concern
|
8 |
+
7,Giraffe,Giraffa camelopardalis,Mammalia,Artiodactyla,Giraffidae,Savanna,Vulnerable
|
9 |
+
8,Cheetah,Acinonyx jubatus,Mammalia,Carnivora,Felidae,Grassland,Vulnerable
|
10 |
+
9,Panda,Ailuropoda melanoleuca,Mammalia,Carnivora,Ursidae,Forest,Endangered
|
11 |
+
10,Kangaroo,Macropus rufus,Mammalia,Diprotodontia,Macropodidae,Grassland,Least Concern
|
data/z_employee.csv
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
EmployeeID,FirstName,LastName,Email,Department,Salary
|
2 |
+
101,John,Smith,[email protected],Finance,60000
|
3 |
+
102,Emily,Johnson,[email protected],Marketing,55000
|
4 |
+
103,Michael,Williams,[email protected],HR,50000
|
5 |
+
104,Susan,Anderson,[email protected],IT,65000
|
6 |
+
105,David,Martin,[email protected],Sales,58000
|
7 |
+
106,Linda,Davis,[email protected],Finance,62000
|
8 |
+
107,William,Miller,[email protected],Marketing,56000
|
9 |
+
108,Sarah,Anderson,[email protected],HR,51000
|
10 |
+
109,Robert,Clark,[email protected],IT,67000
|
11 |
+
110,Karen,Wilson,[email protected],Sales,59000
|
12 |
+
111,James,Brown,[email protected],Finance,61000
|
13 |
+
112,Anna,Johnson,[email protected],Marketing,57000
|
14 |
+
113,Christopher,Moore,[email protected],HR,52000
|
15 |
+
114,Laura,White,[email protected],IT,68000
|
16 |
+
115,Mark,Davis,[email protected],Sales,60000
|
17 |
+
116,Patricia,Jones,[email protected],Finance,63000
|
18 |
+
117,Matthew,Taylor,[email protected],Marketing,58000
|
19 |
+
118,Jennifer,Young,[email protected],HR,53000
|
20 |
+
119,Steven,Anderson,[email protected],IT,69000
|
21 |
+
120,Elizabeth,Thomas,[email protected],Sales,61000
|
22 |
+
121,Kevin,Harris,[email protected],Finance,64000
|
23 |
+
122,Deborah,Smith,[email protected],Marketing,59000
|
24 |
+
123,Joseph,Walker,[email protected],HR,54000
|
25 |
+
124,Cynthia,Jackson,[email protected],IT,70000
|
26 |
+
125,Daniel,Hall,[email protected],Sales,62000
|
data/z_house.csv
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
PropertyID,StreetAddress,City,State,ZipCode,NumberOfBedrooms,NumberOfBathrooms,SquareFootage,Price
|
2 |
+
1,123 Main St,Los Angeles,CA,90001,3,2,1800,550000
|
3 |
+
2,456 Elm St,New York,NY,10001,2,1,1200,750000
|
4 |
+
3,789 Oak St,San Francisco,CA,94101,4,3,2500,950000
|
5 |
+
4,101 Maple St,Boston,MA,02101,3,2.5,2000,680000
|
6 |
+
5,202 Pine St,Miami,FL,33101,4,3.5,2700,820000
|
7 |
+
6,303 Cedar St,Chicago,IL,60601,2,1,1100,450000
|
utils/load_csv.py
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
from utils.read_config import get_args
|
3 |
+
def check_csv(upload_file):
    """Parse ``upload_file`` as CSV and return the resulting DataFrame.

    ``upload_file`` is handed to ``pandas.read_csv`` unchanged, so it can be
    anything that function accepts (a path or a file-like object). Any error
    raised by pandas while reading or parsing propagates to the caller.
    """
    return pd.read_csv(upload_file)
|
6 |
+
|
7 |
+
# Function to load sample of dataset
|
8 |
+
def load_sample(num_sample_records, sample_method, df, col_name):
    """Return a sample of a single column of ``df``.

    Args:
        num_sample_records: Requested number of rows. It is clamped to the
            range ``[0, min(get_args("first_records"), len(df))]``.
        sample_method: ``"First"``, ``"Last"`` or ``"Random"``.
        df: Source DataFrame.
        col_name: Name of the one column to keep.

    Returns:
        A new DataFrame holding the sampled rows. ``reset_index()`` is called
        without ``drop=True``, so the previous index is kept as a column next
        to ``col_name``. If ``sample_method`` is none of the three recognised
        values, the single-column frame is returned unsampled and without an
        index reset.

    Raises:
        KeyError: If ``col_name`` is not a column of ``df``.
    """
    sample_first_records = get_args("first_records")
    sample_random_seed = get_args("random_seed")

    # Clamp the request: never above the configured cap, never above the
    # number of rows available (``DataFrame.sample`` raises ValueError when
    # asked for more rows than exist), and never negative (a negative count
    # would turn the slices below into "all but the last n" selections).
    num_sample_records = max(
        0, min(num_sample_records, sample_first_records, len(df))
    )

    # Keep only required column
    df = df[[col_name]]

    if sample_method == "First":
        df = df.head(num_sample_records).copy().reset_index()
    elif sample_method == "Last":
        # ``tail(0)`` yields an empty frame, whereas ``iloc[-0:]`` is the
        # same as ``iloc[0:]`` and would return every row.
        df = df.tail(num_sample_records).copy().reset_index()
    elif sample_method == "Random":
        df = (
            df.sample(num_sample_records, random_state=sample_random_seed)
            .copy()
            .reset_index()
        )
    return df
|