Update the UI and Modularise the methodologies
- app.py +62 -115
- methodologies.json +14 -0
- plot.ipynb +0 -0
- utils/load_csv.py +6 -6
- z_animal.csv +0 -11
- z_employee.csv +0 -26
- z_house.csv +0 -7
app.py
CHANGED
@@ -1,98 +1,44 @@
+import json
 import gradio as gr
 import pandas as pd
 import os
+
 from scripts.genbit_metrics import *
 from scripts.gender_profession_tagging import *
 from scripts.gender_tagging import *
 from utils.load_csv import *
 from utils.read_config import get_args
 
-
-    ["Category", "Value", "Percentage"],
-    ["Total Reviews", 50000, None],
-    ["Total Sentences", 621647, None],
-    ["Pronouns in Sentences", None, None],
-    ["Male Pronouns", 85615, None],
-    ["Female Pronouns", 39372, None],
-    ["Both Male and Female Pronouns", 7765, None],
-    ["Exclusive Usage of Pronouns", None, None],
-    ["Only Male Pronouns", 77860, 13.77],
-    ["Only Female Pronouns", 31617, 6.33],
-    ["Pronouns and Professions in Sentences", None, None],
-    ["Male Pronouns with Professions", 5580, 0.9],
-    ["Female Pronouns with Professions", 2618, 0.42],
-    ["Exclusive Usage of Pronouns with Professions", None, None],
-    ["Only Male Pronouns with Professions", 5011, 0.81],
-    ["Only Female Pronouns with Professions", 2049, 0.33],
-    ["Pronouns and Professions in Combination", None, None],
-    ["Male or Female Pronouns with Professions", 7629, 1.23],
-    ["Male and Female Pronouns with Professions", 569, 0.09]
-]
-
-
-def display_methodology(methodology):
-    title = "### " + methodology
-    description = ""
-
-    if methodology == "Term Identity Diversity Analysis":
-        description = "333"
-    elif methodology == "Gender Label Evaluation":
-        description = "This approach to addressing gender bias in language places a strong emphasis on a fundamental shift in detection and mitigation strategies.\n- Instead of solely relying on traditional frequency-based methods, this approach adopts a more nuanced perspective, prioritizing features within the text that consider contextual and semantic cues. It recognizes that gender bias extends beyond mere word frequency and delves into how language is structured and how it reinforces gender stereotypes.\n- Even with advanced models like Word Embedding and Contextual Word Embedding, which capture more complex language features, there's still a risk of inheriting biases from training data.\n- To tackle this, this approach advocates for a data-driven strategy, involving the collection and labeling of datasets encompassing various subtypes of bias, using a comprehensive taxonomy for precise categorization."
-    elif methodology == "Microsoft Genbit":
-        description = "GenBiT is a versatile tool designed to address gender bias in language datasets by utilizing word co-occurrence statistical methods to measure bias. It introduces a novel approach to mitigating gender bias by combining contextual data augmentation, random sampling, sentence classification, and targeted gendered data filtering.\n- The primary goal is to reduce historical gender biases within conversational parallel multilingual datasets, ultimately enhancing the fairness and inclusiveness of machine learning model training and its subsequent applications.\n- What sets GenBiT apart is its adaptability to various forms of bias, not limited to gender alone. It can effectively address biases related to race, religion, or other dimensions, making it a valuable generic tool for bias mitigation in language datasets.\n- GenBiT's impact extends beyond bias reduction metrics; it has shown positive results in improving the performance of machine learning classifiers like Support Vector Machine(SVM). Augmented datasets produced by GenBiT yield significant enhancements in f1-score when compared to the original datasets, underlining its practical benefits in machine learning applications."
-
-    return (
-        gr.Markdown.update(title, visible=True),
-        gr.Markdown.update(description, visible=True)
-    )
+methodologies = json.load(open("methodologies.json", "r"))
+
+
+def get_methodology_metadata(methodology):
+    title = "## " + methodology
+    description = methodologies.get(methodology).get("description")
+
+    metadata = f"{title}\n\n{description}"
+
+    return gr.Markdown.update(metadata, visible=True)
 
 
-def run_evaluation(dataset_file, dataset_scope, dataset_scope_n, dataset_columns, methodology):
+def evaluate(dataset_file, dataset_scope, dataset_scope_n, dataset_column, methodology):
     status = {}
-
-    dataset = check_csv(dataset_file.name)
+    dataset = pd.read_csv(dataset_file.name)
     sample_method = dataset_scope
-    col_name = dataset_columns
+    col_name = dataset_column
     num_sample_records = dataset_scope_n
 
-    if methodology == "Term Identity Diversity Analysis":
-        status = load_dataset_and_analyze_gender_tag(
-            dataset, sample_method, col_name, num_sample_records)
-    if methodology == "Gender Label Evaluation":
-        status = load_dataset_and_analyze_gender_profession(
-            dataset, sample_method, col_name, num_sample_records)
-    if methodology == "Microsoft Genbit":
-        status = load_dataset_and_get_genbit_metrics(
-            dataset, sample_method, col_name, num_sample_records)
-
-    # status = {
-    #     "dataset": dataset_file.name,
-    #     "methodology": methodology,
-    #     "scope": dataset_scope + " " + str(dataset_scope_n),
-    #     "column": dataset_columns
-    # }
+    status = globals()[methodologies.get(methodology).get("fx")](
+        dataset, sample_method, col_name, num_sample_records
+    )
 
     return gr.JSON.update(status, visible=True)
 
-    if methodology == "A":
-        run_a(dataset)
-    elif methodology == "B":
-        run_b(dataset)
-    elif methodology == "C":
-        run_c(dataset)
-
 
 def process_dataset(dataset):
-
     data = pd.read_csv(dataset.name)
-    # maximum_records = get_args("first_records")
-    # input_records = data.shape(0)
-    # num_sample_records = input_records if input_records < maximum_records else maximum_records
 
-    columns = data.columns.tolist()
-    columns = [x for x in columns if data[x].dtype == "object"]
+    columns = data.select_dtypes(include=["object"]).columns.tolist()
 
     return (
        gr.Radio.update(
@@ -104,13 +50,13 @@ def process_dataset(dataset):
             interactive=True,
         ),
         gr.Slider.update(
-            label="Number of Entries",
-            info=f"Determines the number of entries to be analyzed.
+            label=f"Number of Entries",
+            info=f"Determines the number of entries to be analyzed. Due to computational constraints, the maximum number of entries that can be analyzed is {get_args('first_records')}.",
             minimum=1,
-            maximum=data.shape[0],
-            value=data.shape[0] // 2,
+            maximum=min(data.shape[0], get_args("first_records")),
+            value=min(data.shape[0], get_args("first_records")) // 2,
             visible=True,
-            interactive=True
+            interactive=True,
         ),
         gr.Radio.update(
             label="Column",
@@ -119,101 +65,102 @@ def process_dataset(dataset):
             value=columns[0],
             visible=True,
             interactive=True,
-        )
+        ),
     )
 
 
-def
+def get_column_metadata(dataset, column):
     data = pd.read_csv(dataset.name)
-    corpus = data[column].
+    corpus = data[column].head(10).tolist()
 
-    return gr.Dataframe.update(
+    return gr.Dataframe.update(
+        value=pd.DataFrame({f"Data Corpus: {column}": corpus}), visible=True
+    )
 
 
 BiasAware = gr.Blocks(title="BiasAware: Dataset Bias Detection")
 
 with BiasAware:
-    gr.Markdown("# BiasAware: Dataset Bias Detection")
     gr.Markdown(
-        "
+        "# BiasAware: Dataset Bias Detection\n\nBiasAware is a specialized tool for detecting and quantifying biases within datasets used for Natural Language Processing (NLP) tasks. NLP training datasets frequently mirror the inherent biases of their source materials, resulting in AI models that unintentionally perpetuate stereotypes, exhibit underrepresentation, and showcase skewed perspectives."
     )
 
     with gr.Row():
-        with gr.Column(scale=
+        with gr.Column(scale=2):
             gr.Markdown("## Dataset")
 
-            dataset_file = gr.File()
+            dataset_file = gr.File(label="Dataset")
             dataset_examples = gr.Examples(
                 [
-                    os.path.join(os.path.dirname(__file__),
-                                 "data/z_animal.csv"),
-                    os.path.join(os.path.dirname(__file__),
-                                 "data/z_employee.csv"),
-                    os.path.join(os.path.dirname(
-                        __file__), "data/z_house.csv"),
-
+                    os.path.join(os.path.dirname(__file__), "data/z_animal.csv"),
+                    os.path.join(os.path.dirname(__file__), "data/z_employee.csv"),
+                    os.path.join(os.path.dirname(__file__), "data/z_house.csv"),
                 ],
                 inputs=dataset_file,
+                label="Example Datasets",
             )
 
             dataset_scope = gr.Radio(visible=False)
            dataset_scope_n = gr.Slider(visible=False)
-            dataset_columns = gr.Radio(visible=False)
+            dataset_column = gr.Radio(visible=False)
 
-            dataset_corpus = gr.Dataframe(
+            dataset_corpus = gr.Dataframe(
+                row_count=(5, "fixed"), col_count=(1, "fixed"), visible=False
+            )
 
-        with gr.Column(scale=
+        with gr.Column(scale=2):
             gr.Markdown("## Methodology")
 
             methodology = gr.Radio(
                 label="Methodology",
                 info="Determines the methodology to be used for bias detection",
                 choices=[
-                    "Term Identity Diversity Analysis",
-                    "Gender Label Evaluation",
-                    "Microsoft Genbit",
+                    "Gender Divide (Term Identity Diversity)",
+                    "Gender Profession Bias (Lexical Evaluation)",
+                    "GenBiT (Microsoft Responsible AI Gender Bias Tool)",
                 ],
-                value="Term Identity Diversity Analysis",
             )
 
             evalButton = gr.Button("Run Evaluation")
 
-
-            methodology_description = gr.Markdown(visible=False)
+            methodology_metadata = gr.Markdown(visible=False)
 
         with gr.Column(scale=4):
             gr.Markdown("## Result")
 
             result_status = gr.JSON(visible=False)
-            result = gr.DataFrame(
+            result = gr.DataFrame(
+                row_count=(5, "fixed"), col_count=(3, "fixed"), visible=False
+            )
 
     dataset_file.change(
         fn=process_dataset,
         inputs=[dataset_file],
-        outputs=[
-            dataset_scope,
-            dataset_scope_n,
-            dataset_columns
-        ]
+        outputs=[dataset_scope, dataset_scope_n, dataset_column],
    )
 
-    dataset_columns.change(
-        fn=
-        inputs=[dataset_file,
+    dataset_column.change(
+        fn=get_column_metadata,
+        inputs=[dataset_file, dataset_column],
         outputs=[dataset_corpus],
     )
 
     methodology.change(
-        fn=display_methodology,
+        fn=get_methodology_metadata,
         inputs=[methodology],
-        outputs=[
+        outputs=[methodology_metadata],
     )
 
     evalButton.click(
-        fn=run_evaluation,
-        inputs=[
-
-
+        fn=evaluate,
+        inputs=[
+            dataset_file,
+            dataset_scope,
+            dataset_scope_n,
+            dataset_column,
+            methodology,
+        ],
+        outputs=[result_status],
     )
 
 BiasAware.launch()
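The central refactor above: evaluate() no longer hard-codes an if/elif branch per methodology; it resolves the loader named by the "fx" field in methodologies.json via globals(), which works because app.py star-imports the scripts.* modules. A minimal runnable sketch of that dispatch pattern, with a stub loader and an inline mapping standing in for the real imports and JSON file:

# Stub loader standing in for scripts.genbit_metrics (hypothetical body).
def load_dataset_and_get_genbit_metrics(dataset, sample_method, col_name, num_sample_records):
    return {"methodology": "GenBiT", "records": num_sample_records}

# Inline stand-in for json.load(open("methodologies.json", "r")).
methodologies = {
    "GenBiT (Microsoft Responsible AI Gender Bias Tool)": {
        "fx": "load_dataset_and_get_genbit_metrics"
    }
}

def evaluate(methodology, dataset, sample_method, col_name, num_sample_records):
    # Resolve the configured function name to a module-level callable.
    fx = globals()[methodologies[methodology]["fx"]]
    return fx(dataset, sample_method, col_name, num_sample_records)

print(evaluate("GenBiT (Microsoft Responsible AI Gender Bias Tool)",
               None, "First", "text", 100))

An explicit name-to-function dict would catch typos at import time rather than at evaluation time, but the globals() lookup keeps the JSON file as the single registry of methodologies.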
methodologies.json
ADDED
@@ -0,0 +1,14 @@
+{
+    "Gender Divide (Term Identity Diversity)": {
+        "description": "333",
+        "fx": "load_dataset_and_analyze_gender_tag"
+    },
+    "Gender Profession Bias (Lexical Evaluation)": {
+        "description": "This approach to addressing gender bias in language places a strong emphasis on a fundamental shift in detection and mitigation strategies.\n- Instead of solely relying on traditional frequency-based methods, this approach adopts a more nuanced perspective, prioritizing features within the text that consider contextual and semantic cues. It recognizes that gender bias extends beyond mere word frequency and delves into how language is structured and how it reinforces gender stereotypes.\n- Even with advanced models like Word Embedding and Contextual Word Embedding, which capture more complex language features, there's still a risk of inheriting biases from training data.\n- To tackle this, this approach advocates for a data-driven strategy, involving the collection and labeling of datasets encompassing various subtypes of bias, using a comprehensive taxonomy for precise categorization.",
+        "fx": "load_dataset_and_analyze_gender_profession"
+    },
+    "GenBiT (Microsoft Responsible AI Gender Bias Tool)": {
+        "description": "[GenBiT](https://www.microsoft.com/en-us/research/uploads/prod/2021/10/MSJAR_Genbit_Final_Version-616fd3a073758.pdf) is a versatile tool designed to address gender bias in language datasets by utilizing word co-occurrence statistical methods to measure bias. It introduces a novel approach to mitigating gender bias by combining contextual data augmentation, random sampling, sentence classification, and targeted gendered data filtering.\n- The primary goal is to reduce historical gender biases within conversational parallel multilingual datasets, ultimately enhancing the fairness and inclusiveness of machine learning model training and its subsequent applications.\n- What sets GenBiT apart is its adaptability to various forms of bias, not limited to gender alone. It can effectively address biases related to race, religion, or other dimensions, making it a valuable generic tool for bias mitigation in language datasets.\n- GenBiT's impact extends beyond bias reduction metrics; it has shown positive results in improving the performance of machine learning classifiers like Support Vector Machine(SVM). Augmented datasets produced by GenBiT yield significant enhancements in f1-score when compared to the original datasets, underlining its practical benefits in machine learning applications.",
+        "fx": "load_dataset_and_get_genbit_metrics"
+    }
+}
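Because each "fx" name is resolved only when a user runs that methodology, a typo in this file surfaces late. A hypothetical fail-fast guard (not part of this commit; assumes the loader modules are star-imported as in app.py, so the names live in globals()):

import json

methodologies = json.load(open("methodologies.json", "r"))

# Verify every "fx" resolves to a loaded callable before the UI launches.
for name, meta in methodologies.items():
    if not callable(globals().get(meta["fx"])):
        raise RuntimeError(f"{name}: no callable named {meta['fx']!r} is loaded")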
plot.ipynb
DELETED
The diff for this file is too large to render.
utils/load_csv.py
CHANGED
@@ -1,17 +1,16 @@
 import pandas as pd
 from utils.read_config import get_args
-def check_csv(upload_file):
-    df = pd.read_csv(upload_file)
-    return df
 
 # Function to load sample of dataset
+
+
 def load_sample(num_sample_records, sample_method, df, col_name):
 
     sample_first_records = get_args("first_records")
     sample_random_seed = get_args("random_seed")
 
     num_sample_records = num_sample_records if num_sample_records <= sample_first_records else sample_first_records
 
     # Keep only required column
     df = df[[col_name]]
     if sample_method == "First":
@@ -19,5 +18,6 @@ def load_sample(num_sample_records, sample_method, df, col_name):
     if sample_method == "Last":
         df = df.iloc[-num_sample_records:].copy().reset_index()
     if sample_method == "Random":
         df = df.sample(num_sample_records,
                        random_state=sample_random_seed).copy().reset_index()
+    return df
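Note the substantive fix here: the "Random" branch previously fell off the end of load_sample without returning the sampled frame; the new `return df` closes that hole. A self-contained sketch of the sampling behaviour, with get_args stubbed (the real values come from utils.read_config) and the "First" branch body assumed, since the diff does not show it:

import pandas as pd

def get_args(key):
    # Stub: the real get_args reads these from the project config.
    return {"first_records": 1000, "random_seed": 42}[key]

def load_sample(num_sample_records, sample_method, df, col_name):
    # Cap the sample size at the configured maximum.
    num_sample_records = min(num_sample_records, get_args("first_records"))
    df = df[[col_name]]  # keep only the column under analysis
    if sample_method == "First":
        df = df.iloc[:num_sample_records].copy().reset_index()  # assumed body
    if sample_method == "Last":
        df = df.iloc[-num_sample_records:].copy().reset_index()
    if sample_method == "Random":
        df = df.sample(num_sample_records,
                       random_state=get_args("random_seed")).copy().reset_index()
    return df

frame = pd.DataFrame({"text": [f"sentence {i}" for i in range(10)]})
print(load_sample(3, "Random", frame, "text"))  # 3 rows, reproducible via the seed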
z_animal.csv
DELETED
@@ -1,11 +0,0 @@
-AnimalID,CommonName,ScientificName,Class,Order,Family,Habitat,ConservationStatus
-1,Lion,Panthera leo,Mammalia,Carnivora,Felidae,Savanna,Vulnerable
-2,Eagle,Aquila chrysaetos,Aves,Accipitriformes,Accipitridae,Mountains,Least Concern
-3,Dolphin,Tursiops truncatus,Mammalia,Cetacea,Delphinidae,Ocean,Least Concern
-4,Elephant,Loxodonta africana,Mammalia,Proboscidea,Elephantidae,Grassland,Vulnerable
-5,Tiger,Panthera tigris,Mammalia,Carnivora,Felidae,Forest,Endangered
-6,Penguin,Spheniscidae,Aves,Sphenisciformes,Spheniscidae,Antarctica,Least Concern
-7,Giraffe,Giraffa camelopardalis,Mammalia,Artiodactyla,Giraffidae,Savanna,Vulnerable
-8,Cheetah,Acinonyx jubatus,Mammalia,Carnivora,Felidae,Grassland,Vulnerable
-9,Panda,Ailuropoda melanoleuca,Mammalia,Carnivora,Ursidae,Forest,Endangered
-10,Kangaroo,Macropus rufus,Mammalia,Diprotodontia,Macropodidae,Grassland,Least Concern
z_employee.csv
DELETED
@@ -1,26 +0,0 @@
-EmployeeID,FirstName,LastName,Email,Department,Salary
-101,John,Smith,[email protected],Finance,60000
-102,Emily,Johnson,[email protected],Marketing,55000
-103,Michael,Williams,[email protected],HR,50000
-104,Susan,Anderson,[email protected],IT,65000
-105,David,Martin,[email protected],Sales,58000
-106,Linda,Davis,[email protected],Finance,62000
-107,William,Miller,[email protected],Marketing,56000
-108,Sarah,Anderson,[email protected],HR,51000
-109,Robert,Clark,[email protected],IT,67000
-110,Karen,Wilson,[email protected],Sales,59000
-111,James,Brown,[email protected],Finance,61000
-112,Anna,Johnson,[email protected],Marketing,57000
-113,Christopher,Moore,[email protected],HR,52000
-114,Laura,White,[email protected],IT,68000
-115,Mark,Davis,[email protected],Sales,60000
-116,Patricia,Jones,[email protected],Finance,63000
-117,Matthew,Taylor,[email protected],Marketing,58000
-118,Jennifer,Young,[email protected],HR,53000
-119,Steven,Anderson,[email protected],IT,69000
-120,Elizabeth,Thomas,[email protected],Sales,61000
-121,Kevin,Harris,[email protected],Finance,64000
-122,Deborah,Smith,[email protected],Marketing,59000
-123,Joseph,Walker,[email protected],HR,54000
-124,Cynthia,Jackson,[email protected],IT,70000
-125,Daniel,Hall,[email protected],Sales,62000
z_house.csv
DELETED
@@ -1,7 +0,0 @@
-PropertyID,StreetAddress,City,State,ZipCode,NumberOfBedrooms,NumberOfBathrooms,SquareFootage,Price
-1,123 Main St,Los Angeles,CA,90001,3,2,1800,550000
-2,456 Elm St,New York,NY,10001,2,1,1200,750000
-3,789 Oak St,San Francisco,CA,94101,4,3,2500,950000
-4,101 Maple St,Boston,MA,02101,3,2.5,2000,680000
-5,202 Pine St,Miami,FL,33101,4,3.5,2700,820000
-6,303 Cedar St,Chicago,IL,60601,2,1,1100,450000