Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -7,12 +7,13 @@ import seaborn as sns
|
|
7 |
def load_and_clean_data(file_path):
|
8 |
data = pd.read_csv(file_path)
|
9 |
|
10 |
-
#
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
data.fillna("
|
|
|
16 |
|
17 |
return data
|
18 |
|
@@ -49,76 +50,74 @@ def filter_data(fiscal_year, employer, job_title, country_of_birth, country_of_n
|
|
49 |
def generate_visuals(filtered_data):
|
50 |
# Gender Distribution Bar Chart
|
51 |
plt.figure(figsize=(12, 8))
|
52 |
-
sns.countplot(data=filtered_data, x='gender', order=filtered_data['gender'].value_counts().index
|
53 |
-
plt.title("Gender Distribution"
|
54 |
-
plt.xlabel("Gender"
|
55 |
-
plt.ylabel("Count"
|
56 |
-
plt.xticks(
|
57 |
-
plt.yticks(fontsize=12)
|
58 |
plt.tight_layout()
|
59 |
gender_chart = gr.Plot(plt.gcf())
|
60 |
-
plt.close()
|
61 |
|
62 |
-
#
|
63 |
plt.figure(figsize=(12, 8))
|
64 |
-
|
65 |
-
|
66 |
-
plt.
|
67 |
-
plt.
|
68 |
-
plt.ylabel("Employer Name", fontsize=14)
|
69 |
plt.tight_layout()
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
gender_chart,
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
gr.
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
gr.
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
gr.Plot(label="Gender Distribution"),
|
108 |
-
gr.Plot(label="Top Employers")
|
109 |
-
]
|
110 |
-
|
111 |
-
with gr.Blocks() as app:
|
112 |
with gr.Row():
|
113 |
-
gr.
|
|
|
|
|
114 |
with gr.Row():
|
115 |
-
|
116 |
-
|
117 |
-
filter_.style(container=True)
|
118 |
-
apply_button.style()
|
119 |
with gr.Row():
|
120 |
-
|
121 |
-
|
122 |
-
gr.
|
|
|
|
|
|
|
|
|
|
|
|
|
123 |
|
124 |
-
|
|
|
7 |
def load_and_clean_data(file_path):
    """Read a CSV file and replace every missing value with a placeholder.

    Placeholders are chosen per column type so that filling never changes a
    column's dtype:

    * numeric columns (any width)  -> ``-1``
    * datetime columns             -> ``1970-01-01``
    * everything else              -> ``"Unknown"``

    Args:
        file_path: Anything ``pandas.read_csv`` accepts (path, URL or
            file-like object).

    Returns:
        The loaded DataFrame with no missing values left.
    """
    data = pd.read_csv(file_path)

    # "number" / "datetime" cover every numeric width and datetime
    # resolution, not only float64/int64 and datetime64[ns]; a column missed
    # here would be filled with the string "Unknown" and degrade to object.
    numeric_cols = data.select_dtypes(include="number").columns
    datetime_cols = data.select_dtypes(include="datetime").columns

    if len(numeric_cols):
        data[numeric_cols] = data[numeric_cols].fillna(-1)  # Placeholder for numeric columns
    if len(datetime_cols):
        data[datetime_cols] = data[datetime_cols].fillna(pd.Timestamp("1970-01-01"))  # Placeholder for datetime

    # Only non-numeric, non-datetime columns can still contain gaps here.
    return data.fillna("Unknown")
|
19 |
|
|
|
50 |
def generate_visuals(filtered_data):
    """Build the gender and country-of-birth charts for the filtered rows.

    Args:
        filtered_data: DataFrame containing at least the ``gender`` and
            ``country_of_birth`` columns.

    Returns:
        A ``(gender_chart, country_chart)`` tuple of ``gr.Plot`` components,
        each wrapping its own 12x8 inch matplotlib figure.
    """
    # Gender Distribution Bar Chart
    gender_fig, gender_ax = plt.subplots(figsize=(12, 8))
    gender_counts = filtered_data['gender'].value_counts()
    # Skip drawing when no rows match, so an empty filter result yields blank
    # axes instead of relying on how the plotting libraries treat empty input.
    if not gender_counts.empty:
        sns.countplot(data=filtered_data, x='gender', order=gender_counts.index, ax=gender_ax)
    gender_ax.set_title("Gender Distribution")
    gender_ax.set_xlabel("Gender")
    gender_ax.set_ylabel("Count")
    gender_ax.tick_params(axis='x', labelrotation=45)
    gender_fig.tight_layout()
    gender_chart = gr.Plot(gender_fig)
    # pyplot keeps every figure it creates alive until it is closed; without
    # this, each button click would leave two more figures in memory.
    plt.close(gender_fig)

    # Country of Birth Distribution
    country_fig, country_ax = plt.subplots(figsize=(12, 8))
    top_countries = filtered_data['country_of_birth'].value_counts().head(10)
    # An empty Series would otherwise make the bar plot fail.
    if not top_countries.empty:
        top_countries.plot(kind='bar', color='skyblue', ax=country_ax)
    country_ax.set_title("Top 10 Countries of Birth")
    country_ax.set_xlabel("Country")
    country_ax.set_ylabel("Count")
    country_fig.tight_layout()
    country_chart = gr.Plot(country_fig)
    plt.close(country_fig)

    return gender_chart, country_chart
|
71 |
+
|
72 |
+
# Gradio Interface
def app(fiscal_year, employer, job_title, country_of_birth, country_of_nationality, min_salary, max_salary, worksite_city, worksite_state):
    """Click handler: filter the dataset, then chart the matching rows.

    The nine filter values are forwarded to ``filter_data`` unchanged and in
    the same order, and its result is handed to ``generate_visuals``.

    Returns:
        ``(filtered, gender_chart, country_chart)`` - the filtered data
        followed by the two charts produced by ``generate_visuals``.
    """
    filter_values = (
        fiscal_year,
        employer,
        job_title,
        country_of_birth,
        country_of_nationality,
        min_salary,
        max_salary,
        worksite_city,
        worksite_state,
    )
    matching_rows = filter_data(*filter_values)
    gender_plot, country_plot = generate_visuals(matching_rows)
    return matching_rows, gender_plot, country_plot
|
77 |
+
|
78 |
+
# Dropdown options
def _dropdown_choices(column, as_text=False):
    """Return ``["All"]`` followed by the sorted distinct non-null values of ``data[column]``.

    With ``as_text=True`` the distinct values are converted to ``str`` before
    sorting, so they are ordered as text.
    """
    distinct = data[column].dropna().unique()
    if as_text:
        distinct = distinct.astype(str)
    return ["All", *sorted(distinct.tolist())]


fiscal_years = _dropdown_choices('fiscal_year', as_text=True)
employers = _dropdown_choices('employer_name')
job_titles = _dropdown_choices('job_title')
countries_of_birth = _dropdown_choices('country_of_birth')
countries_of_nationality = _dropdown_choices('country_of_nationality')
worksite_cities = _dropdown_choices('worksite_city')
worksite_states = _dropdown_choices('worksite_state')
|
86 |
+
|
87 |
+
# Gradio components
# NOTE(review): the nesting below is reconstructed from a flattened diff view;
# confirm that the table and both plots belong to the same final row.
with gr.Blocks() as demo:
    # Page heading
    with gr.Row():
        gr.Markdown("### Data Exploration Dashboard")

    # Filters: what and who
    with gr.Row():
        fiscal_year = gr.Dropdown(choices=fiscal_years, label="Fiscal Year")
        employer = gr.Dropdown(choices=employers, label="Employer")
        job_title = gr.Dropdown(choices=job_titles, label="Job Title")

    # Filters: origin
    with gr.Row():
        country_of_birth = gr.Dropdown(choices=countries_of_birth, label="Country of Birth")
        country_of_nationality = gr.Dropdown(choices=countries_of_nationality, label="Country of Nationality")

    # Filters: salary range (free-text boxes)
    with gr.Row():
        min_salary = gr.Textbox(label="Min Salary (USD)")
        max_salary = gr.Textbox(label="Max Salary (USD)")

    # Filters: work location
    with gr.Row():
        worksite_city = gr.Dropdown(choices=worksite_cities, label="Worksite City")
        worksite_state = gr.Dropdown(choices=worksite_states, label="Worksite State")

    with gr.Row():
        apply_filters = gr.Button("Apply Filters")

    # Results
    with gr.Row():
        output_table = gr.Dataframe(label="Filtered Data")
        gender_chart = gr.Plot()
        country_chart = gr.Plot()

    # Order must match the positional parameters of `app` and its return tuple.
    filter_inputs = [
        fiscal_year,
        employer,
        job_title,
        country_of_birth,
        country_of_nationality,
        min_salary,
        max_salary,
        worksite_city,
        worksite_state,
    ]
    result_outputs = [output_table, gender_chart, country_chart]

    # Event listeners have to be registered while the Blocks context is open.
    apply_filters.click(fn=app, inputs=filter_inputs, outputs=result_outputs)

demo.launch()
|