Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -6,9 +6,14 @@ import seaborn as sns
|
|
6 |
# Load and clean data
|
7 |
def load_and_clean_data(file_path):
|
8 |
data = pd.read_csv(file_path)
|
9 |
-
|
10 |
-
|
11 |
-
data.
|
|
|
|
|
|
|
|
|
|
|
12 |
return data
|
13 |
|
14 |
# Load dataset
|
@@ -50,94 +55,70 @@ def generate_visuals(filtered_data):
|
|
50 |
plt.ylabel("Count", fontsize=14)
|
51 |
plt.xticks(fontsize=12)
|
52 |
plt.yticks(fontsize=12)
|
|
|
53 |
gender_chart = gr.Plot(plt.gcf())
|
54 |
plt.close()
|
55 |
|
56 |
-
#
|
57 |
-
employer_counts = filtered_data['employer_name'].value_counts().head(10)
|
58 |
plt.figure(figsize=(12, 8))
|
59 |
-
employer_counts
|
|
|
60 |
plt.title("Top 10 Employers", fontsize=16)
|
61 |
-
plt.
|
|
|
|
|
62 |
employer_chart = gr.Plot(plt.gcf())
|
63 |
plt.close()
|
64 |
|
65 |
-
|
66 |
-
plt.figure(figsize=(12, 8))
|
67 |
-
sns.histplot(filtered_data['wage_amt'], kde=True, color="blue", bins=30)
|
68 |
-
plt.title("Salary Distribution", fontsize=16)
|
69 |
-
plt.xlabel("Salary (wage_amt)", fontsize=14)
|
70 |
-
plt.ylabel("Frequency", fontsize=14)
|
71 |
-
plt.xticks(fontsize=12)
|
72 |
-
plt.yticks(fontsize=12)
|
73 |
-
salary_chart = gr.Plot(plt.gcf())
|
74 |
-
plt.close()
|
75 |
-
|
76 |
-
return gender_chart, employer_chart, salary_chart
|
77 |
|
78 |
-
# Gradio
|
79 |
def gradio_app(fiscal_year, employer, job_title, country_of_birth, country_of_nationality, min_salary, max_salary, worksite_city, worksite_state):
|
80 |
filtered_data = filter_data(fiscal_year, employer, job_title, country_of_birth, country_of_nationality, min_salary, max_salary, worksite_city, worksite_state)
|
81 |
-
gender_chart, employer_chart
|
82 |
-
return (
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
97 |
|
98 |
-
# Gradio app interface
|
99 |
with gr.Blocks() as app:
|
100 |
with gr.Row():
|
101 |
-
gr.Markdown("
|
102 |
-
|
103 |
with gr.Row():
|
104 |
with gr.Column():
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
country_of_birth = gr.Dropdown(label="Country of Birth", choices=country_birth_options, value="All")
|
109 |
-
country_of_nationality = gr.Dropdown(label="Country of Nationality", choices=country_nationality_options, value="All")
|
110 |
-
min_salary = gr.Number(label="Minimum Salary (USD)", value=0)
|
111 |
-
max_salary = gr.Number(label="Maximum Salary (USD)", value=1000000)
|
112 |
-
worksite_city = gr.Dropdown(label="Worksite City", choices=worksite_city_options, value="All")
|
113 |
-
worksite_state = gr.Dropdown(label="Worksite State", choices=worksite_state_options, value="All")
|
114 |
-
|
115 |
-
apply_filters = gr.Button("Apply Filters", elem_id="apply-filters-btn", style={"background-color": "#007BFF", "color": "white", "padding": "10px", "font-size": "16px"})
|
116 |
-
|
117 |
-
with gr.Row():
|
118 |
-
gr.Markdown("<h2>Filtered Data</h2>")
|
119 |
-
data_table = gr.Dataframe(label="Filtered Data")
|
120 |
-
|
121 |
with gr.Row():
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
-
with gr.Row():
|
126 |
-
gr.Markdown("<h2>Top 10 Employers</h2>")
|
127 |
-
employer_chart = gr.Plot()
|
128 |
-
|
129 |
-
with gr.Row():
|
130 |
-
gr.Markdown("<h2>Salary Distribution</h2>")
|
131 |
-
salary_chart = gr.Plot()
|
132 |
-
|
133 |
-
apply_filters.click(
|
134 |
-
gradio_app,
|
135 |
-
inputs=[
|
136 |
-
fiscal_year, employer, job_title, country_of_birth, country_of_nationality,
|
137 |
-
min_salary, max_salary, worksite_city, worksite_state
|
138 |
-
],
|
139 |
-
outputs=[data_table, gender_chart, employer_chart, salary_chart]
|
140 |
-
)
|
141 |
|
142 |
-
# Run the app
|
143 |
app.launch()
|
|
|
6 |
# Load and clean data
|
7 |
def load_and_clean_data(file_path):
    """Read a CSV file and replace every missing value with a placeholder.

    Placeholders are chosen per dtype so that no column is forced to change
    type just to hold its fill value:

    * ``float64`` / ``int64`` columns -> ``-1``
    * ``datetime64[ns]`` columns      -> ``1970-01-01``
    * everything else                 -> ``"Unknown"``

    Args:
        file_path: Anything accepted by ``pandas.read_csv`` (path, URL or
            file-like object).

    Returns:
        The loaded ``DataFrame`` with no missing values left.
    """
    data = pd.read_csv(file_path)

    # Build one {column: placeholder} mapping and apply it to the frame
    # itself. Calling ``data[col].fillna(..., inplace=True)`` operates on a
    # column selection; with pandas Copy-on-Write that never updates ``data``.
    fill_values = {}
    for col in data.select_dtypes(include=['float64', 'int64']).columns:
        fill_values[col] = -1  # Numeric placeholder for numeric columns
    for col in data.select_dtypes(include=['datetime64[ns]']).columns:
        fill_values[col] = pd.Timestamp("1970-01-01")  # Placeholder datetime
    if fill_values:
        data = data.fillna(fill_values)

    # Remaining columns (object/string types) are filled with "Unknown".
    data = data.fillna("Unknown")

    return data
|
18 |
|
19 |
# Load dataset
|
|
|
55 |
plt.ylabel("Count", fontsize=14)
|
56 |
plt.xticks(fontsize=12)
|
57 |
plt.yticks(fontsize=12)
|
58 |
+
plt.tight_layout()
|
59 |
gender_chart = gr.Plot(plt.gcf())
|
60 |
plt.close()
|
61 |
|
62 |
+
# Employer Distribution
|
|
|
63 |
plt.figure(figsize=(12, 8))
|
64 |
+
employer_counts = filtered_data['employer_name'].value_counts().head(10)
|
65 |
+
sns.barplot(x=employer_counts.values, y=employer_counts.index, palette='coolwarm')
|
66 |
plt.title("Top 10 Employers", fontsize=16)
|
67 |
+
plt.xlabel("Count", fontsize=14)
|
68 |
+
plt.ylabel("Employer Name", fontsize=14)
|
69 |
+
plt.tight_layout()
|
70 |
employer_chart = gr.Plot(plt.gcf())
|
71 |
plt.close()
|
72 |
|
73 |
+
return gender_chart, employer_chart
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
74 |
|
75 |
+
# Gradio app
|
76 |
def gradio_app(fiscal_year, employer, job_title, country_of_birth, country_of_nationality, min_salary, max_salary, worksite_city, worksite_state):
    """Callback that turns the current filter selections into UI outputs.

    All nine arguments are forwarded, in order, to ``filter_data``; the
    result is handed to ``generate_visuals``, which is expected to yield
    exactly two charts.

    Returns:
        A 3-tuple: the first 10 rows of the filtered data (via ``.head(10)``),
        the gender chart, and the employer chart.
    """
    selection = (
        fiscal_year,
        employer,
        job_title,
        country_of_birth,
        country_of_nationality,
        min_salary,
        max_salary,
        worksite_city,
        worksite_state,
    )
    subset = filter_data(*selection)
    gender_chart, employer_chart = generate_visuals(subset)
    preview = subset.head(10)
    return preview, gender_chart, employer_chart
|
80 |
+
|
81 |
+
# Dropdown and slider options
def _with_all(values):
    """Return the sorted *values* preceded by the catch-all "All" entry."""
    return ["All"] + sorted(values)


# Fiscal years are stringified (after dropping missing entries) before sorting.
fiscal_year_options = _with_all(data['fiscal_year'].dropna().astype(str).unique())
employer_options = _with_all(data['employer_name'].unique())
job_title_options = _with_all(data['job_title'].unique())
country_of_birth_options = _with_all(data['country_of_birth'].unique())
country_of_nationality_options = _with_all(data['country_of_nationality'].unique())
worksite_city_options = _with_all(data['worksite_city'].unique())
worksite_state_options = _with_all(data['worksite_state'].unique())
|
89 |
+
|
90 |
+
# Define Gradio interface.
#
# Every component is instantiated inside the ``gr.Blocks`` context so that it
# is actually placed in the layout. Creating components beforehand and then
# calling ``gr.Row(component)`` does not work: layout blocks take keyword-only
# arguments, and the per-component ``.style()`` method no longer exists in
# current Gradio releases.
with gr.Blocks() as app:
    with gr.Row():
        gr.Markdown("## H-1B Visa Data Visualizer")

    with gr.Row():
        with gr.Column():
            # Order must match the positional parameters of ``gradio_app``.
            # Dropdowns start on "All" (the first entry of every options
            # list) and the salary sliders start on the full 0-500000 range.
            # NOTE(review): assumes ``filter_data`` treats "All" as
            # "no filter" -- confirm.
            filters = [
                gr.Dropdown(fiscal_year_options, label="Fiscal Year", value="All"),
                gr.Dropdown(employer_options, label="Employer Name", value="All"),
                gr.Dropdown(job_title_options, label="Job Title", value="All"),
                gr.Dropdown(country_of_birth_options, label="Country of Birth", value="All"),
                gr.Dropdown(country_of_nationality_options, label="Country of Nationality", value="All"),
                gr.Slider(0, 500000, step=1000, label="Minimum Salary", value=0),
                gr.Slider(0, 500000, step=1000, label="Maximum Salary", value=500000),
                gr.Dropdown(worksite_city_options, label="Worksite City", value="All"),
                gr.Dropdown(worksite_state_options, label="Worksite State", value="All"),
            ]
            apply_button = gr.Button("Apply Filters")

    # One row per output, in the order ``gradio_app`` returns its values.
    outputs = []
    with gr.Row():
        outputs.append(gr.DataFrame(label="Filtered Data"))
    with gr.Row():
        outputs.append(gr.Plot(label="Gender Distribution"))
    with gr.Row():
        outputs.append(gr.Plot(label="Top Employers"))

    apply_button.click(gradio_app, inputs=filters, outputs=outputs)

app.launch()
|