LeonceNsh committed on
Commit
e50d04a
·
verified ·
1 Parent(s): ec8154b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +56 -75
app.py CHANGED
@@ -6,9 +6,14 @@ import seaborn as sns
6
  # Load and clean data
7
  def load_and_clean_data(file_path):
8
  data = pd.read_csv(file_path)
9
- data['rec_date'] = pd.to_datetime(data['rec_date'], errors='coerce')
10
- data['first_decision_date'] = pd.to_datetime(data['first_decision_date'], errors='coerce')
11
- data.fillna("Unknown", inplace=True)
 
 
 
 
 
12
  return data
13
 
14
  # Load dataset
@@ -50,94 +55,70 @@ def generate_visuals(filtered_data):
50
  plt.ylabel("Count", fontsize=14)
51
  plt.xticks(fontsize=12)
52
  plt.yticks(fontsize=12)
 
53
  gender_chart = gr.Plot(plt.gcf())
54
  plt.close()
55
 
56
- # Pie chart for top 10 employers
57
- employer_counts = filtered_data['employer_name'].value_counts().head(10)
58
  plt.figure(figsize=(12, 8))
59
- employer_counts.plot(kind='pie', autopct='%1.1f%%', colors=sns.color_palette('Set3'))
 
60
  plt.title("Top 10 Employers", fontsize=16)
61
- plt.ylabel("") # Hide the default ylabel
 
 
62
  employer_chart = gr.Plot(plt.gcf())
63
  plt.close()
64
 
65
- # Salary Distribution
66
- plt.figure(figsize=(12, 8))
67
- sns.histplot(filtered_data['wage_amt'], kde=True, color="blue", bins=30)
68
- plt.title("Salary Distribution", fontsize=16)
69
- plt.xlabel("Salary (wage_amt)", fontsize=14)
70
- plt.ylabel("Frequency", fontsize=14)
71
- plt.xticks(fontsize=12)
72
- plt.yticks(fontsize=12)
73
- salary_chart = gr.Plot(plt.gcf())
74
- plt.close()
75
-
76
- return gender_chart, employer_chart, salary_chart
77
 
78
- # Gradio interface
79
  def gradio_app(fiscal_year, employer, job_title, country_of_birth, country_of_nationality, min_salary, max_salary, worksite_city, worksite_state):
80
  filtered_data = filter_data(fiscal_year, employer, job_title, country_of_birth, country_of_nationality, min_salary, max_salary, worksite_city, worksite_state)
81
- gender_chart, employer_chart, salary_chart = generate_visuals(filtered_data)
82
- return (
83
- filtered_data.head(10), # Display first 10 rows of filtered data
84
- gender_chart,
85
- employer_chart,
86
- salary_chart,
87
- )
88
-
89
- # Define dropdown options
90
- fiscal_year_options = ["All"] + sorted(data['fiscal_year'].dropna().unique().astype(str).tolist())
91
- employer_options = ["All"] + sorted(data['employer_name'].dropna().unique())
92
- job_title_options = ["All"] + sorted(data['job_title'].dropna().unique())
93
- country_birth_options = ["All"] + sorted(data['country_of_birth'].dropna().unique())
94
- country_nationality_options = ["All"] + sorted(data['country_of_nationality'].dropna().unique())
95
- worksite_city_options = ["All"] + sorted(data['worksite_city'].dropna().unique())
96
- worksite_state_options = ["All"] + sorted(data['worksite_state'].dropna().unique())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
97
 
98
- # Gradio app interface
99
  with gr.Blocks() as app:
100
  with gr.Row():
101
- gr.Markdown("<h1 style='color: #4CAF50;'>Visa Application Insights Dashboard</h1>")
102
-
103
  with gr.Row():
104
  with gr.Column():
105
- fiscal_year = gr.Dropdown(label="Fiscal Year", choices=fiscal_year_options, value="All")
106
- employer = gr.Dropdown(label="Employer Name", choices=employer_options, value="All")
107
- job_title = gr.Dropdown(label="Job Title", choices=job_title_options, value="All")
108
- country_of_birth = gr.Dropdown(label="Country of Birth", choices=country_birth_options, value="All")
109
- country_of_nationality = gr.Dropdown(label="Country of Nationality", choices=country_nationality_options, value="All")
110
- min_salary = gr.Number(label="Minimum Salary (USD)", value=0)
111
- max_salary = gr.Number(label="Maximum Salary (USD)", value=1000000)
112
- worksite_city = gr.Dropdown(label="Worksite City", choices=worksite_city_options, value="All")
113
- worksite_state = gr.Dropdown(label="Worksite State", choices=worksite_state_options, value="All")
114
-
115
- apply_filters = gr.Button("Apply Filters", elem_id="apply-filters-btn", style={"background-color": "#007BFF", "color": "white", "padding": "10px", "font-size": "16px"})
116
-
117
- with gr.Row():
118
- gr.Markdown("<h2>Filtered Data</h2>")
119
- data_table = gr.Dataframe(label="Filtered Data")
120
-
121
  with gr.Row():
122
- gr.Markdown("<h2>Gender Distribution</h2>")
123
- gender_chart = gr.Plot()
124
-
125
- with gr.Row():
126
- gr.Markdown("<h2>Top 10 Employers</h2>")
127
- employer_chart = gr.Plot()
128
-
129
- with gr.Row():
130
- gr.Markdown("<h2>Salary Distribution</h2>")
131
- salary_chart = gr.Plot()
132
-
133
- apply_filters.click(
134
- gradio_app,
135
- inputs=[
136
- fiscal_year, employer, job_title, country_of_birth, country_of_nationality,
137
- min_salary, max_salary, worksite_city, worksite_state
138
- ],
139
- outputs=[data_table, gender_chart, employer_chart, salary_chart]
140
- )
141
 
142
- # Run the app
143
  app.launch()
 
6
  # Load and clean data
7
  def load_and_clean_data(file_path):
8
  data = pd.read_csv(file_path)
9
+
10
+ # Explicitly handle incompatible dtype columns
11
+ for col in data.select_dtypes(include=['float64', 'int64']).columns:
12
+ data[col].fillna(-1, inplace=True) # Use a numeric placeholder for numeric columns
13
+ for col in data.select_dtypes(include=['datetime64[ns]']).columns:
14
+ data[col].fillna(pd.Timestamp("1970-01-01"), inplace=True) # Use a placeholder datetime
15
+ data.fillna("Unknown", inplace=True) # Remaining columns (object types) filled with "Unknown"
16
+
17
  return data
18
 
19
  # Load dataset
 
55
  plt.ylabel("Count", fontsize=14)
56
  plt.xticks(fontsize=12)
57
  plt.yticks(fontsize=12)
58
+ plt.tight_layout()
59
  gender_chart = gr.Plot(plt.gcf())
60
  plt.close()
61
 
62
+ # Employer Distribution
 
63
  plt.figure(figsize=(12, 8))
64
+ employer_counts = filtered_data['employer_name'].value_counts().head(10)
65
+ sns.barplot(x=employer_counts.values, y=employer_counts.index, palette='coolwarm')
66
  plt.title("Top 10 Employers", fontsize=16)
67
+ plt.xlabel("Count", fontsize=14)
68
+ plt.ylabel("Employer Name", fontsize=14)
69
+ plt.tight_layout()
70
  employer_chart = gr.Plot(plt.gcf())
71
  plt.close()
72
 
73
+ return gender_chart, employer_chart
 
 
 
 
 
 
 
 
 
 
 
74
 
75
+ # Gradio app
76
  def gradio_app(fiscal_year, employer, job_title, country_of_birth, country_of_nationality, min_salary, max_salary, worksite_city, worksite_state):
77
  filtered_data = filter_data(fiscal_year, employer, job_title, country_of_birth, country_of_nationality, min_salary, max_salary, worksite_city, worksite_state)
78
+ gender_chart, employer_chart = generate_visuals(filtered_data)
79
+ return filtered_data.head(10), gender_chart, employer_chart
80
+
81
+ # Dropdown and slider options
82
+ fiscal_year_options = ["All"] + sorted(data['fiscal_year'].dropna().astype(str).unique())
83
+ employer_options = ["All"] + sorted(data['employer_name'].unique())
84
+ job_title_options = ["All"] + sorted(data['job_title'].unique())
85
+ country_of_birth_options = ["All"] + sorted(data['country_of_birth'].unique())
86
+ country_of_nationality_options = ["All"] + sorted(data['country_of_nationality'].unique())
87
+ worksite_city_options = ["All"] + sorted(data['worksite_city'].unique())
88
+ worksite_state_options = ["All"] + sorted(data['worksite_state'].unique())
89
+
90
+ # Define Gradio interface
91
+ filters = [
92
+ gr.Dropdown(fiscal_year_options, label="Fiscal Year"),
93
+ gr.Dropdown(employer_options, label="Employer Name"),
94
+ gr.Dropdown(job_title_options, label="Job Title"),
95
+ gr.Dropdown(country_of_birth_options, label="Country of Birth"),
96
+ gr.Dropdown(country_of_nationality_options, label="Country of Nationality"),
97
+ gr.Slider(0, 500000, step=1000, label="Minimum Salary"),
98
+ gr.Slider(0, 500000, step=1000, label="Maximum Salary"),
99
+ gr.Dropdown(worksite_city_options, label="Worksite City"),
100
+ gr.Dropdown(worksite_state_options, label="Worksite State"),
101
+ ]
102
+
103
+ apply_button = gr.Button("Apply Filters")
104
+
105
+ outputs = [
106
+ gr.DataFrame(label="Filtered Data"),
107
+ gr.Plot(label="Gender Distribution"),
108
+ gr.Plot(label="Top Employers")
109
+ ]
110
 
 
111
  with gr.Blocks() as app:
112
  with gr.Row():
113
+ gr.Markdown("## H-1B Visa Data Visualizer")
 
114
  with gr.Row():
115
  with gr.Column():
116
+ for filter_ in filters:
117
+ filter_.style(container=True)
118
+ apply_button.style()
 
 
 
 
 
 
 
 
 
 
 
 
 
119
  with gr.Row():
120
+ apply_button.click(gradio_app, filters, outputs)
121
+ for output in outputs:
122
+ gr.Row(output)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
123
 
 
124
  app.launch()