LeonceNsh committed on
Commit
888756a
·
verified ·
1 Parent(s): e50d04a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +69 -70
app.py CHANGED
@@ -7,12 +7,13 @@ import seaborn as sns
7
  def load_and_clean_data(file_path):
8
  data = pd.read_csv(file_path)
9
 
10
- # Explicitly handle incompatible dtype columns
11
- for col in data.select_dtypes(include=['float64', 'int64']).columns:
12
- data[col].fillna(-1, inplace=True) # Use a numeric placeholder for numeric columns
13
- for col in data.select_dtypes(include=['datetime64[ns]']).columns:
14
- data[col].fillna(pd.Timestamp("1970-01-01"), inplace=True) # Use a placeholder datetime
15
- data.fillna("Unknown", inplace=True) # Remaining columns (object types) filled with "Unknown"
 
16
 
17
  return data
18
 
@@ -49,76 +50,74 @@ def filter_data(fiscal_year, employer, job_title, country_of_birth, country_of_n
49
  def generate_visuals(filtered_data):
50
  # Gender Distribution Bar Chart
51
  plt.figure(figsize=(12, 8))
52
- sns.countplot(data=filtered_data, x='gender', order=filtered_data['gender'].value_counts().index, palette='viridis')
53
- plt.title("Gender Distribution", fontsize=16)
54
- plt.xlabel("Gender", fontsize=14)
55
- plt.ylabel("Count", fontsize=14)
56
- plt.xticks(fontsize=12)
57
- plt.yticks(fontsize=12)
58
  plt.tight_layout()
59
  gender_chart = gr.Plot(plt.gcf())
60
- plt.close()
61
 
62
- # Employer Distribution
63
  plt.figure(figsize=(12, 8))
64
- employer_counts = filtered_data['employer_name'].value_counts().head(10)
65
- sns.barplot(x=employer_counts.values, y=employer_counts.index, palette='coolwarm')
66
- plt.title("Top 10 Employers", fontsize=16)
67
- plt.xlabel("Count", fontsize=14)
68
- plt.ylabel("Employer Name", fontsize=14)
69
  plt.tight_layout()
70
- employer_chart = gr.Plot(plt.gcf())
71
- plt.close()
72
-
73
- return gender_chart, employer_chart
74
-
75
- # Gradio app
76
- def gradio_app(fiscal_year, employer, job_title, country_of_birth, country_of_nationality, min_salary, max_salary, worksite_city, worksite_state):
77
- filtered_data = filter_data(fiscal_year, employer, job_title, country_of_birth, country_of_nationality, min_salary, max_salary, worksite_city, worksite_state)
78
- gender_chart, employer_chart = generate_visuals(filtered_data)
79
- return filtered_data.head(10), gender_chart, employer_chart
80
-
81
- # Dropdown and slider options
82
- fiscal_year_options = ["All"] + sorted(data['fiscal_year'].dropna().astype(str).unique())
83
- employer_options = ["All"] + sorted(data['employer_name'].unique())
84
- job_title_options = ["All"] + sorted(data['job_title'].unique())
85
- country_of_birth_options = ["All"] + sorted(data['country_of_birth'].unique())
86
- country_of_nationality_options = ["All"] + sorted(data['country_of_nationality'].unique())
87
- worksite_city_options = ["All"] + sorted(data['worksite_city'].unique())
88
- worksite_state_options = ["All"] + sorted(data['worksite_state'].unique())
89
-
90
- # Define Gradio interface
91
- filters = [
92
- gr.Dropdown(fiscal_year_options, label="Fiscal Year"),
93
- gr.Dropdown(employer_options, label="Employer Name"),
94
- gr.Dropdown(job_title_options, label="Job Title"),
95
- gr.Dropdown(country_of_birth_options, label="Country of Birth"),
96
- gr.Dropdown(country_of_nationality_options, label="Country of Nationality"),
97
- gr.Slider(0, 500000, step=1000, label="Minimum Salary"),
98
- gr.Slider(0, 500000, step=1000, label="Maximum Salary"),
99
- gr.Dropdown(worksite_city_options, label="Worksite City"),
100
- gr.Dropdown(worksite_state_options, label="Worksite State"),
101
- ]
102
-
103
- apply_button = gr.Button("Apply Filters")
104
-
105
- outputs = [
106
- gr.DataFrame(label="Filtered Data"),
107
- gr.Plot(label="Gender Distribution"),
108
- gr.Plot(label="Top Employers")
109
- ]
110
-
111
- with gr.Blocks() as app:
112
  with gr.Row():
113
- gr.Markdown("## H-1B Visa Data Visualizer")
 
 
114
  with gr.Row():
115
- with gr.Column():
116
- for filter_ in filters:
117
- filter_.style(container=True)
118
- apply_button.style()
119
  with gr.Row():
120
- apply_button.click(gradio_app, filters, outputs)
121
- for output in outputs:
122
- gr.Row(output)
 
 
 
 
 
 
123
 
124
- app.launch()
 
7
  def load_and_clean_data(file_path):
8
  data = pd.read_csv(file_path)
9
 
10
+ # Handle missing values explicitly
11
+ numeric_cols = data.select_dtypes(include=['float64', 'int64']).columns
12
+ datetime_cols = data.select_dtypes(include=['datetime64[ns]']).columns
13
+
14
+ data[numeric_cols] = data[numeric_cols].fillna(-1) # Placeholder for numeric columns
15
+ data[datetime_cols] = data[datetime_cols].fillna(pd.Timestamp("1970-01-01")) # Placeholder for datetime
16
+ data.fillna("Unknown", inplace=True) # Remaining columns
17
 
18
  return data
19
 
 
50
  def generate_visuals(filtered_data):
51
  # Gender Distribution Bar Chart
52
  plt.figure(figsize=(12, 8))
53
+ sns.countplot(data=filtered_data, x='gender', order=filtered_data['gender'].value_counts().index)
54
+ plt.title("Gender Distribution")
55
+ plt.xlabel("Gender")
56
+ plt.ylabel("Count")
57
+ plt.xticks(rotation=45)
 
58
  plt.tight_layout()
59
  gender_chart = gr.Plot(plt.gcf())
 
60
 
61
+ # Country of Birth Distribution
62
  plt.figure(figsize=(12, 8))
63
+ filtered_data['country_of_birth'].value_counts().head(10).plot(kind='bar', color='skyblue')
64
+ plt.title("Top 10 Countries of Birth")
65
+ plt.xlabel("Country")
66
+ plt.ylabel("Count")
 
67
  plt.tight_layout()
68
+ country_chart = gr.Plot(plt.gcf())
69
+
70
+ return gender_chart, country_chart
71
+
72
+ # Gradio Interface
73
+ def app(fiscal_year, employer, job_title, country_of_birth, country_of_nationality, min_salary, max_salary, worksite_city, worksite_state):
74
+ filtered = filter_data(fiscal_year, employer, job_title, country_of_birth, country_of_nationality, min_salary, max_salary, worksite_city, worksite_state)
75
+ gender_chart, country_chart = generate_visuals(filtered)
76
+ return filtered, gender_chart, country_chart
77
+
78
+ # Dropdown options
79
+ fiscal_years = ["All"] + sorted(data['fiscal_year'].dropna().unique().astype(str).tolist())
80
+ employers = ["All"] + sorted(data['employer_name'].dropna().unique().tolist())
81
+ job_titles = ["All"] + sorted(data['job_title'].dropna().unique().tolist())
82
+ countries_of_birth = ["All"] + sorted(data['country_of_birth'].dropna().unique().tolist())
83
+ countries_of_nationality = ["All"] + sorted(data['country_of_nationality'].dropna().unique().tolist())
84
+ worksite_cities = ["All"] + sorted(data['worksite_city'].dropna().unique().tolist())
85
+ worksite_states = ["All"] + sorted(data['worksite_state'].dropna().unique().tolist())
86
+
87
+ # Gradio components
88
+ with gr.Blocks() as demo:
89
+ with gr.Row():
90
+ gr.Markdown("### Data Exploration Dashboard")
91
+
92
+ with gr.Row():
93
+ fiscal_year = gr.Dropdown(label="Fiscal Year", choices=fiscal_years)
94
+ employer = gr.Dropdown(label="Employer", choices=employers)
95
+ job_title = gr.Dropdown(label="Job Title", choices=job_titles)
96
+
97
+ with gr.Row():
98
+ country_of_birth = gr.Dropdown(label="Country of Birth", choices=countries_of_birth)
99
+ country_of_nationality = gr.Dropdown(label="Country of Nationality", choices=countries_of_nationality)
100
+
101
+ with gr.Row():
102
+ min_salary = gr.Textbox(label="Min Salary (USD)")
103
+ max_salary = gr.Textbox(label="Max Salary (USD)")
104
+
 
 
 
 
 
105
  with gr.Row():
106
+ worksite_city = gr.Dropdown(label="Worksite City", choices=worksite_cities)
107
+ worksite_state = gr.Dropdown(label="Worksite State", choices=worksite_states)
108
+
109
  with gr.Row():
110
+ apply_filters = gr.Button("Apply Filters")
111
+
 
 
112
  with gr.Row():
113
+ output_table = gr.Dataframe(label="Filtered Data")
114
+ gender_chart = gr.Plot()
115
+ country_chart = gr.Plot()
116
+
117
+ apply_filters.click(
118
+ app,
119
+ inputs=[fiscal_year, employer, job_title, country_of_birth, country_of_nationality, min_salary, max_salary, worksite_city, worksite_state],
120
+ outputs=[output_table, gender_chart, country_chart]
121
+ )
122
 
123
+ demo.launch()