Spaces:

LeonceNsh
/

h1b_visa_trends

Sleeping

App Files Community

LeonceNsh committed on Jan 13

Commit

888756a

verified ·

1 Parent(s): e50d04a

Update app.py

Browse files

Files changed (1) hide show

app.py +69 -70

app.py CHANGED Viewed

@@ -7,12 +7,13 @@ import seaborn as sns
 def load_and_clean_data(file_path):
     data = pd.read_csv(file_path)
-    # Explicitly handle incompatible dtype columns
-    for col in data.select_dtypes(include=['float64', 'int64']).columns:
-        data[col].fillna(-1, inplace=True)  # Use a numeric placeholder for numeric columns
-    for col in data.select_dtypes(include=['datetime64[ns]']).columns:
-        data[col].fillna(pd.Timestamp("1970-01-01"), inplace=True)  # Use a placeholder datetime
-    data.fillna("Unknown", inplace=True)  # Remaining columns (object types) filled with "Unknown"
     return data
@@ -49,76 +50,74 @@ def filter_data(fiscal_year, employer, job_title, country_of_birth, country_of_n
 def generate_visuals(filtered_data):
     # Gender Distribution Bar Chart
     plt.figure(figsize=(12, 8))
-    sns.countplot(data=filtered_data, x='gender', order=filtered_data['gender'].value_counts().index, palette='viridis')
-    plt.title("Gender Distribution", fontsize=16)
-    plt.xlabel("Gender", fontsize=14)
-    plt.ylabel("Count", fontsize=14)
-    plt.xticks(fontsize=12)
-    plt.yticks(fontsize=12)
     plt.tight_layout()
     gender_chart = gr.Plot(plt.gcf())
-    plt.close()
-    # Employer Distribution
     plt.figure(figsize=(12, 8))
-    employer_counts = filtered_data['employer_name'].value_counts().head(10)
-    sns.barplot(x=employer_counts.values, y=employer_counts.index, palette='coolwarm')
-    plt.title("Top 10 Employers", fontsize=16)
-    plt.xlabel("Count", fontsize=14)
-    plt.ylabel("Employer Name", fontsize=14)
     plt.tight_layout()
-    employer_chart = gr.Plot(plt.gcf())
-    plt.close()
-    return gender_chart, employer_chart
-# Gradio app
-def gradio_app(fiscal_year, employer, job_title, country_of_birth, country_of_nationality, min_salary, max_salary, worksite_city, worksite_state):
-    filtered_data = filter_data(fiscal_year, employer, job_title, country_of_birth, country_of_nationality, min_salary, max_salary, worksite_city, worksite_state)
-    gender_chart, employer_chart = generate_visuals(filtered_data)
-    return filtered_data.head(10), gender_chart, employer_chart
-# Dropdown and slider options
-fiscal_year_options = ["All"] + sorted(data['fiscal_year'].dropna().astype(str).unique())
-employer_options = ["All"] + sorted(data['employer_name'].unique())
-job_title_options = ["All"] + sorted(data['job_title'].unique())
-country_of_birth_options = ["All"] + sorted(data['country_of_birth'].unique())
-country_of_nationality_options = ["All"] + sorted(data['country_of_nationality'].unique())
-worksite_city_options = ["All"] + sorted(data['worksite_city'].unique())
-worksite_state_options = ["All"] + sorted(data['worksite_state'].unique())
-# Define Gradio interface
-filters = [
-    gr.Dropdown(fiscal_year_options, label="Fiscal Year"),
-    gr.Dropdown(employer_options, label="Employer Name"),
-    gr.Dropdown(job_title_options, label="Job Title"),
-    gr.Dropdown(country_of_birth_options, label="Country of Birth"),
-    gr.Dropdown(country_of_nationality_options, label="Country of Nationality"),
-    gr.Slider(0, 500000, step=1000, label="Minimum Salary"),
-    gr.Slider(0, 500000, step=1000, label="Maximum Salary"),
-    gr.Dropdown(worksite_city_options, label="Worksite City"),
-    gr.Dropdown(worksite_state_options, label="Worksite State"),
-]
-apply_button = gr.Button("Apply Filters")
-outputs = [
-    gr.DataFrame(label="Filtered Data"),
-    gr.Plot(label="Gender Distribution"),
-    gr.Plot(label="Top Employers")
-]
-with gr.Blocks() as app:
     with gr.Row():
-        gr.Markdown("## H-1B Visa Data Visualizer")
     with gr.Row():
-        with gr.Column():
-            for filter_ in filters:
-                filter_.style(container=True)
-            apply_button.style()
     with gr.Row():
-        apply_button.click(gradio_app, filters, outputs)
-    for output in outputs:
-        gr.Row(output)
-app.launch()

 def load_and_clean_data(file_path):
     data = pd.read_csv(file_path)
+    # Handle missing values explicitly
+    numeric_cols = data.select_dtypes(include=['float64', 'int64']).columns
+    datetime_cols = data.select_dtypes(include=['datetime64[ns]']).columns
+    data[numeric_cols] = data[numeric_cols].fillna(-1)  # Placeholder for numeric columns
+    data[datetime_cols] = data[datetime_cols].fillna(pd.Timestamp("1970-01-01"))  # Placeholder for datetime
+    data.fillna("Unknown", inplace=True)  # Remaining columns
     return data
 def generate_visuals(filtered_data):
     # Gender Distribution Bar Chart
     plt.figure(figsize=(12, 8))
+    sns.countplot(data=filtered_data, x='gender', order=filtered_data['gender'].value_counts().index)
+    plt.title("Gender Distribution")
+    plt.xlabel("Gender")
+    plt.ylabel("Count")
+    plt.xticks(rotation=45)
     plt.tight_layout()
     gender_chart = gr.Plot(plt.gcf())
+    # Country of Birth Distribution
     plt.figure(figsize=(12, 8))
+    filtered_data['country_of_birth'].value_counts().head(10).plot(kind='bar', color='skyblue')
+    plt.title("Top 10 Countries of Birth")
+    plt.xlabel("Country")
+    plt.ylabel("Count")
     plt.tight_layout()
+    country_chart = gr.Plot(plt.gcf())
+    return gender_chart, country_chart
+# Gradio Interface
+def app(fiscal_year, employer, job_title, country_of_birth, country_of_nationality, min_salary, max_salary, worksite_city, worksite_state):
+    filtered = filter_data(fiscal_year, employer, job_title, country_of_birth, country_of_nationality, min_salary, max_salary, worksite_city, worksite_state)
+    gender_chart, country_chart = generate_visuals(filtered)
+    return filtered, gender_chart, country_chart
+# Dropdown options
+fiscal_years = ["All"] + sorted(data['fiscal_year'].dropna().unique().astype(str).tolist())
+employers = ["All"] + sorted(data['employer_name'].dropna().unique().tolist())
+job_titles = ["All"] + sorted(data['job_title'].dropna().unique().tolist())
+countries_of_birth = ["All"] + sorted(data['country_of_birth'].dropna().unique().tolist())
+countries_of_nationality = ["All"] + sorted(data['country_of_nationality'].dropna().unique().tolist())
+worksite_cities = ["All"] + sorted(data['worksite_city'].dropna().unique().tolist())
+worksite_states = ["All"] + sorted(data['worksite_state'].dropna().unique().tolist())
+# Gradio components
+with gr.Blocks() as demo:
+    with gr.Row():
+        gr.Markdown("### Data Exploration Dashboard")
+    with gr.Row():
+        fiscal_year = gr.Dropdown(label="Fiscal Year", choices=fiscal_years)
+        employer = gr.Dropdown(label="Employer", choices=employers)
+        job_title = gr.Dropdown(label="Job Title", choices=job_titles)
+    with gr.Row():
+        country_of_birth = gr.Dropdown(label="Country of Birth", choices=countries_of_birth)
+        country_of_nationality = gr.Dropdown(label="Country of Nationality", choices=countries_of_nationality)
+    with gr.Row():
+        min_salary = gr.Textbox(label="Min Salary (USD)")
+        max_salary = gr.Textbox(label="Max Salary (USD)")
     with gr.Row():
+        worksite_city = gr.Dropdown(label="Worksite City", choices=worksite_cities)
+        worksite_state = gr.Dropdown(label="Worksite State", choices=worksite_states)
     with gr.Row():
+        apply_filters = gr.Button("Apply Filters")
     with gr.Row():
+        output_table = gr.Dataframe(label="Filtered Data")
+        gender_chart = gr.Plot()
+        country_chart = gr.Plot()
+    apply_filters.click(
+        app,
+        inputs=[fiscal_year, employer, job_title, country_of_birth, country_of_nationality, min_salary, max_salary, worksite_city, worksite_state],
+        outputs=[output_table, gender_chart, country_chart]
+    )
+demo.launch()