Spaces:

LeonceNsh
/

h1b_visa_trends

Sleeping

App Files Files Community

LeonceNsh committed on Jan 13

Commit

e50d04a

verified ·

1 Parent(s): ec8154b

Update app.py

Browse files

Files changed (1) hide show

app.py +56 -75

app.py CHANGED Viewed

@@ -6,9 +6,14 @@ import seaborn as sns
 # Load and clean data
 def load_and_clean_data(file_path):
     data = pd.read_csv(file_path)
-    data['rec_date'] = pd.to_datetime(data['rec_date'], errors='coerce')
-    data['first_decision_date'] = pd.to_datetime(data['first_decision_date'], errors='coerce')
-    data.fillna("Unknown", inplace=True)
     return data
 # Load dataset
@@ -50,94 +55,70 @@ def generate_visuals(filtered_data):
     plt.ylabel("Count", fontsize=14)
     plt.xticks(fontsize=12)
     plt.yticks(fontsize=12)
     gender_chart = gr.Plot(plt.gcf())
     plt.close()
-    # Pie chart for top 10 employers
-    employer_counts = filtered_data['employer_name'].value_counts().head(10)
     plt.figure(figsize=(12, 8))
-    employer_counts.plot(kind='pie', autopct='%1.1f%%', colors=sns.color_palette('Set3'))
     plt.title("Top 10 Employers", fontsize=16)
-    plt.ylabel("")  # Hide the default ylabel
     employer_chart = gr.Plot(plt.gcf())
     plt.close()
-    # Salary Distribution
-    plt.figure(figsize=(12, 8))
-    sns.histplot(filtered_data['wage_amt'], kde=True, color="blue", bins=30)
-    plt.title("Salary Distribution", fontsize=16)
-    plt.xlabel("Salary (wage_amt)", fontsize=14)
-    plt.ylabel("Frequency", fontsize=14)
-    plt.xticks(fontsize=12)
-    plt.yticks(fontsize=12)
-    salary_chart = gr.Plot(plt.gcf())
-    plt.close()
-    return gender_chart, employer_chart, salary_chart
-# Gradio interface
 def gradio_app(fiscal_year, employer, job_title, country_of_birth, country_of_nationality, min_salary, max_salary, worksite_city, worksite_state):
     filtered_data = filter_data(fiscal_year, employer, job_title, country_of_birth, country_of_nationality, min_salary, max_salary, worksite_city, worksite_state)
-    gender_chart, employer_chart, salary_chart = generate_visuals(filtered_data)
-    return (
-        filtered_data.head(10),  # Display first 10 rows of filtered data
-        gender_chart,
-        employer_chart,
-        salary_chart,
-    )
-# Define dropdown options
-fiscal_year_options = ["All"] + sorted(data['fiscal_year'].dropna().unique().astype(str).tolist())
-employer_options = ["All"] + sorted(data['employer_name'].dropna().unique())
-job_title_options = ["All"] + sorted(data['job_title'].dropna().unique())
-country_birth_options = ["All"] + sorted(data['country_of_birth'].dropna().unique())
-country_nationality_options = ["All"] + sorted(data['country_of_nationality'].dropna().unique())
-worksite_city_options = ["All"] + sorted(data['worksite_city'].dropna().unique())
-worksite_state_options = ["All"] + sorted(data['worksite_state'].dropna().unique())
-# Gradio app interface
 with gr.Blocks() as app:
     with gr.Row():
-        gr.Markdown("<h1 style='color: #4CAF50;'>Visa Application Insights Dashboard</h1>")
     with gr.Row():
         with gr.Column():
-            fiscal_year = gr.Dropdown(label="Fiscal Year", choices=fiscal_year_options, value="All")
-            employer = gr.Dropdown(label="Employer Name", choices=employer_options, value="All")
-            job_title = gr.Dropdown(label="Job Title", choices=job_title_options, value="All")
-            country_of_birth = gr.Dropdown(label="Country of Birth", choices=country_birth_options, value="All")
-            country_of_nationality = gr.Dropdown(label="Country of Nationality", choices=country_nationality_options, value="All")
-            min_salary = gr.Number(label="Minimum Salary (USD)", value=0)
-            max_salary = gr.Number(label="Maximum Salary (USD)", value=1000000)
-            worksite_city = gr.Dropdown(label="Worksite City", choices=worksite_city_options, value="All")
-            worksite_state = gr.Dropdown(label="Worksite State", choices=worksite_state_options, value="All")
-        apply_filters = gr.Button("Apply Filters", elem_id="apply-filters-btn", style={"background-color": "#007BFF", "color": "white", "padding": "10px", "font-size": "16px"})
-    with gr.Row():
-        gr.Markdown("<h2>Filtered Data</h2>")
-        data_table = gr.Dataframe(label="Filtered Data")
     with gr.Row():
-        gr.Markdown("<h2>Gender Distribution</h2>")
-        gender_chart = gr.Plot()
-    with gr.Row():
-        gr.Markdown("<h2>Top 10 Employers</h2>")
-        employer_chart = gr.Plot()
-    with gr.Row():
-        gr.Markdown("<h2>Salary Distribution</h2>")
-        salary_chart = gr.Plot()
-    apply_filters.click(
-        gradio_app,
-        inputs=[
-            fiscal_year, employer, job_title, country_of_birth, country_of_nationality,
-            min_salary, max_salary, worksite_city, worksite_state
-        ],
-        outputs=[data_table, gender_chart, employer_chart, salary_chart]
-    )
-# Run the app
 app.launch()

 # Load and clean data
 def load_and_clean_data(file_path):
     """Read a CSV file and replace missing values with dtype-appropriate placeholders.

     Numeric columns are filled with ``-1``, datetime columns with
     ``1970-01-01`` and all remaining columns with the string ``"Unknown"``.

     Args:
         file_path: Path or file-like object accepted by ``pandas.read_csv``.

     Returns:
         The loaded ``DataFrame`` with the placeholders applied.
     """
     data = pd.read_csv(file_path)

     # Assign each filled column back to the frame. Calling
     # ``data[col].fillna(..., inplace=True)`` is chained assignment: under
     # pandas Copy-on-Write it fills a temporary object and leaves ``data``
     # untouched, and pandas 2.x already warns about it.
     # ``include="number"`` covers every numeric dtype, not only float64/int64,
     # so no numeric column can reach the string fill below.
     for col in data.select_dtypes(include="number").columns:
         data[col] = data[col].fillna(-1)

     # ``read_csv`` is called without ``parse_dates``, so this loop only matters
     # if date parsing is enabled later; it is kept so the placeholder policy
     # stays complete.
     for col in data.select_dtypes(include="datetime").columns:
         data[col] = data[col].fillna(pd.Timestamp("1970-01-01"))

     # Numeric and datetime columns have no gaps left, so only the remaining
     # (text-like) columns receive the "Unknown" marker.
     return data.fillna("Unknown")
 # Load dataset
     plt.ylabel("Count", fontsize=14)
     plt.xticks(fontsize=12)
     plt.yticks(fontsize=12)
+    plt.tight_layout()
     gender_chart = gr.Plot(plt.gcf())
     plt.close()
+    # Employer Distribution
     plt.figure(figsize=(12, 8))
+    employer_counts = filtered_data['employer_name'].value_counts().head(10)
+    sns.barplot(x=employer_counts.values, y=employer_counts.index, palette='coolwarm')
     plt.title("Top 10 Employers", fontsize=16)
+    plt.xlabel("Count", fontsize=14)
+    plt.ylabel("Employer Name", fontsize=14)
+    plt.tight_layout()
     employer_chart = gr.Plot(plt.gcf())
     plt.close()
+    return gender_chart, employer_chart
+# Gradio app
 def gradio_app(fiscal_year, employer, job_title, country_of_birth, country_of_nationality, min_salary, max_salary, worksite_city, worksite_state):
     """Apply the selected filters and build the values shown by the dashboard.

     The nine arguments are forwarded, in order, to ``filter_data``.

     Returns:
         A 3-tuple of the first 10 filtered rows, the gender chart and the
         employer chart produced by ``generate_visuals``.
     """
     selections = (
         fiscal_year,
         employer,
         job_title,
         country_of_birth,
         country_of_nationality,
         min_salary,
         max_salary,
         worksite_city,
         worksite_state,
     )
     matching_rows = filter_data(*selections)
     gender_plot, employer_plot = generate_visuals(matching_rows)
     preview = matching_rows.head(10)
     return preview, gender_plot, employer_plot
# Choice lists for the filter widgets: an "All" entry followed by the sorted
# distinct values of the matching column in `data`.
fiscal_year_options = [
    "All",
    # Fiscal years are compared as text, so missing entries are dropped and the
    # rest are converted to strings before de-duplicating.
    *sorted(data["fiscal_year"].dropna().astype(str).unique()),
]
employer_options = ["All", *sorted(data["employer_name"].unique())]
job_title_options = ["All", *sorted(data["job_title"].unique())]
country_of_birth_options = ["All", *sorted(data["country_of_birth"].unique())]
country_of_nationality_options = [
    "All",
    *sorted(data["country_of_nationality"].unique()),
]
worksite_city_options = ["All", *sorted(data["worksite_city"].unique())]
worksite_state_options = ["All", *sorted(data["worksite_state"].unique())]
# Gradio components for the dashboard. They are created up front so the input
# and output lists can be passed as-is to the click handler; the list order of
# `filters` must match the parameter order of `gradio_app`.
#
# Every filter gets an explicit initial value. Without one, the dropdown's
# initial selection depends on the Gradio version, and a slider starts at its
# minimum, which would make "Maximum Salary" default to 0.
# NOTE(review): "All" restores the default the dropdowns had before this commit;
# presumably `filter_data` treats it as "no filter" -- confirm against that helper.
filters = [
    gr.Dropdown(fiscal_year_options, value="All", label="Fiscal Year"),
    gr.Dropdown(employer_options, value="All", label="Employer Name"),
    gr.Dropdown(job_title_options, value="All", label="Job Title"),
    gr.Dropdown(country_of_birth_options, value="All", label="Country of Birth"),
    gr.Dropdown(country_of_nationality_options, value="All", label="Country of Nationality"),
    gr.Slider(0, 500000, value=0, step=1000, label="Minimum Salary"),
    # Start at the top of the range so the default salary window is fully open.
    gr.Slider(0, 500000, value=500000, step=1000, label="Maximum Salary"),
    gr.Dropdown(worksite_city_options, value="All", label="Worksite City"),
    gr.Dropdown(worksite_state_options, value="All", label="Worksite State"),
]
apply_button = gr.Button("Apply Filters")
# Order matches the tuple returned by `gradio_app`: table, gender chart, employer chart.
outputs = [
    gr.DataFrame(label="Filtered Data"),
    gr.Plot(label="Gender Distribution"),
    gr.Plot(label="Top Employers"),
]
 # Assemble the page. The widgets in `filters`, `apply_button` and `outputs`
 # were instantiated outside this context, so each one has to be placed with
 # `.render()`; `.style()` never attached a component to the layout and is no
 # longer available in current Gradio releases.
 with gr.Blocks() as app:
     with gr.Row():
         gr.Markdown("## H-1B Visa Data Visualizer")
     with gr.Row():
         with gr.Column():
             for filter_ in filters:
                 filter_.render()
             apply_button.render()
     # `gr.Row` is a layout context manager and takes no child component as a
     # positional argument, so each output is rendered inside its own row.
     for output in outputs:
         with gr.Row():
             output.render()
     # Wire the handler only after every input and output is part of the
     # layout; the listener itself does not need an enclosing row.
     apply_button.click(gradio_app, filters, outputs)
 app.launch()