import gradio as gr import pandas as pd import matplotlib.pyplot as plt import seaborn as sns from selenium import webdriver from selenium.webdriver.edge.service import Service from selenium.webdriver.edge.options import Options from selenium.webdriver.common.by import By from selenium.webdriver.support.ui import WebDriverWait, Select from selenium.webdriver.support import expected_conditions as EC from webdriver_manager.microsoft import EdgeChromiumDriverManager from datetime import datetime import time import numpy as np from selenium.common.exceptions import StaleElementReferenceException, TimeoutException, NoSuchElementException #--- Scraping Functions (Unchanged) --- def wait_for_element(driver, locator): return WebDriverWait(driver, 10).until(EC.element_to_be_clickable(locator)) def select_date_month_day(driver, date_str, date_input_id): date_to_select = datetime.strptime(date_str, '%Y-%m-%d') date_input = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.ID, date_input_id))) date_input.click() month_select = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CLASS_NAME, 'ui-datepicker-month'))) month_option = month_select.find_element(By.XPATH, f"//option[@value='{date_to_select.month - 1}']") month_option.click() day = date_to_select.day day_element = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, f"//td[@data-handler='selectDay']/a[text()='{day}']"))) day_element.click() time.sleep(5) def select_date(driver, date_str, date_input_id): date_to_select = datetime.strptime(date_str, '%Y-%m-%d') date_input = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.ID, date_input_id))) date_input.click() time.sleep(2) month_select = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CLASS_NAME, 'ui-datepicker-month'))) month_option = month_select.find_element(By.XPATH, f"//option[@value='{date_to_select.month - 1}']") time.sleep(2) month_option.click() year_select = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CLASS_NAME, 'ui-datepicker-year'))) year_option = year_select.find_element(By.XPATH, f"//option[@value='{date_to_select.year}']") year_option.click() day = date_to_select.day day_element = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, f"//td[@data-handler='selectDay']/a[text()='{day}']"))) day_element.click() time.sleep(5) def extract_grid_data_clm_summary(driver): data = [] try: total_pages_element = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.ID, "sp_1_pjqgridClmSummbyProv"))) total_pages = int(total_pages_element.text.strip()) except: return data for current_page in range(1, total_pages + 1): time.sleep(3) driver.execute_script("window.scrollTo(0, 0);") grid = WebDriverWait(driver, 10).until(EC.visibility_of_element_located((By.ID, "jqgridClmSummbyProv"))) rows = WebDriverWait(grid, 10).until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, "tr.jqgrow"))) for row in rows: try: provider_name = row.find_element(By.CSS_SELECTOR, "td[aria-describedby='jqgridClmSummbyProv_STR_ProvName']").text visits = row.find_element(By.CSS_SELECTOR, "td[aria-describedby='jqgridClmSummbyProv_STR_NoOfVisit']").text claim = row.find_element(By.CSS_SELECTOR, "td[aria-describedby='jqgridClmSummbyProv_STR_ClmAmt']").text try: total_mc = row.find_element(By.CSS_SELECTOR, "td[aria-describedby='jqgridClmSummbyProv_STR_TotalMC']").text except: total_mc = '0' data.append({'Provider Name': provider_name, 'No of Visits': visits, 'Total Claim': claim, 'Total MC (Days)': total_mc}) except Exception as e: print(f"Error extracting row: {e}") continue if current_page < total_pages: try: next_button_div = WebDriverWait(driver, 5).until(EC.element_to_be_clickable((By.CSS_SELECTOR, "div.btn.btn-sm.btn-default span.fa.fa-forward"))) driver.execute_script("arguments[0].scrollIntoView(true);", next_button_div) next_button_div.click() WebDriverWait(driver, 10).until(EC.staleness_of(rows[0])) except: break return data def extract_grid_data_patient_analysis(driver): all_data = [] while True: driver.execute_script("window.scrollTo(0, 0);") grid = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.ID, "jqgridCorpMcAnalysis"))) rows = grid.find_elements(By.CSS_SELECTOR, "tr.jqgrow") for row in rows: try: employee_name = row.find_element(By.CSS_SELECTOR, "td[aria-describedby='jqgridCorpMcAnalysis_MEM_NAME']").text employee_no = row.find_element(By.CSS_SELECTOR, "td[aria-describedby='jqgridCorpMcAnalysis_MEM_EMPID']").text division = row.find_element(By.CSS_SELECTOR, "td[aria-describedby='jqgridCorpMcAnalysis_MEM_EMPDIVISION']").text total_visit = row.find_element(By.CSS_SELECTOR, "td[aria-describedby='jqgridCorpMcAnalysis_totalVisit']").text total_mc = row.find_element(By.CSS_SELECTOR, "td[aria-describedby='jqgridCorpMcAnalysis_totalMC']").text total_claim_own = row.find_element(By.CSS_SELECTOR, "td[aria-describedby='jqgridCorpMcAnalysis_totalClaim_Own']").text total_claim_dep = row.find_element(By.CSS_SELECTOR, "td[aria-describedby='jqgridCorpMcAnalysis_totalClaim_Dep']").text all_data.append({ 'Employee Name': employee_name, 'Employee No': employee_no, 'Division/Department': division, 'Total Visit': total_visit, 'Total MC (Days)': total_mc, 'Total Claim (Own)': total_claim_own, 'Total Claim (Dep)': total_claim_dep }) except: continue try: driver.execute_script("window.scrollTo(0, document.body.scrollHeight);") next_button = WebDriverWait(driver, 5).until(EC.element_to_be_clickable((By.CSS_SELECTOR, "div.btn.btn-sm.btn-default span.fa.fa-forward"))) parent_div = next_button.find_element(By.XPATH, "./parent::div") if "disabled" in parent_div.get_attribute("class"): break driver.execute_script("arguments[0].scrollIntoView(true);", parent_div) parent_div.click() WebDriverWait(driver, 10).until(EC.staleness_of(rows[0])) except: break return all_data def extract_grid_data_mc(driver): data = [] try: total_pages_element = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.ID, "sp_1_jqgrid"))) total_pages = int(total_pages_element.text.strip()) except: total_pages = 1 for current_page in range(1, total_pages + 1): time.sleep(3) driver.execute_script("window.scrollTo(0, 0);") grid = WebDriverWait(driver, 10).until(EC.visibility_of_element_located((By.ID, "jqgrid"))) rows = WebDriverWait(grid, 10).until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, "tr.jqgrow"))) for row in rows: try: provider_name = row.find_element(By.CSS_SELECTOR, "td[aria-describedby='jqgrid_STR_ProvName']").text.strip() total_mc_given = row.find_element(By.CSS_SELECTOR, "td[aria-describedby='jqgrid_STR_MC_Given_Count']").text.strip() total_visits = row.find_element(By.CSS_SELECTOR, "td[aria-describedby='jqgrid_STR_VISITCount']").text.strip() data.append({'Provider': provider_name, 'Total MC Given': total_mc_given, 'No. of Visit': total_visits}) except: continue if current_page < total_pages: try: next_button_div = WebDriverWait(driver, 5).until(EC.element_to_be_clickable((By.CSS_SELECTOR, "div.btn.btn-sm.btn-default span.fa.fa-forward"))) driver.execute_script("arguments[0].scrollIntoView(true);", next_button_div) next_button_div.click() WebDriverWait(driver, 10).until(EC.staleness_of(rows[0])) except: break return data def scrape_data(url, user_id, password): edge_options = Options() driver = webdriver.Edge(service=Service(EdgeChromiumDriverManager().install()), options=edge_options) start_year = 2024 current_date = datetime.now().strftime('%Y-%m-%d') patient_data_by_year = {} claim_data_by_year = {} mc_data_by_year = {} try: driver.get(url) image = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, "//img[@src='/ClaimEXMVR/Servlet_LoadImage?SFC=loadImage&imageName=icorporate.png']"))) image.click() user_id_field = WebDriverWait(driver, 10).until(EC.visibility_of_element_located((By.NAME, "txtloginid"))) user_id_field.send_keys(user_id) password_field = WebDriverWait(driver, 10).until(EC.visibility_of_element_located((By.ID, "inputpss"))) password_field.send_keys(password) sign_button = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.CSS_SELECTOR, "button.btn.btn-primary[type='submit']"))) sign_button.click() continue_button = wait_for_element(driver, (By.XPATH, "//button[text()='Continue']")) continue_button.click() for year in range(start_year, 2025 + 1): if year == 2024: start_date = "2024-01-01" end_date = "2024-12-31" else: start_date = "2025-01-01" end_date = current_date productivity_link = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, "//a[span[text()='Productivity Reports']]"))) productivity_link.click() patient_analysis_link = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, "//a[@href='#/Patient_Analysis_Report'][span[text()=' Patient Analysis Report ']]"))) patient_analysis_link.click() select_date(driver, start_date, "txtStartDate") select_date(driver, end_date, "txtEndDate") search_button = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.ID, "btnSearch"))) search_button.click() time.sleep(5) dropdown = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CSS_SELECTOR, "select.ui-pg-selbox"))) driver.execute_script("arguments[0].scrollIntoView(true);", dropdown) select = Select(dropdown) select.select_by_value("100") time.sleep(10) patient_data = extract_grid_data_patient_analysis(driver) patient_df = pd.DataFrame(patient_data) numeric_cols_patient = ['Total Visit', 'Total MC (Days)', 'Total Claim (Own)', 'Total Claim (Dep)'] for col in numeric_cols_patient: patient_df[col] = pd.to_numeric(patient_df[col], errors='coerce') patient_df['Total Claim (Combined)'] = patient_df['Total Claim (Own)'] + patient_df['Total Claim (Dep)'] patient_df['Avg Claim per Visit'] = patient_df['Total Claim (Combined)'] / patient_df['Total Visit'] patient_df['Avg MC per Visit'] = patient_df['Total MC (Days)'] / patient_df['Total Visit'] patient_df['Avg Claim per MC'] = patient_df['Total Claim (Combined)'] / patient_df['Total MC (Days)'] patient_data_by_year[year] = patient_df for year in range(start_year, 2025 + 1): if year == 2024: start_date = "2024-01-01" end_date = "2024-12-31" else: start_date = "2025-01-01" end_date = current_date productivity_link = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, "//a[span[text()='Productivity Reports']]"))) productivity_link.click() mc_link = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, "//a[@href='#/MC_HealthCare_By_Provider'][span[text()=' MC by Provider ']]"))) mc_link.click() time.sleep(2) select_date(driver, start_date, "txtStartDate") select_date(driver, end_date, "txtEndDate") search_button = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.ID, "btnSearch"))) search_button.click() time.sleep(10) dropdown = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CSS_SELECTOR, "select.ui-pg-selbox"))) driver.execute_script("arguments[0].scrollIntoView(true);", dropdown) select = Select(dropdown) select.select_by_value("100") time.sleep(10) mc_data = extract_grid_data_mc(driver) mc_df = pd.DataFrame(mc_data) numeric_cols_mc = ['Total MC Given', 'No. of Visit'] for col in numeric_cols_mc: mc_df[col] = pd.to_numeric(mc_df[col], errors='coerce') mc_df['% MC Given'] = (mc_df['Total MC Given'] / mc_df['No. of Visit']) * 100 mc_data_by_year[year] = mc_df start_date = "2024-01-01" end_date = "2025-01-01" reg_claims_link = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, "//a[.//span[contains(text(), 'Registration') and contains(text(), 'Claims')]]"))) reg_claims_link.click() providers_link = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, "//a[@href='#/Claim_Summary_by_Provider_Analysis'][span[text()=' Claim Summary by Providers ']]"))) providers_link.click() time.sleep(5) select_date_month_day(driver, start_date, "txtFromDate") select_date_month_day(driver, end_date, "txtToDate") search_button = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.ID, "btnSearch"))) driver.execute_script("arguments[0].click();", search_button) time.sleep(10) dropdown = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CSS_SELECTOR, "select.ui-pg-selbox"))) driver.execute_script("arguments[0].scrollIntoView(true);", dropdown) select = Select(dropdown) select.select_by_value("100") time.sleep(5) claim_data_2024 = extract_grid_data_clm_summary(driver) claim_df_2024 = pd.DataFrame(claim_data_2024) numeric_cols_claim = ['No of Visits', 'Total Claim', 'Total MC (Days)'] for col in numeric_cols_claim: claim_df_2024[col] = pd.to_numeric(claim_df_2024[col], errors='coerce') claim_df_2024['Avg Claim per Visit'] = claim_df_2024['Total Claim'] / claim_df_2024['No of Visits'] claim_data_by_year[2024] = claim_df_2024 start_date = "2024-12-31" end_date = current_date reg_claims_link = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, "//a[.//span[contains(text(), 'Registration') and contains(text(), 'Claims')]]"))) reg_claims_link.click() providers_link = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, "//a[@href='#/Claim_Summary_by_Provider_Analysis'][span[text()=' Claim Summary by Providers ']]"))) providers_link.click() time.sleep(5) select_date_month_day(driver, start_date, "txtFromDate") select_date_month_day(driver, end_date, "txtToDate") search_button = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.ID, "btnSearch"))) driver.execute_script("arguments[0].click();", search_button) time.sleep(10) dropdown = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CSS_SELECTOR, "select.ui-pg-selbox"))) driver.execute_script("arguments[0].scrollIntoView(true);", dropdown) select = Select(dropdown) select.select_by_value("100") time.sleep(5) claim_data_recent = extract_grid_data_clm_summary(driver) claim_df_recent = pd.DataFrame(claim_data_recent) for col in numeric_cols_claim: claim_df_recent[col] = pd.to_numeric(claim_df_recent[col], errors='coerce') claim_df_recent['Avg Claim per Visit'] = claim_df_recent['Total Claim'] / claim_df_recent['No of Visits'] claim_data_by_year[2025] = claim_df_recent return patient_data_by_year, claim_data_by_year, mc_data_by_year, "Data scraped successfully!" except Exception as e: return None, None, None, f"Error: {str(e)}" finally: driver.quit() # --- Plotting Functions --- def generate_dashboard_charts(patient_data_by_year, claim_data_by_year, mc_data_by_year, year, show_mc_pct=True, mc_sort_order="desc"): if not patient_data_by_year or not claim_data_by_year or not mc_data_by_year: return None, None year_int = int(year) patient_df = patient_data_by_year.get(year_int, pd.DataFrame()) claim_df = claim_data_by_year.get(year_int, pd.DataFrame()) mc_df = mc_data_by_year.get(year_int, pd.DataFrame()) if patient_df.empty or claim_df.empty or mc_df.empty: return None, None # Professional styling sns.set(style="whitegrid", palette="muted") plt.rcParams.update({ 'font.family': 'Helvetica', 'font.size': 12, 'axes.titlesize': 16, 'axes.labelsize': 14, 'xtick.labelsize': 11, 'ytick.labelsize': 11, 'axes.titleweight': 'bold', 'axes.linewidth': 1.5, 'grid.linestyle': ':', 'grid.alpha': 0.5, 'figure.facecolor': '#f5f6f5', 'axes.facecolor': '#ffffff', 'axes.edgecolor': '#333333', 'axes.labelcolor': '#333333', 'text.color': '#333333' }) provider_charts = [] employee_charts = [] # --- Provider Dashboard Charts --- # 1. Total Visits by Providers (Horizontal Bar) plt.figure(figsize=(12, 6)) top_prov_visits = mc_df.sort_values('No. of Visit', ascending=False).head(10) ax = sns.barplot(data=top_prov_visits, x='No. of Visit', y='Provider', hue='Provider', palette='Blues_r', legend=False, edgecolor='black', linewidth=0.5) ax.set_title(f'Top 10 Providers by Total Visits ({year})', pad=15) ax.set_xlabel('Total Visits') ax.set_ylabel('Provider') for i, v in enumerate(top_prov_visits['No. of Visit']): ax.text(v + 0.5, i, f'{int(v)}', va='center', fontsize=10, color='#333333') plt.tight_layout() provider_charts.append(plt.gcf()) plt.close() # 2. Total MC by Providers (Horizontal Bar) plt.figure(figsize=(12, 6)) top_prov_mc = mc_df.sort_values('Total MC Given', ascending=False).head(10) ax = sns.barplot(data=top_prov_mc, x='Total MC Given', y='Provider', hue='Provider', palette='Greens_r', legend=False, edgecolor='black', linewidth=0.5) ax.set_title(f'Top 10 Providers by Total MC Given ({year})', pad=15) ax.set_xlabel('Total MC (Days)') ax.set_ylabel('Provider') for i, v in enumerate(top_prov_mc['Total MC Given']): ax.text(v + 0.5, i, f'{int(v)}', va='center', fontsize=10, color='#333333') plt.tight_layout() provider_charts.append(plt.gcf()) plt.close() # 3. % MC Given by Providers (Larger Bar, Top 20, Sortable) if show_mc_pct: plt.figure(figsize=(18, 9)) # Larger size for professionalism and readability top_visits_provs = set(mc_df.sort_values('No. of Visit', ascending=False).head(10)['Provider']) top_mc_provs = set(mc_df.sort_values('Total MC Given', ascending=False).head(10)['Provider']) top_provs = top_visits_provs.union(top_mc_provs) top_prov_mc_pct = mc_df[mc_df['Provider'].isin(top_provs)].sort_values( '% MC Given', ascending=(mc_sort_order == "asc")).head(20) ax = sns.barplot(data=top_prov_mc_pct, x='Provider', y='% MC Given', hue='Provider', palette='Purples_r', legend=False, edgecolor='black', linewidth=0.5) ax.set_title(f'Top 20 Providers by % MC Given ({year}) - Sorted {"Ascending" if mc_sort_order == "asc" else "Descending"}', pad=15) ax.set_ylabel('% MC Given', fontsize=14) ax.set_xlabel('Provider', fontsize=14) plt.xticks(rotation=45, ha='right', fontsize=11) for i, v in enumerate(top_prov_mc_pct['% MC Given']): ax.text(i, v + 1, f'{v:.1f}%', ha='center', fontsize=10, color='#333333') plt.tight_layout() provider_charts.append(plt.gcf()) else: provider_charts.append(None) plt.close() # 4. Total Claim by Providers (Horizontal Bar) plt.figure(figsize=(12, 6)) top_prov_claim = claim_df.sort_values('Total Claim', ascending=False).head(10) ax = sns.barplot(data=top_prov_claim, x='Total Claim', y='Provider Name', hue='Provider Name', palette='Oranges_r', legend=False, edgecolor='black', linewidth=0.5) ax.set_title(f'Top 10 Providers by Total Claim ({year})', pad=15) ax.set_xlabel('Total Claim ($)') ax.set_ylabel('Provider') for i, v in enumerate(top_prov_claim['Total Claim']): ax.text(v + 0.5, i, f'{v:,.2f}', va='center', fontsize=10, color='#333333') plt.tight_layout() provider_charts.append(plt.gcf()) plt.close() # 5. Average Claim per Visit by Providers (Bar) plt.figure(figsize=(12, 6)) top_prov_avg_claim = claim_df.sort_values('Avg Claim per Visit', ascending=False).head(10) ax = sns.barplot(data=top_prov_avg_claim, x='Provider Name', y='Avg Claim per Visit', hue='Provider Name', palette='Reds_r', legend=False, edgecolor='black', linewidth=0.5) ax.set_title(f'Top 10 Providers by Avg Claim per Visit ({year})', pad=15) ax.set_ylabel('Avg Claim per Visit ($)') ax.set_xlabel('Provider') plt.xticks(rotation=45, ha='right') for i, v in enumerate(top_prov_avg_claim['Avg Claim per Visit']): ax.text(i, v + 0.5, f'{v:.2f}', ha='center', fontsize=10, color='#333333') plt.tight_layout() provider_charts.append(plt.gcf()) plt.close() # --- Employee Dashboard Charts --- # 1. Total Visits by Employees plt.figure(figsize=(12, 6)) top_emp_visits = patient_df.sort_values('Total Visit', ascending=False).head(10) ax = sns.barplot(data=top_emp_visits, x='Total Visit', y='Employee Name', hue='Employee Name', palette='Blues_r', legend=False, edgecolor='black', linewidth=0.5) ax.set_title(f'Top 10 Employees by Total Visits ({year})', pad=15) ax.set_xlabel('Total Visits') ax.set_ylabel('Employee') for i, v in enumerate(top_emp_visits['Total Visit']): ax.text(v + 0.2, i, f'{int(v)}', va='center', fontsize=10, color='#333333') plt.tight_layout() employee_charts.append(plt.gcf()) plt.close() # 2. Total Claim by Employees plt.figure(figsize=(12, 6)) top_emp_claim = patient_df.sort_values('Total Claim (Combined)', ascending=False).head(10) ax = sns.barplot(data=top_emp_claim, x='Total Claim (Combined)', y='Employee Name', hue='Employee Name', palette='Oranges_r', legend=False, edgecolor='black', linewidth=0.5) ax.set_title(f'Top 10 Employees by Total Claim ({year})', pad=15) ax.set_xlabel('Total Claim ($)') ax.set_ylabel('Employee') for i, v in enumerate(top_emp_claim['Total Claim (Combined)']): ax.text(v + 1, i, f'{v:,.2f}', va='center', fontsize=10, color='#333333') plt.tight_layout() employee_charts.append(plt.gcf()) plt.close() # 3. Average Claim per Visit by Employees plt.figure(figsize=(12, 6)) top_emp_avg_claim = patient_df.sort_values('Avg Claim per Visit', ascending=False).head(10) ax = sns.barplot(data=top_emp_avg_claim, x='Employee Name', y='Avg Claim per Visit', hue='Employee Name', palette='Reds_r', legend=False, edgecolor='black', linewidth=0.5) ax.set_title(f'Top 10 Employees by Avg Claim per Visit ({year})', pad=15) ax.set_ylabel('Avg Claim per Visit ($)') ax.set_xlabel('Employee') plt.xticks(rotation=45, ha='right') for i, v in enumerate(top_emp_avg_claim['Avg Claim per Visit']): ax.text(i, v + 0.5, f'{v:.2f}', ha='center', fontsize=10, color='#333333') plt.tight_layout() employee_charts.append(plt.gcf()) plt.close() # 4. Total MC by Employees plt.figure(figsize=(12, 6)) top_emp_mc = patient_df.sort_values('Total MC (Days)', ascending=False).head(10) ax = sns.barplot(data=top_emp_mc, x='Total MC (Days)', y='Employee Name', hue='Employee Name', palette='Greens_r', legend=False, edgecolor='black', linewidth=0.5) ax.set_title(f'Top 10 Employees by Total MC ({year})', pad=15) ax.set_xlabel('Total MC (Days)') ax.set_ylabel('Employee') for i, v in enumerate(top_emp_mc['Total MC (Days)']): ax.text(v + 0.2, i, f'{int(v)}', va='center', fontsize=10, color='#333333') plt.tight_layout() employee_charts.append(plt.gcf()) plt.close() # 5. Average MC per Visit by Employees plt.figure(figsize=(12, 6)) top_emp_avg_mc = patient_df.sort_values('Avg MC per Visit', ascending=False).head(10) ax = sns.barplot(data=top_emp_avg_mc, x='Employee Name', y='Avg MC per Visit', hue='Employee Name', palette='Purples_r', legend=False, edgecolor='black', linewidth=0.5) ax.set_title(f'Top 10 Employees by Avg MC per Visit ({year})', pad=15) ax.set_ylabel('Avg MC per Visit (Days)') ax.set_xlabel('Employee') plt.xticks(rotation=45, ha='right') for i, v in enumerate(top_emp_avg_mc['Avg MC per Visit']): ax.text(i, v + 0.05, f'{v:.2f}', ha='center', fontsize=10, color='#333333') plt.tight_layout() employee_charts.append(plt.gcf()) plt.close() # 6. Division-wise Claim Distribution (Pie) plt.figure(figsize=(10, 6)) division_claims = patient_df.groupby('Division/Department')['Total Claim (Combined)'].sum() plt.pie(division_claims, labels=division_claims.index, autopct='%1.1f%%', colors=sns.color_palette('muted'), startangle=90, textprops={'fontsize': 11, 'color': '#333333'}, wedgeprops={'edgecolor': 'black', 'linewidth': 0.5}) plt.title(f'Claim Distribution by Division ({year})', pad=15) plt.tight_layout() employee_charts.append(plt.gcf()) plt.close() provider_images = [fig_to_image(fig) if fig is not None else None for fig in provider_charts] employee_images = [fig_to_image(fig) for fig in employee_charts] return provider_images, employee_images def fig_to_image(fig): if fig is None: return None fig.canvas.draw() img_array = np.frombuffer(fig.canvas.buffer_rgba(), dtype=np.uint8) img_array = img_array.reshape(fig.canvas.get_width_height()[::-1] + (4,)) plt.close(fig) return img_array # --- Gradio Interface --- with gr.Blocks(title="Claims Analysis Dashboard", css=""" body { background-color: #f5f6f5; } h1, h2 { color: #333333; font-family: Helvetica; } """) as demo: gr.Markdown("# Claims Analysis Dashboard (2024 - Present)") with gr.Row(): url_input = gr.Textbox(label="Website URL", placeholder="Enter URL here", lines=1) user_id_input = gr.Textbox(label="User ID", placeholder="Enter User ID", lines=1) password_input = gr.Textbox(label="Password", type="password", placeholder="Enter Password", lines=1) scrape_btn = gr.Button("Submit", variant="primary") with gr.Row(): year_dropdown = gr.Dropdown( label="Select Year to View Data", choices=["2024", "2025"], value="2024", allow_custom_value=False ) show_mc_pct_checkbox = gr.Checkbox(label="Show % MC Given Chart", value=True) mc_sort_dropdown = gr.Dropdown( label="Sort % MC Given", choices=["desc", "asc"], value="desc", allow_custom_value=False ) status_output = gr.Textbox(label="Status", lines=2, interactive=False) patient_state = gr.State() claim_state = gr.State() mc_state = gr.State() with gr.Tabs(): with gr.TabItem("Provider Insights"): gr.Markdown("## Provider Insights Dashboard") with gr.Row(): prov_chart1 = gr.Image(label="Total Visits by Providers", interactive=False) prov_chart2 = gr.Image(label="Total MC by Providers", interactive=False) with gr.Row(): prov_chart3 = gr.Image(label="% MC Given by Providers (Top 20)", interactive=False, visible=True) with gr.Row(): prov_chart4 = gr.Image(label="Total Claim by Providers", interactive=False) prov_chart5 = gr.Image(label="Average Claim per Visit by Providers", interactive=False) with gr.TabItem("Employee Insights"): gr.Markdown("## Employee Insights Dashboard") with gr.Row(): emp_chart1 = gr.Image(label="Total Visits by Employees", interactive=False) emp_chart2 = gr.Image(label="Total Claim by Employees", interactive=False) with gr.Row(): emp_chart3 = gr.Image(label="Average Claim per Visit by Employees", interactive=False) emp_chart4 = gr.Image(label="Total MC by Employees", interactive=False) with gr.Row(): emp_chart5 = gr.Image(label="Average MC per Visit by Employees", interactive=False) emp_chart6 = gr.Image(label="Claim Distribution by Division", interactive=False) # [Event handlers remain unchanged, just ensure inputs/outputs match the above components] def scrape_and_store(url, user_id, password, show_mc_pct, mc_sort_order): patient_data_by_year, claim_data_by_year, mc_data_by_year, status = scrape_data(url, user_id, password) if patient_data_by_year is None or claim_data_by_year is None or mc_data_by_year is None: return status, None, None, None provider_images, employee_images = generate_dashboard_charts( patient_data_by_year, claim_data_by_year, mc_data_by_year, "2024", show_mc_pct, mc_sort_order) return ( status, patient_data_by_year, claim_data_by_year, mc_data_by_year, provider_images[0], provider_images[1], provider_images[2], provider_images[3], provider_images[4], employee_images[0], employee_images[1], employee_images[2], employee_images[3], employee_images[4], employee_images[5] ) def update_dashboard(year, patient_data_by_year, claim_data_by_year, mc_data_by_year, show_mc_pct, mc_sort_order): if not patient_data_by_year or not claim_data_by_year or not mc_data_by_year: return [None] * 11 provider_images, employee_images = generate_dashboard_charts( patient_data_by_year, claim_data_by_year, mc_data_by_year, year, show_mc_pct, mc_sort_order) return ( provider_images[0], provider_images[1], provider_images[2], provider_images[3], provider_images[4], employee_images[0], employee_images[1], employee_images[2], employee_images[3], employee_images[4], employee_images[5] ) scrape_btn.click( fn=lambda url, user_id, password: scrape_and_store(url, user_id, password, show_mc_pct_checkbox.value, mc_sort_dropdown.value), inputs=[url_input, user_id_input, password_input], outputs=[ status_output, patient_state, claim_state, mc_state, prov_chart1, prov_chart2, prov_chart3, prov_chart4, prov_chart5, emp_chart1, emp_chart2, emp_chart3, emp_chart4, emp_chart5, emp_chart6 ] ) year_dropdown.change( fn=update_dashboard, inputs=[year_dropdown, patient_state, claim_state, mc_state, show_mc_pct_checkbox, mc_sort_dropdown], outputs=[ prov_chart1, prov_chart2, prov_chart3, prov_chart4, prov_chart5, emp_chart1, emp_chart2, emp_chart3, emp_chart4, emp_chart5, emp_chart6 ] ) show_mc_pct_checkbox.change( fn=update_dashboard, inputs=[year_dropdown, patient_state, claim_state, mc_state, show_mc_pct_checkbox, mc_sort_dropdown], outputs=[ prov_chart1, prov_chart2, prov_chart3, prov_chart4, prov_chart5, emp_chart1, emp_chart2, emp_chart3, emp_chart4, emp_chart5, emp_chart6 ] ) mc_sort_dropdown.change( fn=update_dashboard, inputs=[year_dropdown, patient_state, claim_state, mc_state, show_mc_pct_checkbox, mc_sort_dropdown], outputs=[ prov_chart1, prov_chart2, prov_chart3, prov_chart4, prov_chart5, emp_chart1, emp_chart2, emp_chart3, emp_chart4, emp_chart5, emp_chart6 ] ) demo.launch(share=True)