Spaces:

ans123
/

Truck_Analysis

Sleeping

App Files Community

ans123 committed 4 days ago

Commit

b009ea4

verified ·

1 Parent(s): 38cf4b2

Update app.py

Browse files

Files changed (1) hide show

app.py +78 -57

app.py CHANGED Viewed

@@ -7,14 +7,13 @@ import io
 import base64
 # --- Global Variables to store processed data ---
-# These will be populated once when the Gradio app starts
 global_df = None
 global_brand_resale = None
 global_brand_resale_mean = 0
 global_fair_market_value_mean = 0
 global_purchase_amount_mean = 0
 global_monthly_payment_mean = 0
-global_ownership_types = [] # To populate the dropdown for ownership type
 # === Truck ID Cleaner ===
 def clean_truck_id(val):
@@ -74,7 +73,6 @@ def load_and_clean_data():
         return finance, maintenance, distance, odometer, stub, paper
     except FileNotFoundError as e:
         print(f"Error: One or more input files not found. Please ensure all Excel files are in the same directory as the script. Missing file: {e.filename}")
-        # In a Gradio app, sys.exit() would stop the server. Instead, return None or raise a specific error.
         raise gr.Error(f"Required file not found: {e.filename}. Please upload all necessary Excel files.")
     except Exception as e:
         print(f"An unexpected error occurred during data loading: {e}")
@@ -138,7 +136,16 @@ def initial_data_processing():
         # --- Standardize 'ownership_type' ---
         df['ownership_type'] = df['ownership_type'].astype(str).str.strip().str.upper()
-        global_ownership_types = df['ownership_type'].unique().tolist() # Store for Gradio dropdown
         # --- Handle NaNs for decision-making columns ---
         df["total_repairs"] = df["total_repairs"].fillna(0)
@@ -148,11 +155,6 @@ def initial_data_processing():
         df["odo_diff"] = df["odo_diff"].fillna(0).apply(lambda x: 0 if x < 0 else x)
-        # Calculate means for imputation, handling potential NaN means if column is all NaN
-        global_fair_market_value_mean = df['fair_market_value'].mean()
-        global_purchase_amount_mean = df['purchase_amount'].mean()
-        global_monthly_payment_mean = df['monthly_payment'].mean()
         df["avg_resale_value"] = df["avg_resale_value"].fillna(global_brand_resale_mean if not pd.isna(global_brand_resale_mean) else 0)
         df["fair_market_value"] = df["fair_market_value"].fillna(global_fair_market_value_mean if not pd.isna(global_fair_market_value_mean) else 0)
         df["purchase_amount"] = df["purchase_amount"].fillna(global_purchase_amount_mean if not pd.isna(global_purchase_amount_mean) else 0)
@@ -200,16 +202,16 @@ def initial_data_processing():
         df["Decision"] = df.apply(make_decision_for_df, axis=1)
-        global_df = df # Store the fully processed DataFrame globally for plotting
         print("Initial data processing complete. Data loaded for Gradio app.")
     except gr.Error as e:
         print(f"Gradio Error during initial data processing: {e}")
-        # Allow the app to start but indicate data is not ready
-        global_df = pd.DataFrame() # Empty DataFrame to prevent errors in plotting
     except Exception as e:
         print(f"Unexpected error during initial data processing: {e}")
-        global_df = pd.DataFrame() # Empty DataFrame
 # === Decision Prediction Function for Gradio Interface ===
@@ -218,26 +220,29 @@ def predict_decision(total_repairs, last_10w_miles, odo_diff, cpm, purchase_amou
     Predicts the decision for a single truck based on user inputs.
     Uses globally pre-calculated means for missing values if inputs are None.
     """
-    # Ensure inputs are numeric where expected, handle potential None/empty string from Gradio
     total_repairs = float(total_repairs) if total_repairs is not None else 0.0
     last_10w_miles = float(last_10w_miles) if last_10w_miles is not None else 0.0
     odo_diff = float(odo_diff) if odo_diff is not None else 0.0
     cpm = float(cpm) if cpm is not None else 0.0
-    # Use global means for financial values if user input is None
-    purchase_amount = float(purchase_amount) if purchase_amount is not None else global_purchase_amount_mean
-    fair_market_value = float(fair_market_value) if fair_market_value is not None else global_fair_market_value_mean
-    monthly_payment = float(monthly_payment) if monthly_payment is not None else global_monthly_payment_mean
     ownership_type_str = ownership_type_str.strip().upper() if ownership_type_str is not None else "UNKNOWN"
     make = make.strip().upper() if make is not None else "UNKNOWN"
     # For avg_resale_value, try to get it from the pre-calculated global_brand_resale, else use global mean
-    avg_resale_value_lookup = global_brand_resale.loc[global_brand_resale['truck_brand'] == make, 'avg_resale_value'].values if global_brand_resale is not None else []
-    if len(avg_resale_value_lookup) > 0:
-        avg_resale_value = avg_resale_value_lookup[0]
-    else:
-        avg_resale_value = global_brand_resale_mean # Use overall mean if brand not found or data not loaded
     # Apply the same logic as make_decision, but directly with the input variables
     # 1. Scrap:
@@ -280,9 +285,10 @@ def generate_plots():
     Generates various plots from the processed global_df and returns them as base64 encoded images.
     """
     if global_df is None or global_df.empty:
-        return "Error: Data not loaded or is empty. Please ensure input files are present and valid."
-    plot_outputs = []
     # Plot 1: Decision Breakdown
     try:
@@ -295,26 +301,33 @@ def generate_plots():
         buf = io.BytesIO()
         plt.savefig(buf, format='png')
         plt.close()
-        plot_outputs.append(gr.Image(value=buf.getvalue(), label="Decision Breakdown")._data)
     except Exception as e:
-        plot_outputs.append(f"Error generating Decision Breakdown plot: {e}")
     # Plot 2: Total Repairs by Ownership Type
     try:
         plt.figure(figsize=(12, 7))
-        sns.boxplot(data=global_df, x='ownership_type', y='total_repairs', palette='coolwarm')
-        plt.title('Total Repairs by Ownership Type')
-        plt.xlabel('Ownership Type')
-        plt.ylabel('Total Repairs ($)')
-        plt.xticks(rotation=45, ha='right')
-        plt.grid(axis='y', linestyle='--', alpha=0.7)
-        plt.tight_layout()
-        buf = io.BytesIO()
-        plt.savefig(buf, format='png')
-        plt.close()
-        plot_outputs.append(gr.Image(value=buf.getvalue(), label="Total Repairs by Ownership Type")._data)
     except Exception as e:
-        plot_outputs.append(f"Error generating Total Repairs plot: {e}")
     # Plot 3: Last 10 Weeks Miles Distribution
     try:
@@ -327,35 +340,42 @@ def generate_plots():
         buf = io.BytesIO()
         plt.savefig(buf, format='png')
         plt.close()
-        plot_outputs.append(gr.Image(value=buf.getvalue(), label="Last 10 Weeks Miles Distribution")._data)
     except Exception as e:
-        plot_outputs.append(f"Error generating Miles Distribution plot: {e}")
     # Plot 4: Fair Market Value vs. Purchase Amount
     try:
         plt.figure(figsize=(10, 7))
-        sns.scatterplot(data=global_df, x='purchase_amount', y='fair_market_value', hue='Decision', palette='deep', alpha=0.7)
-        plt.title('Fair Market Value vs. Purchase Amount by Decision')
-        plt.xlabel('Purchase Amount ($)')
-        plt.ylabel('Fair Market Value ($)')
-        plt.grid(linestyle='--', alpha=0.7)
-        plt.tight_layout()
-        buf = io.BytesIO()
-        plt.savefig(buf, format='png')
-        plt.close()
-        plot_outputs.append(gr.Image(value=buf.getvalue(), label="Fair Market Value vs. Purchase Amount")._data)
     except Exception as e:
-        plot_outputs.append(f"Error generating FMV vs Purchase plot: {e}")
-    return plot_outputs
 # --- Initial Data Loading and Processing Call ---
-# This will run once when the Gradio app starts up
 initial_data_processing()
 # --- Gradio Interface Definition ---
 # Define inputs for the Decision Predictor tab
 decision_inputs = [
     gr.Number(label="Total Repairs ($)", value=0.0),
     gr.Number(label="Last 10 Weeks Miles", value=0.0),
@@ -364,7 +384,7 @@ decision_inputs = [
     gr.Number(label="Purchase Amount ($)", value=0.0),
     gr.Number(label="Fair Market Value ($)", value=0.0),
     gr.Number(label="Monthly Payment ($)", value=0.0),
-    gr.Dropdown(label="Ownership Type", choices=global_ownership_types if global_ownership_types else ["OWNER OPERATOR OWNED", "OPERATING LEASE", "FINANCED", "LEASE PURCHASE", "RENTAL", "FMV LEASE", "NAN"], value="OWNER OPERATOR OWNED"),
     gr.Textbox(label="Make (e.g., FORD)", value="FORD")
 ]
@@ -397,7 +417,8 @@ with gr.Blocks() as demo:
         plot_button = gr.Button("Generate Plots")
         # Output components for plots
-        plot_outputs = [
             gr.Image(label="Decision Breakdown", interactive=False, visible=True),
             gr.Image(label="Total Repairs by Ownership Type", interactive=False, visible=True),
             gr.Image(label="Last 10 Weeks Miles Distribution", interactive=False, visible=True),
@@ -407,7 +428,7 @@ with gr.Blocks() as demo:
         plot_button.click(
             fn=generate_plots,
             inputs=[],
-            outputs=plot_outputs
         )
 # Launch the Gradio app

 import base64
 # --- Global Variables to store processed data ---
 global_df = None
 global_brand_resale = None
 global_brand_resale_mean = 0
 global_fair_market_value_mean = 0
 global_purchase_amount_mean = 0
 global_monthly_payment_mean = 0
+global_ownership_types = []
 # === Truck ID Cleaner ===
 def clean_truck_id(val):
         return finance, maintenance, distance, odometer, stub, paper
     except FileNotFoundError as e:
         print(f"Error: One or more input files not found. Please ensure all Excel files are in the same directory as the script. Missing file: {e.filename}")
         raise gr.Error(f"Required file not found: {e.filename}. Please upload all necessary Excel files.")
     except Exception as e:
         print(f"An unexpected error occurred during data loading: {e}")
         # --- Standardize 'ownership_type' ---
         df['ownership_type'] = df['ownership_type'].astype(str).str.strip().str.upper()
+        global_ownership_types = df['ownership_type'].unique().tolist()
+        # astype(str) + upper() turns missing ownership types into the literal string 'NAN'; drop it from the dropdown choices
+        if 'NAN' in global_ownership_types:
+            global_ownership_types.remove('NAN')
+        global_ownership_types.sort() # Sort for better display in dropdown
+        # Calculate means for imputation, handling potential NaN means if column is all NaN
+        global_fair_market_value_mean = df['fair_market_value'].mean()
+        global_purchase_amount_mean = df['purchase_amount'].mean()
+        global_monthly_payment_mean = df['monthly_payment'].mean()
         # --- Handle NaNs for decision-making columns ---
         df["total_repairs"] = df["total_repairs"].fillna(0)
         df["odo_diff"] = df["odo_diff"].fillna(0).apply(lambda x: 0 if x < 0 else x)
         df["avg_resale_value"] = df["avg_resale_value"].fillna(global_brand_resale_mean if not pd.isna(global_brand_resale_mean) else 0)
         df["fair_market_value"] = df["fair_market_value"].fillna(global_fair_market_value_mean if not pd.isna(global_fair_market_value_mean) else 0)
         df["purchase_amount"] = df["purchase_amount"].fillna(global_purchase_amount_mean if not pd.isna(global_purchase_amount_mean) else 0)
         df["Decision"] = df.apply(make_decision_for_df, axis=1)
+        global_df = df.copy() # Make a copy to avoid SettingWithCopyWarning if modified later
         print("Initial data processing complete. Data loaded for Gradio app.")
     except gr.Error as e:
         print(f"Gradio Error during initial data processing: {e}")
+        # If an error occurs, ensure global_df is an empty DataFrame to prevent further errors
+        global_df = pd.DataFrame()
     except Exception as e:
         print(f"Unexpected error during initial data processing: {e}")
+        global_df = pd.DataFrame()
 # === Decision Prediction Function for Gradio Interface ===
     Predicts the decision for a single truck based on user inputs.
     Uses globally pre-calculated means for missing values if inputs are None.
     """
+    # Handle potentially None inputs from Gradio and ensure numeric types
     total_repairs = float(total_repairs) if total_repairs is not None else 0.0
     last_10w_miles = float(last_10w_miles) if last_10w_miles is not None else 0.0
     odo_diff = float(odo_diff) if odo_diff is not None else 0.0
     cpm = float(cpm) if cpm is not None else 0.0
+    # Use global means for financial values if user input is None, and ensure they are float
+    purchase_amount = float(purchase_amount) if purchase_amount is not None else (global_purchase_amount_mean if not pd.isna(global_purchase_amount_mean) else 0.0)
+    fair_market_value = float(fair_market_value) if fair_market_value is not None else (global_fair_market_value_mean if not pd.isna(global_fair_market_value_mean) else 0.0)
+    monthly_payment = float(monthly_payment) if monthly_payment is not None else (global_monthly_payment_mean if not pd.isna(global_monthly_payment_mean) else 0.0)
     ownership_type_str = ownership_type_str.strip().upper() if ownership_type_str is not None else "UNKNOWN"
     make = make.strip().upper() if make is not None else "UNKNOWN"
     # For avg_resale_value, try to get it from the pre-calculated global_brand_resale, else use global mean
+    avg_resale_value = 0.0 # Default if global_brand_resale is not loaded
+    if global_brand_resale is not None:
+        avg_resale_value_lookup = global_brand_resale.loc[global_brand_resale['truck_brand'] == make, 'avg_resale_value'].values
+        if len(avg_resale_value_lookup) > 0:
+            avg_resale_value = avg_resale_value_lookup[0]
+        else:
+            avg_resale_value = global_brand_resale_mean if not pd.isna(global_brand_resale_mean) else 0.0
     # Apply the same logic as make_decision, but directly with the input variables
     # 1. Scrap:
     Generates various plots from the processed global_df and returns them as a list of PNG image bytes (None for any plot that could not be generated).
     """
     if global_df is None or global_df.empty:
+        # Return a list of None values for the images if data is not loaded
+        return [None, None, None, None]
+    plot_buffers = [] # Store image bytes here
     # Plot 1: Decision Breakdown
     try:
         buf = io.BytesIO()
         plt.savefig(buf, format='png')
         plt.close()
+        plot_buffers.append(buf.getvalue())
     except Exception as e:
+        print(f"Error generating Decision Breakdown plot: {e}")
+        plot_buffers.append(None) # Append None if plot generation fails
     # Plot 2: Total Repairs by Ownership Type
     try:
         plt.figure(figsize=(12, 7))
+        # Filter out NaN/None ownership types if any remain for plotting robustness
+        plot_df = global_df[global_df['ownership_type'].notna() & (global_df['ownership_type'] != 'NAN')]
+        if not plot_df.empty:
+            sns.boxplot(data=plot_df, x='ownership_type', y='total_repairs', palette='coolwarm')
+            plt.title('Total Repairs by Ownership Type')
+            plt.xlabel('Ownership Type')
+            plt.ylabel('Total Repairs ($)')
+            plt.xticks(rotation=45, ha='right')
+            plt.grid(axis='y', linestyle='--', alpha=0.7)
+            plt.tight_layout()
+            buf = io.BytesIO()
+            plt.savefig(buf, format='png')
+            plt.close()
+            plot_buffers.append(buf.getvalue())
+        else:
+            plot_buffers.append(None)
     except Exception as e:
+        print(f"Error generating Total Repairs plot: {e}")
+        plot_buffers.append(None)
     # Plot 3: Last 10 Weeks Miles Distribution
     try:
         buf = io.BytesIO()
         plt.savefig(buf, format='png')
         plt.close()
+        plot_buffers.append(buf.getvalue())
     except Exception as e:
+        print(f"Error generating Miles Distribution plot: {e}")
+        plot_buffers.append(None)
     # Plot 4: Fair Market Value vs. Purchase Amount
     try:
         plt.figure(figsize=(10, 7))
+        # Ensure columns are numeric and handle potential NaNs for plotting
+        plot_df = global_df.dropna(subset=['purchase_amount', 'fair_market_value', 'Decision'])
+        if not plot_df.empty:
+            sns.scatterplot(data=plot_df, x='purchase_amount', y='fair_market_value', hue='Decision', palette='deep', alpha=0.7)
+            plt.title('Fair Market Value vs. Purchase Amount by Decision')
+            plt.xlabel('Purchase Amount ($)')
+            plt.ylabel('Fair Market Value ($)')
+            plt.grid(linestyle='--', alpha=0.7)
+            plt.tight_layout()
+            buf = io.BytesIO()
+            plt.savefig(buf, format='png')
+            plt.close()
+            plot_buffers.append(buf.getvalue())
+        else:
+            plot_buffers.append(None)
     except Exception as e:
+        print(f"Error generating FMV vs Purchase plot: {e}")
+        plot_buffers.append(None)
+    return plot_buffers
 # --- Initial Data Loading and Processing Call ---
 initial_data_processing()
 # --- Gradio Interface Definition ---
 # Define inputs for the Decision Predictor tab
+# Use the dynamically populated global_ownership_types for the dropdown choices
 decision_inputs = [
     gr.Number(label="Total Repairs ($)", value=0.0),
     gr.Number(label="Last 10 Weeks Miles", value=0.0),
     gr.Number(label="Purchase Amount ($)", value=0.0),
     gr.Number(label="Fair Market Value ($)", value=0.0),
     gr.Number(label="Monthly Payment ($)", value=0.0),
+    gr.Dropdown(label="Ownership Type", choices=global_ownership_types, value=global_ownership_types[0] if global_ownership_types else "OWNER OPERATOR OWNED"),
     gr.Textbox(label="Make (e.g., FORD)", value="FORD")
 ]
         plot_button = gr.Button("Generate Plots")
         # Output components for plots
+        # These are just placeholders; the generate_plots function will return the actual image bytes
+        plot_outputs_components = [
             gr.Image(label="Decision Breakdown", interactive=False, visible=True),
             gr.Image(label="Total Repairs by Ownership Type", interactive=False, visible=True),
             gr.Image(label="Last 10 Weeks Miles Distribution", interactive=False, visible=True),
         plot_button.click(
             fn=generate_plots,
             inputs=[],
+            outputs=plot_outputs_components
         )
 # Launch the Gradio app