Spaces:

ans123
/

Truck_Analysis

Sleeping

App Files Files Community

ans123 committed 4 days ago

Commit

ad31317

verified ·

1 Parent(s): 2122e24

Create app.py

Browse files

Files changed (1) hide show

app.py +415 -0

app.py ADDED Viewed

	@@ -0,0 +1,415 @@

+import pandas as pd
+import numpy as np
+import gradio as gr
+import matplotlib.pyplot as plt
+import seaborn as sns
+import io
+import base64
# --- Module-level state shared by the Gradio callbacks ---
# initial_data_processing() fills these in once, when the app starts.
global_df = global_brand_resale = None
global_brand_resale_mean = global_fair_market_value_mean = 0
global_purchase_amount_mean = global_monthly_payment_mean = 0
global_ownership_types = []  # Choices offered by the ownership-type dropdown
+# === Truck ID Cleaner ===
def clean_truck_id(val):
    """
    Normalize a raw truck/unit identifier so IDs from different sheets compare equal.

    Args:
        val: Raw cell value (string, int, float, or a missing value).

    Returns:
        The identifier as a string with surrounding whitespace and every
        "SPOT-" occurrence removed, or None when ``val`` is missing (NaN/None/NaT).
    """
    if pd.isna(val):
        return None
    # pandas stores an integer column that contains blanks as float64, so unit
    # 1234 can arrive as 1234.0; without this step it would become "1234.0"
    # and never match "1234" coming from another sheet.
    if isinstance(val, (float, np.floating)) and float(val).is_integer():
        val = int(val)
    # Strip again after removing the marker so "SPOT- 77" yields "77", not " 77".
    return str(val).strip().replace("SPOT-", "").strip()
+# === Load and Prepare Data ===
def load_and_clean_data():
    """
    Read the six Excel workbooks and return them as lightly cleaned DataFrames.

    Amount/price/distance/odometer columns are coerced to numbers and date
    columns to datetimes (unparseable cells become NaN/NaT). A normalized
    "truck_id" column is added to every sheet except the truck-paper one.

    Returns:
        Tuple ``(finance, maintenance, distance, odometer, stub, paper)``.

    Raises:
        gr.Error: When a workbook is missing or any other loading step fails.
    """
    try:
        # Workbooks are read in a fixed order; the first missing one is reported.
        finance = pd.read_excel("truck-finance.xlsx")
        maintenance = pd.read_excel("maintenancepo-truck.xlsx")
        distance = pd.read_excel("vehicle-distance-traveled.xlsx")
        odometer = pd.read_excel("truck-odometer-data-week.xlsx")
        stub = pd.read_excel("stub-data.xlsx")
        paper = pd.read_excel("truck-paper.xlsx")

        # (frame, column, converter) -- errors='coerce' turns bad cells into NaN/NaT.
        type_fixes = (
            (finance, 'fair_market_value', pd.to_numeric),
            (finance, 'purchase_amount', pd.to_numeric),
            (finance, 'monthly_payment', pd.to_numeric),
            (maintenance, 'amount', pd.to_numeric),
            (distance, 'date', pd.to_datetime),
            (distance, 'distance', pd.to_numeric),
            (odometer, 'pay_date', pd.to_datetime),
            (odometer, 'odometer', pd.to_numeric),
            (paper, 'truck_price', pd.to_numeric),
        )
        for frame, column, convert in type_fixes:
            frame[column] = convert(frame[column], errors='coerce')

        # Dump the column names to the log to help diagnose schema mismatches.
        for label, frame in (
            ("Finance", finance),
            ("Maintenance", maintenance),
            ("Stub", stub),
            ("Distance", distance),
            ("Odometer", odometer),
        ):
            print(f"{label} columns after loading:", frame.columns)

        # Derive the shared join key from each sheet's own identifier column.
        id_sources = (
            (finance, "unit_id"),
            (maintenance, "unit_id"),
            (stub, "TRUCK"),
            (odometer, "unit_id"),
            (distance, "unit_id"),
        )
        for frame, id_column in id_sources:
            frame["truck_id"] = frame[id_column].apply(clean_truck_id)

        return finance, maintenance, distance, odometer, stub, paper
    except FileNotFoundError as e:
        print(f"Error: One or more input files not found. Please ensure all Excel files are in the same directory as the script. Missing file: {e.filename}")
        # Raise a Gradio error instead of exiting so the server process stays alive.
        raise gr.Error(f"Required file not found: {e.filename}. Please upload all necessary Excel files.")
    except Exception as e:
        print(f"An unexpected error occurred during data loading: {e}")
        raise gr.Error(f"An error occurred during data loading: {e}")
+# === Initial Data Processing (called once at app startup) ===
def _zero_if_nan(value):
    """Return ``value`` unchanged, or 0 when it is NaN (e.g. the mean of an all-NaN column)."""
    return 0 if pd.isna(value) else value


def _classify_fleet(df):
    """
    Label every row of ``df`` as "Scrap", "Sell", "Lease", "Keep" or "Analyze".

    The rules are evaluated in that order and the first one that matches wins;
    rows matching none of them get "Analyze". Works on an empty frame too.
    """
    repairs = df['total_repairs']
    recent_miles = df['last_10w_miles']
    odo_diff = df['odo_diff']
    purchase = df['purchase_amount']
    market_value = df['fair_market_value']

    scrap = ((repairs > 8000) & (recent_miles < 500) & (odo_diff > 70000)
             & (df['CPM'] > 0.2) & (purchase < 20000))
    sell = ((repairs > 5000) & (recent_miles < 1000)
            & (market_value > purchase) & (odo_diff > 50000))
    lease = ((df['ownership_type'] == 'OPERATING LEASE') & (df['monthly_payment'] > 600)
             & (purchase < 30000) & (market_value > 28000) & (odo_diff < 40000))
    keep = ((repairs < 3000) & (recent_miles > 2000)
            & (market_value < purchase) & (odo_diff < 30000))

    # np.select takes the first true condition per row, mirroring an if/elif chain.
    return np.select(
        [scrap, sell, lease, keep],
        ["Scrap", "Sell", "Lease", "Keep"],
        default="Analyze",
    )


def initial_data_processing():
    """
    Load, merge and score the fleet data, then publish it through the module globals.

    Populates ``global_df`` (one row per finance record, with maintenance,
    usage, mileage, odometer, resale and Decision columns), the per-brand
    resale table, the fleet means used as fallbacks by ``predict_decision``
    (stored as 0 rather than NaN when a column has no usable values) and the
    list of ownership types for the dropdown.

    Never raises: on any failure the error is printed and ``global_df`` is set
    to an empty DataFrame so the app can still start.
    """
    global global_df, global_brand_resale, global_brand_resale_mean, \
           global_fair_market_value_mean, global_purchase_amount_mean, \
           global_monthly_payment_mean, global_ownership_types
    try:
        finance, maintenance, distance, odometer, stub, paper = load_and_clean_data()

        # --- Maintenance Summary: total spend and number of shop visits per truck ---
        maintenance_summary = maintenance.groupby("truck_id").agg(
            total_repairs=("amount", "sum"),
            shop_visits=("truck_id", "count")
        ).reset_index()

        # --- Stub Usage: number of stub records per truck ---
        stub_summary = stub.groupby("truck_id").agg(
            usage_records=("truck_id", "count")
        ).reset_index()

        # --- 10-Week Distance Summary, measured back from the newest date in the data ---
        latest = distance['date'].max()
        window_start = latest - pd.Timedelta(weeks=10)
        last10 = distance[distance['date'].notna() & (distance['date'] >= window_start)]
        distance_summary = last10.groupby("truck_id").agg(
            last_10w_miles=('distance', 'sum')
        ).reset_index()

        # --- Odometer Summary: last reading minus first reading, ordered by pay date ---
        odometer_cleaned = odometer[odometer['pay_date'].notna() & odometer['odometer'].notna()]
        odo_summary = odometer_cleaned.sort_values(['truck_id', 'pay_date']).groupby("truck_id").agg(
            odo_start=('odometer', 'first'),
            odo_end=('odometer', 'last')
        ).reset_index()
        odo_summary["odo_diff"] = odo_summary["odo_end"] - odo_summary["odo_start"]

        # --- Resale Values (avg per make) ---
        paper['truck_brand'] = paper['truck_brand'].str.upper()
        global_brand_resale = paper.groupby("truck_brand").agg(
            avg_resale_value=('truck_price', 'mean')
        ).reset_index()
        global_brand_resale_mean = _zero_if_nan(global_brand_resale['avg_resale_value'].mean())

        # --- Merge All Sources (left joins keep every finance row) ---
        df = finance.merge(maintenance_summary, on="truck_id", how="left")
        df = df.merge(stub_summary, on="truck_id", how="left")
        df = df.merge(distance_summary, on="truck_id", how="left")
        df = df.merge(odo_summary[['truck_id', 'odo_diff']], on="truck_id", how="left")
        df['make'] = df['make'].str.upper()
        df = df.merge(global_brand_resale, left_on='make', right_on='truck_brand', how='left')
        df = df.drop(columns=['truck_brand'])

        # --- Standardize 'ownership_type' (missing values become the string "NAN") ---
        df['ownership_type'] = df['ownership_type'].astype(str).str.strip().str.upper()
        global_ownership_types = df['ownership_type'].unique().tolist()  # Store for Gradio dropdown

        # --- Trucks absent from a source sheet count as zero activity ---
        for column in ("total_repairs", "shop_visits", "usage_records", "last_10w_miles"):
            df[column] = df[column].fillna(0)
        # A negative odometer delta is treated as no mileage.
        df["odo_diff"] = df["odo_diff"].fillna(0).clip(lower=0)

        # --- Fleet means used for imputation; stored NaN-free for predict_decision ---
        global_fair_market_value_mean = _zero_if_nan(df['fair_market_value'].mean())
        global_purchase_amount_mean = _zero_if_nan(df['purchase_amount'].mean())
        global_monthly_payment_mean = _zero_if_nan(df['monthly_payment'].mean())
        df["avg_resale_value"] = df["avg_resale_value"].fillna(global_brand_resale_mean)
        df["fair_market_value"] = df["fair_market_value"].fillna(global_fair_market_value_mean)
        df["purchase_amount"] = df["purchase_amount"].fillna(global_purchase_amount_mean)
        df["monthly_payment"] = df["monthly_payment"].fillna(global_monthly_payment_mean)

        # --- Add CPM ---
        # Replace odo_diff = 0 with 1 for CPM calculation to avoid division by zero
        df['odo_diff_for_cpm'] = df['odo_diff'].replace(0, 1)
        df["CPM"] = df["total_repairs"] / df["odo_diff_for_cpm"]
        df["CPM"] = df["CPM"].replace([np.inf, -np.inf], np.nan).fillna(0)

        # --- Apply decision logic to the full dataset for plotting the breakdown ---
        df["Decision"] = _classify_fleet(df)

        global_df = df  # Store the fully processed DataFrame globally for plotting
        print("Initial data processing complete. Data loaded for Gradio app.")
    except gr.Error as e:
        print(f"Gradio Error during initial data processing: {e}")
        # Allow the app to start but indicate data is not ready
        global_df = pd.DataFrame()  # Empty DataFrame to prevent errors in plotting
    except Exception as e:
        print(f"Unexpected error during initial data processing: {e}")
        global_df = pd.DataFrame()  # Empty DataFrame
+# === Decision Prediction Function for Gradio Interface ===
def _parse_number(value):
    """Return ``value`` as a float, or None when it is None, blank text or NaN."""
    if value is None:
        return None
    if isinstance(value, str):
        value = value.strip()
        if not value:
            return None
    number = float(value)
    return None if pd.isna(number) else number


def _fleet_mean(value):
    """Return a fleet mean as a float, using 0.0 when the mean itself is NaN."""
    return 0.0 if pd.isna(value) else float(value)


def predict_decision(total_repairs, last_10w_miles, odo_diff, cpm, purchase_amount, fair_market_value, monthly_payment, ownership_type_str, make):
    """
    Return the fleet decision for a single truck described by the form inputs.

    Args:
        total_repairs, last_10w_miles, odo_diff, cpm: Usage figures; a missing
            value (None, blank text or NaN) is treated as 0.
        purchase_amount, fair_market_value, monthly_payment: Financial figures;
            a missing value falls back to the corresponding fleet mean computed
            at startup.
        ownership_type_str: Ownership type; compared case-insensitively after
            trimming. None is treated as "UNKNOWN".
        make: Accepted for interface compatibility; no rule uses it.

    Returns:
        One of "Scrap", "Sell", "Lease", "Keep" or "Analyze". Rules are checked
        in that order and the first match wins.

    Raises:
        ValueError: If a numeric input is non-blank text that is not a number.
    """
    total_repairs = _parse_number(total_repairs) or 0.0
    last_10w_miles = _parse_number(last_10w_miles) or 0.0
    odo_diff = _parse_number(odo_diff) or 0.0
    cpm = _parse_number(cpm) or 0.0

    # The fleet means are only looked up when the corresponding input is missing.
    purchase_amount = _parse_number(purchase_amount)
    if purchase_amount is None:
        purchase_amount = _fleet_mean(global_purchase_amount_mean)
    fair_market_value = _parse_number(fair_market_value)
    if fair_market_value is None:
        fair_market_value = _fleet_mean(global_fair_market_value_mean)
    monthly_payment = _parse_number(monthly_payment)
    if monthly_payment is None:
        monthly_payment = _fleet_mean(global_monthly_payment_mean)

    if ownership_type_str is None:
        ownership = "UNKNOWN"
    else:
        ownership = str(ownership_type_str).strip().upper()

    # 1. Scrap: expensive to repair, barely driven, high mileage, cheap to replace.
    if (total_repairs > 8000 and
            last_10w_miles < 500 and
            odo_diff > 70000 and
            cpm > 0.2 and
            purchase_amount < 20000):
        return "Scrap"
    # 2. Sell: costly and idle, but worth more than was paid for it.
    if (total_repairs > 5000 and
            last_10w_miles < 1000 and
            fair_market_value > purchase_amount and
            odo_diff > 50000):
        return "Sell"
    # 3. Lease:
    if (ownership == 'OPERATING LEASE' and
            monthly_payment > 600 and
            purchase_amount < 30000 and
            fair_market_value > 28000 and
            odo_diff < 40000):
        return "Lease"
    # 4. Keep: cheap to run and actively used.
    if (total_repairs < 3000 and
            last_10w_miles > 2000 and
            fair_market_value < purchase_amount and
            odo_diff < 30000):
        return "Keep"
    # 5. Analyze: Default
    return "Analyze"
+# === Plot Generation Function for Gradio Interface ===
def _figure_to_array(fig):
    """
    Rasterize ``fig`` to a uint8 pixel array that a gr.Image output can display.

    The figure is saved as PNG into memory and read back with matplotlib, so
    this does not depend on which drawing backend is active.
    """
    buf = io.BytesIO()
    fig.savefig(buf, format='png')
    buf.seek(0)
    pixels = plt.imread(buf, format='png')
    # matplotlib documents PNG data as floats in [0, 1]; scale to 0-255 bytes.
    if np.issubdtype(pixels.dtype, np.floating):
        pixels = (pixels * 255).round().astype(np.uint8)
    return pixels


def generate_plots():
    """
    Render the four fleet charts from ``global_df``.

    Returns:
        A list of four items, in the order of the image outputs (decision
        breakdown, repairs by ownership type, 10-week miles distribution,
        fair market value vs. purchase amount). Each item is a pixel array,
        or None when that particular chart failed (the error is printed).

    Raises:
        gr.Error: When no fleet data has been loaded.
    """
    if global_df is None or global_df.empty:
        # Raise instead of returning a single string: the click handler is
        # wired to four image outputs, so a lone string cannot be displayed.
        raise gr.Error("Error: Data not loaded or is empty. Please ensure input files are present and valid.")

    def draw_decision_breakdown():
        sns.countplot(data=global_df, x='Decision', palette='viridis',
                      order=global_df['Decision'].value_counts().index)
        plt.title('Decision Breakdown for the Fleet')
        plt.xlabel('Decision')
        plt.ylabel('Number of Trucks')
        plt.grid(axis='y', linestyle='--', alpha=0.7)

    def draw_repairs_by_ownership():
        sns.boxplot(data=global_df, x='ownership_type', y='total_repairs', palette='coolwarm')
        plt.title('Total Repairs by Ownership Type')
        plt.xlabel('Ownership Type')
        plt.ylabel('Total Repairs ($)')
        plt.xticks(rotation=45, ha='right')
        plt.grid(axis='y', linestyle='--', alpha=0.7)

    def draw_miles_distribution():
        sns.histplot(data=global_df, x='last_10w_miles', bins=30, kde=True, color='skyblue')
        plt.title('Distribution of Last 10 Weeks Miles')
        plt.xlabel('Last 10 Weeks Miles')
        plt.ylabel('Number of Trucks')
        plt.grid(axis='y', linestyle='--', alpha=0.7)

    def draw_value_vs_purchase():
        sns.scatterplot(data=global_df, x='purchase_amount', y='fair_market_value',
                        hue='Decision', palette='deep', alpha=0.7)
        plt.title('Fair Market Value vs. Purchase Amount by Decision')
        plt.xlabel('Purchase Amount ($)')
        plt.ylabel('Fair Market Value ($)')
        plt.grid(linestyle='--', alpha=0.7)

    # (name used in the error message, figure size, apply tight_layout?, drawer)
    chart_specs = [
        ("Decision Breakdown", (8, 6), False, draw_decision_breakdown),
        ("Total Repairs", (12, 7), True, draw_repairs_by_ownership),
        ("Miles Distribution", (10, 6), False, draw_miles_distribution),
        ("FMV vs Purchase", (10, 7), True, draw_value_vs_purchase),
    ]

    images = []
    for name, figsize, use_tight_layout, draw in chart_specs:
        # plt.figure makes the new figure current, so the drawers' plt.* calls target it.
        fig = plt.figure(figsize=figsize)
        try:
            draw()
            if use_tight_layout:
                plt.tight_layout()
            images.append(_figure_to_array(fig))
        except Exception as e:
            # Best effort: one broken chart must not hide the others.
            print(f"Error generating {name} plot: {e}")
            images.append(None)
        finally:
            # Always release the figure, including when drawing failed.
            plt.close(fig)
    return images
# --- Initial Data Loading and Processing Call ---
# Runs once at import time so the globals are ready before the UI is built.
initial_data_processing()

# --- Gradio Interface Definition ---
# Ownership choices come from the loaded data; the hard-coded list is only a
# fallback for when loading failed and no ownership types were collected.
ownership_choices = global_ownership_types if global_ownership_types else [
    "OWNER OPERATOR OWNED", "OPERATING LEASE", "FINANCED", "LEASE PURCHASE",
    "RENTAL", "FMV LEASE", "NAN",
]
# Pre-select a value that is actually among the choices: the data-derived
# list is not guaranteed to contain "OWNER OPERATOR OWNED".
ownership_default = (
    "OWNER OPERATOR OWNED"
    if "OWNER OPERATOR OWNED" in ownership_choices
    else ownership_choices[0]
)

# Inputs for the Decision Predictor tab, in predict_decision's parameter order.
decision_inputs = [
    gr.Number(label="Total Repairs ($)", value=0.0),
    gr.Number(label="Last 10 Weeks Miles", value=0.0),
    gr.Number(label="Odometer Difference (odo_diff)", value=0.0),
    gr.Number(label="Cost Per Mile (CPM)", value=0.0),
    gr.Number(label="Purchase Amount ($)", value=0.0),
    gr.Number(label="Fair Market Value ($)", value=0.0),
    gr.Number(label="Monthly Payment ($)", value=0.0),
    gr.Dropdown(label="Ownership Type", choices=ownership_choices, value=ownership_default),
    gr.Textbox(label="Make (e.g., FORD)", value="FORD"),
]

# Create the Gradio Interface
with gr.Blocks() as demo:
    gr.Markdown("# Truck Evaluation Application")
    gr.Markdown("Use this app to predict truck decisions and visualize fleet data.")

    with gr.Tab("Decision Predictor"):
        gr.Markdown("## Predict Truck Decision")
        gr.Markdown("Enter the details for a single truck to get a decision.")
        with gr.Row():
            # The components were created above, outside the Blocks context,
            # so they have to be rendered explicitly where they should appear.
            for input_comp in decision_inputs:
                input_comp.render()
        predict_button = gr.Button("Get Decision")
        decision_output = gr.Textbox(label="Decision", interactive=False)
        predict_button.click(
            fn=predict_decision,
            inputs=decision_inputs,
            outputs=decision_output
        )

    with gr.Tab("Data Visualizations"):
        gr.Markdown("## Fleet Data Visualizations")
        gr.Markdown("Explore insights from your truck data.")
        plot_button = gr.Button("Generate Plots")
        # One image output per chart, in the order generate_plots returns them.
        plot_outputs = [
            gr.Image(label="Decision Breakdown", interactive=False, visible=True),
            gr.Image(label="Total Repairs by Ownership Type", interactive=False, visible=True),
            gr.Image(label="Last 10 Weeks Miles Distribution", interactive=False, visible=True),
            gr.Image(label="Fair Market Value vs. Purchase Amount", interactive=False, visible=True)
        ]
        plot_button.click(
            fn=generate_plots,
            inputs=[],
            outputs=plot_outputs
        )

# Launch the Gradio app
if __name__ == "__main__":
    demo.launch()