Spaces:
Sleeping
Sleeping
import pandas as pd | |
import numpy as np | |
import gradio as gr | |
import matplotlib.pyplot as plt | |
import seaborn as sns | |
import io | |
import base64 | |
# --- Global Variables to store processed data --- | |
# Processed fleet table and brand resale table; both stay None until
# initial_data_processing() has run.
global_df = None
global_brand_resale = None

# Ownership-type labels offered in the UI dropdown.
global_ownership_types = []

# Column means used as fall-back values when a figure is missing.
global_brand_resale_mean = 0
global_fair_market_value_mean = 0
global_purchase_amount_mean = 0
global_monthly_payment_mean = 0
# === Truck ID Cleaner === | |
def clean_truck_id(val):
    """
    Return a normalized truck ID string, or None for a missing value.

    Normalization steps:
      * NaN / None -> None.
      * Whole-number floats (e.g. 123.0) are rendered without the trailing
        ".0". pandas loads a numeric Excel column containing blanks as
        float, so without this step "123.0" from one workbook would never
        match "123" from another when the tables are merged on truck_id.
      * Every occurrence of "SPOT-" is removed.
      * Surrounding whitespace is stripped, including whitespace that only
        becomes leading once "SPOT-" is gone (e.g. "SPOT- 55").
    """
    if pd.isna(val):
        return None
    if isinstance(val, float) and val.is_integer():
        val = int(val)
    return str(val).replace("SPOT-", "").strip()
# === Load and Prepare Data === | |
def load_and_clean_data():
    """
    Read the six Excel workbooks and return them as cleaned DataFrames.

    Money, distance and odometer columns are coerced to numeric and date
    columns to datetime (unparseable cells become NaN/NaT). Every frame
    except the resale ("paper") one gains a ``truck_id`` column derived from
    its unit column via ``clean_truck_id``.

    Returns:
        Tuple ``(finance, maintenance, distance, odometer, stub, paper)``.

    Raises:
        gr.Error: if a workbook is missing or any other error occurs while
            loading or cleaning.
    """
    try:
        # All workbooks are read before any cleaning starts.
        finance = pd.read_excel("truck-finance.xlsx")
        maintenance = pd.read_excel("maintenancepo-truck.xlsx")
        distance = pd.read_excel("vehicle-distance-traveled.xlsx")
        odometer = pd.read_excel("truck-odometer-data-week.xlsx")
        stub = pd.read_excel("stub-data.xlsx")
        paper = pd.read_excel("truck-paper.xlsx")

        # errors='coerce' turns unparseable values into NaN / NaT.
        for money_column in ('fair_market_value', 'purchase_amount', 'monthly_payment'):
            finance[money_column] = pd.to_numeric(finance[money_column], errors='coerce')
        maintenance['amount'] = pd.to_numeric(maintenance['amount'], errors='coerce')
        distance['date'] = pd.to_datetime(distance['date'], errors='coerce')
        distance['distance'] = pd.to_numeric(distance['distance'], errors='coerce')
        odometer['pay_date'] = pd.to_datetime(odometer['pay_date'], errors='coerce')
        odometer['odometer'] = pd.to_numeric(odometer['odometer'], errors='coerce')
        paper['truck_price'] = pd.to_numeric(paper['truck_price'], errors='coerce')

        # Diagnostic dump of the column names that were actually loaded.
        loaded_frames = (
            ("Finance", finance),
            ("Maintenance", maintenance),
            ("Stub", stub),
            ("Distance", distance),
            ("Odometer", odometer),
        )
        for label, frame in loaded_frames:
            print(f"{label} columns after loading:", frame.columns)

        # Derive the shared join key from each frame's own unit column.
        id_sources = (
            (finance, "unit_id"),
            (maintenance, "unit_id"),
            (stub, "TRUCK"),
            (odometer, "unit_id"),
            (distance, "unit_id"),
        )
        for frame, id_column in id_sources:
            frame["truck_id"] = frame[id_column].apply(clean_truck_id)

        return finance, maintenance, distance, odometer, stub, paper
    except FileNotFoundError as e:
        print(f"Error: One or more input files not found. Please ensure all Excel files are in the same directory as the script. Missing file: {e.filename}")
        raise gr.Error(f"Required file not found: {e.filename}. Please upload all necessary Excel files.")
    except Exception as e:
        print(f"An unexpected error occurred during data loading: {e}")
        raise gr.Error(f"An error occurred during data loading: {e}")
# === Initial Data Processing (called once at app startup) === | |
def initial_data_processing(): | |
""" | |
Loads, cleans, merges, and prepares all data for the Gradio app. | |
Populates global variables used by prediction and plotting functions. | |
""" | |
global global_df, global_brand_resale, global_brand_resale_mean, \ | |
global_fair_market_value_mean, global_purchase_amount_mean, \ | |
global_monthly_payment_mean, global_ownership_types | |
try: | |
finance, maintenance, distance, odometer, stub, paper = load_and_clean_data() | |
# --- Maintenance Summary --- | |
maintenance_summary = maintenance.groupby("truck_id").agg( | |
total_repairs=("amount", "sum"), | |
shop_visits=("truck_id", "count") | |
).reset_index() | |
# --- Stub Usage --- | |
stub_summary = stub.groupby("truck_id").agg( | |
usage_records=("truck_id", "count") | |
).reset_index() | |
# --- 10-Week Distance Summary --- | |
latest = distance['date'].max() | |
last10 = distance[distance['date'].notna() & (distance['date'] >= (latest - pd.Timedelta(weeks=10)))] | |
distance_summary = last10.groupby("truck_id").agg( | |
last_10w_miles=('distance', 'sum') | |
).reset_index() | |
# --- Odometer Summary --- | |
odometer_cleaned = odometer[odometer['pay_date'].notna() & odometer['odometer'].notna()] | |
odo_summary = odometer_cleaned.sort_values(['truck_id', 'pay_date']).groupby("truck_id").agg( | |
odo_start=('odometer', 'first'), | |
odo_end=('odometer', 'last') | |
).reset_index() | |
odo_summary["odo_diff"] = odo_summary["odo_end"] - odo_summary["odo_start"] | |
# --- Resale Values (avg per make) --- | |
paper['truck_brand'] = paper['truck_brand'].str.upper() | |
global_brand_resale = paper.groupby("truck_brand").agg( | |
avg_resale_value=('truck_price', 'mean') | |
).reset_index() | |
global_brand_resale_mean = global_brand_resale['avg_resale_value'].mean() | |
# --- Merge All Sources --- | |
df = finance.merge(maintenance_summary, on="truck_id", how="left") | |
df = df.merge(stub_summary, on="truck_id", how="left") | |
df = df.merge(distance_summary, on="truck_id", how="left") | |
df = df.merge(odo_summary[['truck_id', 'odo_diff']], on="truck_id", how="left") | |
df['make'] = df['make'].str.upper() | |
df = df.merge(global_brand_resale, left_on='make', right_on='truck_brand', how='left') | |
df.drop(columns=['truck_brand'], inplace=True) | |
# --- Standardize 'ownership_type' --- | |
df['ownership_type'] = df['ownership_type'].astype(str).str.strip().str.upper() | |
global_ownership_types = df['ownership_type'].unique().tolist() | |
# Ensure 'NAN' is handled if it appears due to missing ownership types | |
if 'NAN' in global_ownership_types: | |
global_ownership_types.remove('NAN') | |
global_ownership_types.sort() # Sort for better display in dropdown | |
# Calculate means for imputation, handling potential NaN means if column is all NaN | |
global_fair_market_value_mean = df['fair_market_value'].mean() | |
global_purchase_amount_mean = df['purchase_amount'].mean() | |
global_monthly_payment_mean = df['monthly_payment'].mean() | |
# --- Handle NaNs for decision-making columns --- | |
df["total_repairs"] = df["total_repairs"].fillna(0) | |
df["shop_visits"] = df["shop_visits"].fillna(0) | |
df["usage_records"] = df["usage_records"].fillna(0) | |
df["last_10w_miles"] = df["last_10w_miles"].fillna(0) | |
df["odo_diff"] = df["odo_diff"].fillna(0).apply(lambda x: 0 if x < 0 else x) | |
df["avg_resale_value"] = df["avg_resale_value"].fillna(global_brand_resale_mean if not pd.isna(global_brand_resale_mean) else 0) | |
df["fair_market_value"] = df["fair_market_value"].fillna(global_fair_market_value_mean if not pd.isna(global_fair_market_value_mean) else 0) | |
df["purchase_amount"] = df["purchase_amount"].fillna(global_purchase_amount_mean if not pd.isna(global_purchase_amount_mean) else 0) | |
df["monthly_payment"] = df["monthly_payment"].fillna(global_monthly_payment_mean if not pd.isna(global_monthly_payment_mean) else 0) | |
# --- Add CPM --- | |
# Replace odo_diff = 0 with 1 for CPM calculation to avoid division by zero and get non-zero CPM | |
df['odo_diff_for_cpm'] = df['odo_diff'].replace(0, 1) | |
df["CPM"] = df["total_repairs"] / df["odo_diff_for_cpm"] | |
df["CPM"] = df["CPM"].replace([np.inf, -np.inf], np.nan) | |
df["CPM"] = df["CPM"].fillna(0) | |
# --- Apply decision logic to the full dataset for plotting the breakdown --- | |
def make_decision_for_df(row): | |
# This is the same logic as before, applied to the full DataFrame | |
# 1. Scrap: | |
if (row['total_repairs'] > 8000 and | |
row['last_10w_miles'] < 500 and | |
row['odo_diff'] > 70000 and | |
row['CPM'] > 0.2 and | |
row['purchase_amount'] < 20000): | |
return "Scrap" | |
# 2. Sell: | |
elif (row['total_repairs'] > 5000 and | |
row['last_10w_miles'] < 1000 and | |
row['fair_market_value'] > row['purchase_amount'] and | |
row['odo_diff'] > 50000): | |
return "Sell" | |
# 3. Lease: | |
elif (row['ownership_type'] == 'OPERATING LEASE' and | |
row['monthly_payment'] > 600 and | |
row['purchase_amount'] < 30000 and | |
row['fair_market_value'] > 28000 and | |
row['odo_diff'] < 40000): | |
return "Lease" | |
# 4. Keep: | |
elif (row['total_repairs'] < 3000 and | |
row['last_10w_miles'] > 2000 and | |
row['fair_market_value'] < row['purchase_amount'] and | |
row['odo_diff'] < 30000): | |
return "Keep" | |
# 5. Analyze: Default | |
else: | |
return "Analyze" | |
df["Decision"] = df.apply(make_decision_for_df, axis=1) | |
global_df = df.copy() # Make a copy to avoid SettingWithCopyWarning if modified later | |
print("Initial data processing complete. Data loaded for Gradio app.") | |
except gr.Error as e: | |
print(f"Gradio Error during initial data processing: {e}") | |
# If an error occurs, ensure global_df is an empty DataFrame to prevent further errors | |
global_df = pd.DataFrame() | |
except Exception as e: | |
print(f"Unexpected error during initial data processing: {e}") | |
global_df = pd.DataFrame() | |
# === Decision Prediction Function for Gradio Interface === | |
def predict_decision(total_repairs, last_10w_miles, odo_diff, cpm, purchase_amount, fair_market_value, monthly_payment, ownership_type_str, make): | |
""" | |
Predicts the decision for a single truck based on user inputs. | |
Uses globally pre-calculated means for missing values if inputs are None. | |
""" | |
# Handle potentially None inputs from Gradio and ensure numeric types | |
total_repairs = float(total_repairs) if total_repairs is not None else 0.0 | |
last_10w_miles = float(last_10w_miles) if last_10w_miles is not None else 0.0 | |
odo_diff = float(odo_diff) if odo_diff is not None else 0.0 | |
cpm = float(cpm) if cpm is not None else 0.0 | |
# Use global means for financial values if user input is None, and ensure they are float | |
purchase_amount = float(purchase_amount) if purchase_amount is not None else (global_purchase_amount_mean if not pd.isna(global_purchase_amount_mean) else 0.0) | |
fair_market_value = float(fair_market_value) if fair_market_value is not None else (global_fair_market_value_mean if not pd.isna(global_fair_market_value_mean) else 0.0) | |
monthly_payment = float(monthly_payment) if monthly_payment is not None else (global_monthly_payment_mean if not pd.isna(global_monthly_payment_mean) else 0.0) | |
ownership_type_str = ownership_type_str.strip().upper() if ownership_type_str is not None else "UNKNOWN" | |
make = make.strip().upper() if make is not None else "UNKNOWN" | |
# For avg_resale_value, try to get it from the pre-calculated global_brand_resale, else use global mean | |
avg_resale_value = 0.0 # Default if global_brand_resale is not loaded | |
if global_brand_resale is not None: | |
avg_resale_value_lookup = global_brand_resale.loc[global_brand_resale['truck_brand'] == make, 'avg_resale_value'].values | |
if len(avg_resale_value_lookup) > 0: | |
avg_resale_value = avg_resale_value_lookup[0] | |
else: | |
avg_resale_value = global_brand_resale_mean if not pd.isna(global_brand_resale_mean) else 0.0 | |
# Apply the same logic as make_decision, but directly with the input variables | |
# 1. Scrap: | |
if (total_repairs > 8000 and | |
last_10w_miles < 500 and | |
odo_diff > 70000 and | |
cpm > 0.2 and | |
purchase_amount < 20000): | |
return "Scrap" | |
# 2. Sell: | |
elif (total_repairs > 5000 and | |
last_10w_miles < 1000 and | |
fair_market_value > purchase_amount and | |
odo_diff > 50000): | |
return "Sell" | |
# 3. Lease: | |
elif (ownership_type_str == 'OPERATING LEASE' and | |
monthly_payment > 600 and | |
purchase_amount < 30000 and | |
fair_market_value > 28000 and | |
odo_diff < 40000): | |
return "Lease" | |
# 4. Keep: | |
elif (total_repairs < 3000 and | |
last_10w_miles > 2000 and | |
fair_market_value < purchase_amount and | |
odo_diff < 30000): | |
return "Keep" | |
# 5. Analyze: Default | |
else: | |
return "Analyze" | |
# === Plot Generation Function for Gradio Interface === | |
def _new_axes(figsize):
    """Create a standalone Agg-backed figure of *figsize* holding a single axes."""
    # Imported locally so this block is self-contained. Building the Figure
    # directly (instead of via pyplot) keeps it out of pyplot's global figure
    # registry, so nothing leaks if plotting fails.
    from matplotlib.backends.backend_agg import FigureCanvasAgg
    from matplotlib.figure import Figure

    fig = Figure(figsize=figsize)
    FigureCanvasAgg(fig)
    return fig, fig.add_subplot(1, 1, 1)


def _figure_to_array(fig):
    """Rasterize *fig* and return its pixels as an (H, W, 4) uint8 RGBA array."""
    fig.canvas.draw()
    # Copy so the array does not alias the canvas' internal buffer.
    return np.asarray(fig.canvas.buffer_rgba()).copy()


def _draw_decision_breakdown(fig, ax, data):
    """Draw the count of trucks per decision; always returns True."""
    # hue + legend=False avoids seaborn's palette-without-hue FutureWarning.
    sns.countplot(data=data, x='Decision', hue='Decision', palette='viridis',
                  order=data['Decision'].value_counts().index, legend=False, ax=ax)
    ax.set_title('Decision Breakdown for the Fleet')
    ax.set_xlabel('Decision')
    ax.set_ylabel('Number of Trucks')
    ax.grid(axis='y', linestyle='--', alpha=0.7)
    return True


def _draw_repairs_by_ownership(fig, ax, data):
    """Draw repair totals per ownership type; returns False if no rows qualify."""
    known = data[data['ownership_type'].notna() & (data['ownership_type'] != 'NAN')]
    if known.empty:
        return False
    sns.boxplot(data=known, x='ownership_type', y='total_repairs',
                hue='ownership_type', palette='coolwarm', legend=False, ax=ax)
    ax.set_title('Total Repairs by Ownership Type')
    ax.set_xlabel('Ownership Type')
    ax.set_ylabel('Total Repairs ($)')
    for tick_label in ax.get_xticklabels():
        tick_label.set_rotation(45)
        tick_label.set_horizontalalignment('right')
    ax.grid(axis='y', linestyle='--', alpha=0.7)
    fig.tight_layout()
    return True


def _draw_miles_distribution(fig, ax, data):
    """Draw the histogram of miles driven in the last 10 weeks; always returns True."""
    sns.histplot(data=data, x='last_10w_miles', bins=30, kde=True, color='skyblue', ax=ax)
    ax.set_title('Distribution of Last 10 Weeks Miles')
    ax.set_xlabel('Last 10 Weeks Miles')
    ax.set_ylabel('Number of Trucks')
    ax.grid(axis='y', linestyle='--', alpha=0.7)
    return True


def _draw_value_vs_purchase(fig, ax, data):
    """Draw fair market value against purchase amount; returns False if no rows qualify."""
    complete = data.dropna(subset=['purchase_amount', 'fair_market_value', 'Decision'])
    if complete.empty:
        return False
    sns.scatterplot(data=complete, x='purchase_amount', y='fair_market_value',
                    hue='Decision', palette='deep', alpha=0.7, ax=ax)
    ax.set_title('Fair Market Value vs. Purchase Amount by Decision')
    ax.set_xlabel('Purchase Amount ($)')
    ax.set_ylabel('Fair Market Value ($)')
    ax.grid(linestyle='--', alpha=0.7)
    fig.tight_layout()
    return True


def generate_plots():
    """
    Render the four fleet charts from ``global_df``.

    Returns:
        A list of four entries in the order: decision breakdown, total
        repairs by ownership type, last-10-weeks miles distribution, fair
        market value vs. purchase amount. Each entry is an (H, W, 4) uint8
        RGBA numpy array, or None when the data is not loaded, the chart has
        nothing to show, or drawing it failed.

    Arrays are returned rather than bare base64 text because the plots feed
    ``gr.Image`` outputs, which interpret a plain string as a file path or
    URL.
    """
    if global_df is None or global_df.empty:
        return [None, None, None, None]

    # (label used in error messages, figure size, drawing function)
    plot_specs = (
        ("Decision Breakdown", (8, 6), _draw_decision_breakdown),
        ("Total Repairs", (12, 7), _draw_repairs_by_ownership),
        ("Miles Distribution", (10, 6), _draw_miles_distribution),
        ("FMV vs Purchase", (10, 7), _draw_value_vs_purchase),
    )

    images = []
    for label, figsize, draw in plot_specs:
        # Best effort by design: one failing chart must not block the others.
        try:
            fig, ax = _new_axes(figsize)
            images.append(_figure_to_array(fig) if draw(fig, ax, global_df) else None)
        except Exception as e:
            print(f"Error generating {label} plot: {e}")
            images.append(None)
    return images
# --- Initial Data Loading and Processing Call --- | |
initial_data_processing()

# --- Gradio Interface Definition ---
# Inputs for the "Decision Predictor" tab. The dropdown choices come from the
# ownership types collected during data processing; the fixed label is only
# used as the default when that list is empty.
_default_ownership_type = (
    global_ownership_types[0] if global_ownership_types else "OWNER OPERATOR OWNED"
)
_numeric_input_labels = (
    "Total Repairs ($)",
    "Last 10 Weeks Miles",
    "Odometer Difference (odo_diff)",
    "Cost Per Mile (CPM)",
    "Purchase Amount ($)",
    "Fair Market Value ($)",
    "Monthly Payment ($)",
)
decision_inputs = [
    gr.Number(label=input_label, value=0.0) for input_label in _numeric_input_labels
]
decision_inputs.append(
    gr.Dropdown(label="Ownership Type", choices=global_ownership_types, value=_default_ownership_type)
)
decision_inputs.append(gr.Textbox(label="Make (e.g., FORD)", value="FORD"))

# Build the two-tab application.
with gr.Blocks() as demo:
    gr.Markdown("# Truck Evaluation Application")
    gr.Markdown("Use this app to predict truck decisions and visualize fleet data.")

    with gr.Tab("Decision Predictor"):
        gr.Markdown("## Predict Truck Decision")
        gr.Markdown("Enter the details for a single truck to get a decision.")
        # The inputs were created above, outside the layout, so they are
        # rendered explicitly into this row.
        with gr.Row():
            for input_comp in decision_inputs:
                input_comp.render()
        predict_button = gr.Button("Get Decision")
        decision_output = gr.Textbox(label="Decision", interactive=False)
        predict_button.click(
            fn=predict_decision,
            inputs=decision_inputs,
            outputs=decision_output,
        )

    with gr.Tab("Data Visualizations"):
        gr.Markdown("## Fleet Data Visualizations")
        gr.Markdown("Explore insights from your truck data.")
        plot_button = gr.Button("Generate Plots")
        # One image slot per chart, in the order generate_plots() returns them.
        plot_outputs_components = [
            gr.Image(label=chart_label, interactive=False, visible=True)
            for chart_label in (
                "Decision Breakdown",
                "Total Repairs by Ownership Type",
                "Last 10 Weeks Miles Distribution",
                "Fair Market Value vs. Purchase Amount",
            )
        ]
        plot_button.click(
            fn=generate_plots,
            inputs=[],
            outputs=plot_outputs_components,
        )

# Launch the Gradio app only when run as a script.
if __name__ == "__main__":
    demo.launch()