ans123 committed on
Commit
ad31317
·
verified ·
1 Parent(s): 2122e24

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +415 -0
app.py ADDED
@@ -0,0 +1,415 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import numpy as np
3
+ import gradio as gr
4
+ import matplotlib.pyplot as plt
5
+ import seaborn as sns
6
+ import io
7
+ import base64
8
+
9
+ # --- Global Variables to store processed data ---
10
+ # These will be populated once when the Gradio app starts
11
# Fully merged per-truck DataFrame. Assigned by initial_data_processing();
# stays None until that function has run and is set to an empty DataFrame
# there when loading/processing fails.
global_df = None
# Per-brand resale table (columns: truck_brand, avg_resale_value), built by
# initial_data_processing() from the truck-paper sheet.
global_brand_resale = None
# Means computed by initial_data_processing() and used as fallback values
# when a truck (or a user input) has no value for the corresponding field.
global_brand_resale_mean = 0
global_fair_market_value_mean = 0
global_purchase_amount_mean = 0
global_monthly_payment_mean = 0
global_ownership_types = []  # To populate the dropdown for ownership type
18
+
19
+ # === Truck ID Cleaner ===
20
def clean_truck_id(val):
    """
    Return a normalised truck ID string, or None for missing values.

    The value is converted to text, surrounding whitespace is stripped and
    every "SPOT-" marker is removed.

    Integer-valued floats are rendered without the trailing ".0": a numeric
    ID column that contains blanks is read by pandas as float (123.0), while
    the same ID in another sheet may be read as int (123). Without this
    normalisation the two would become "123.0" and "123" and never match
    when the sheets are merged on truck_id.
    """
    if pd.isna(val):
        return None
    # np.float64 subclasses float, so this also covers values coming
    # straight out of a DataFrame column.
    if isinstance(val, float) and val.is_integer():
        val = int(val)
    return str(val).strip().replace("SPOT-", "")
28
+
29
+ # === Load and Prepare Data ===
30
def load_and_clean_data():
    """
    Read the six Excel workbooks, coerce their key columns and add truck_id.

    Numeric and date columns are converted with errors='coerce', so values
    that cannot be parsed become NaN/NaT instead of raising. A cleaned
    "truck_id" column is added to every sheet except the truck-paper one.

    Returns:
        Tuple (finance, maintenance, distance, odometer, stub, paper).

    Raises:
        gr.Error: if a workbook is missing or any other error occurs while
            loading/converting.
    """
    try:
        # Load files
        finance = pd.read_excel("truck-finance.xlsx")
        maintenance = pd.read_excel("maintenancepo-truck.xlsx")
        distance = pd.read_excel("vehicle-distance-traveled.xlsx")
        odometer = pd.read_excel("truck-odometer-data-week.xlsx")
        stub = pd.read_excel("stub-data.xlsx")
        paper = pd.read_excel("truck-paper.xlsx")

        # Type coercion; unparseable cells turn into NaN / NaT.
        for money_col in ('fair_market_value', 'purchase_amount', 'monthly_payment'):
            finance[money_col] = pd.to_numeric(finance[money_col], errors='coerce')

        maintenance['amount'] = pd.to_numeric(maintenance['amount'], errors='coerce')

        distance['date'] = pd.to_datetime(distance['date'], errors='coerce')
        distance['distance'] = pd.to_numeric(distance['distance'], errors='coerce')

        odometer['pay_date'] = pd.to_datetime(odometer['pay_date'], errors='coerce')
        odometer['odometer'] = pd.to_numeric(odometer['odometer'], errors='coerce')

        paper['truck_price'] = pd.to_numeric(paper['truck_price'], errors='coerce')

        # Diagnostic dump of the column names of each sheet.
        for heading, frame in (
            ("Finance columns after loading:", finance),
            ("Maintenance columns after loading:", maintenance),
            ("Stub columns after loading:", stub),
            ("Distance columns after loading:", distance),
            ("Odometer columns after loading:", odometer),
        ):
            print(heading, frame.columns)

        # Derive the shared join key; the stub sheet names its ID column "TRUCK".
        for frame, id_column in (
            (finance, "unit_id"),
            (maintenance, "unit_id"),
            (stub, "TRUCK"),
            (odometer, "unit_id"),
            (distance, "unit_id"),
        ):
            frame["truck_id"] = frame[id_column].apply(clean_truck_id)

        return finance, maintenance, distance, odometer, stub, paper
    except FileNotFoundError as e:
        print(f"Error: One or more input files not found. Please ensure all Excel files are in the same directory as the script. Missing file: {e.filename}")
        # Raise instead of exiting the process so the Gradio server keeps running.
        raise gr.Error(f"Required file not found: {e.filename}. Please upload all necessary Excel files.")
    except Exception as e:
        print(f"An unexpected error occurred during data loading: {e}")
        raise gr.Error(f"An error occurred during data loading: {e}")
82
+
83
+ # === Initial Data Processing (called once at app startup) ===
84
def initial_data_processing():
    """
    Build the fleet table once at startup and publish it through the globals.

    Loads the sheets, aggregates maintenance / usage / distance / odometer
    data per truck, joins everything onto the finance sheet, imputes missing
    values, derives CPM and a rule-based "Decision" column, then stores the
    result in global_df. On any failure global_df becomes an empty DataFrame
    so the app can still start.
    """
    global global_df, global_brand_resale, global_brand_resale_mean, \
        global_fair_market_value_mean, global_purchase_amount_mean, \
        global_monthly_payment_mean, global_ownership_types

    def _zero_if_nan(mean_value):
        # A mean is NaN when its column held no usable values.
        return 0 if pd.isna(mean_value) else mean_value

    def _classify(row):
        # Rules are checked in priority order; the first match wins.
        repairs = row['total_repairs']
        recent_miles = row['last_10w_miles']
        odo = row['odo_diff']
        purchase = row['purchase_amount']
        fmv = row['fair_market_value']

        # 1. Scrap
        if (repairs > 8000 and recent_miles < 500 and odo > 70000
                and row['CPM'] > 0.2 and purchase < 20000):
            return "Scrap"
        # 2. Sell
        if repairs > 5000 and recent_miles < 1000 and fmv > purchase and odo > 50000:
            return "Sell"
        # 3. Lease
        if (row['ownership_type'] == 'OPERATING LEASE' and row['monthly_payment'] > 600
                and purchase < 30000 and fmv > 28000 and odo < 40000):
            return "Lease"
        # 4. Keep
        if repairs < 3000 and recent_miles > 2000 and fmv < purchase and odo < 30000:
            return "Keep"
        # 5. Default
        return "Analyze"

    try:
        finance, maintenance, distance, odometer, stub, paper = load_and_clean_data()

        # Repair spend and number of maintenance rows per truck.
        repairs_by_truck = maintenance.groupby("truck_id").agg(
            total_repairs=("amount", "sum"),
            shop_visits=("truck_id", "count"),
        ).reset_index()

        # Number of stub rows per truck.
        usage_by_truck = stub.groupby("truck_id").agg(
            usage_records=("truck_id", "count"),
        ).reset_index()

        # Distance driven in the 10 weeks ending at the newest date in the sheet.
        newest_date = distance['date'].max()
        window_start = newest_date - pd.Timedelta(weeks=10)
        in_window = distance['date'].notna() & (distance['date'] >= window_start)
        miles_by_truck = distance[in_window].groupby("truck_id").agg(
            last_10w_miles=('distance', 'sum'),
        ).reset_index()

        # Odometer delta between the earliest and latest dated reading.
        has_reading = odometer['pay_date'].notna() & odometer['odometer'].notna()
        ordered_readings = odometer[has_reading].sort_values(['truck_id', 'pay_date'])
        odo_by_truck = ordered_readings.groupby("truck_id").agg(
            odo_start=('odometer', 'first'),
            odo_end=('odometer', 'last'),
        ).reset_index()
        odo_by_truck["odo_diff"] = odo_by_truck["odo_end"] - odo_by_truck["odo_start"]

        # Average listed price per (upper-cased) brand.
        paper['truck_brand'] = paper['truck_brand'].str.upper()
        global_brand_resale = paper.groupby("truck_brand").agg(
            avg_resale_value=('truck_price', 'mean'),
        ).reset_index()
        global_brand_resale_mean = global_brand_resale['avg_resale_value'].mean()

        # Left-join every per-truck summary onto the finance sheet.
        df = finance
        for summary in (
            repairs_by_truck,
            usage_by_truck,
            miles_by_truck,
            odo_by_truck[['truck_id', 'odo_diff']],
        ):
            df = df.merge(summary, on="truck_id", how="left")

        df['make'] = df['make'].str.upper()
        df = df.merge(global_brand_resale, left_on='make', right_on='truck_brand', how='left')
        df.drop(columns=['truck_brand'], inplace=True)

        # Normalise ownership labels; the distinct values feed the dropdown.
        df['ownership_type'] = df['ownership_type'].astype(str).str.strip().str.upper()
        global_ownership_types = df['ownership_type'].unique().tolist()

        # Trucks absent from a summary get zero for that metric.
        for count_col in ("total_repairs", "shop_visits", "usage_records", "last_10w_miles"):
            df[count_col] = df[count_col].fillna(0)

        # Missing or negative odometer deltas are treated as zero.
        df["odo_diff"] = df["odo_diff"].fillna(0).clip(lower=0)

        # Means are taken before imputation so they reflect real values only.
        global_fair_market_value_mean = df['fair_market_value'].mean()
        global_purchase_amount_mean = df['purchase_amount'].mean()
        global_monthly_payment_mean = df['monthly_payment'].mean()

        df["avg_resale_value"] = df["avg_resale_value"].fillna(_zero_if_nan(global_brand_resale_mean))
        df["fair_market_value"] = df["fair_market_value"].fillna(_zero_if_nan(global_fair_market_value_mean))
        df["purchase_amount"] = df["purchase_amount"].fillna(_zero_if_nan(global_purchase_amount_mean))
        df["monthly_payment"] = df["monthly_payment"].fillna(_zero_if_nan(global_monthly_payment_mean))

        # Cost per mile; a zero odometer delta is replaced by 1 to avoid
        # dividing by zero.
        df['odo_diff_for_cpm'] = df['odo_diff'].replace(0, 1)
        cost_per_mile = df["total_repairs"] / df["odo_diff_for_cpm"]
        df["CPM"] = cost_per_mile.replace([np.inf, -np.inf], np.nan).fillna(0)

        df["Decision"] = df.apply(_classify, axis=1)

        global_df = df  # Published for the plotting tab.
        print("Initial data processing complete. Data loaded for Gradio app.")

    except gr.Error as e:
        print(f"Gradio Error during initial data processing: {e}")
        # Let the app start; an empty frame marks the data as unavailable.
        global_df = pd.DataFrame()
    except Exception as e:
        print(f"Unexpected error during initial data processing: {e}")
        global_df = pd.DataFrame()
213
+
214
+
215
+ # === Decision Prediction Function for Gradio Interface ===
216
def predict_decision(total_repairs, last_10w_miles, odo_diff, cpm, purchase_amount, fair_market_value, monthly_payment, ownership_type_str, make):
    """
    Return the rule-based decision for a single truck.

    Args:
        total_repairs: Total repair spend; None is treated as 0.
        last_10w_miles: Miles driven in the last 10 weeks; None -> 0.
        odo_diff: Odometer difference; None -> 0.
        cpm: Cost per mile; None -> 0.
        purchase_amount: Purchase amount; None -> fleet mean computed at startup.
        fair_market_value: Fair market value; None -> fleet mean.
        monthly_payment: Monthly payment; None -> fleet mean.
        ownership_type_str: Ownership label, compared case-insensitively
            after stripping whitespace; None -> "UNKNOWN".
        make: Accepted for interface compatibility. None of the rules use
            it, so it does not influence the result.

    Returns:
        One of "Scrap", "Sell", "Lease", "Keep" or "Analyze". Rules are
        checked in that order and the first match wins.
    """
    # Gradio hands over None for an emptied Number field.
    total_repairs = float(total_repairs) if total_repairs is not None else 0.0
    last_10w_miles = float(last_10w_miles) if last_10w_miles is not None else 0.0
    odo_diff = float(odo_diff) if odo_diff is not None else 0.0
    cpm = float(cpm) if cpm is not None else 0.0

    # Financial fields fall back to the means computed at startup.
    purchase_amount = float(purchase_amount) if purchase_amount is not None else global_purchase_amount_mean
    fair_market_value = float(fair_market_value) if fair_market_value is not None else global_fair_market_value_mean
    monthly_payment = float(monthly_payment) if monthly_payment is not None else global_monthly_payment_mean

    ownership_type_str = ownership_type_str.strip().upper() if ownership_type_str is not None else "UNKNOWN"

    # NOTE: the former per-brand avg_resale_value lookup was removed: its
    # result was never read by any rule below.

    # 1. Scrap
    if (total_repairs > 8000 and
            last_10w_miles < 500 and
            odo_diff > 70000 and
            cpm > 0.2 and
            purchase_amount < 20000):
        return "Scrap"

    # 2. Sell
    if (total_repairs > 5000 and
            last_10w_miles < 1000 and
            fair_market_value > purchase_amount and
            odo_diff > 50000):
        return "Sell"

    # 3. Lease
    if (ownership_type_str == 'OPERATING LEASE' and
            monthly_payment > 600 and
            purchase_amount < 30000 and
            fair_market_value > 28000 and
            odo_diff < 40000):
        return "Lease"

    # 4. Keep
    if (total_repairs < 3000 and
            last_10w_miles > 2000 and
            fair_market_value < purchase_amount and
            odo_diff < 30000):
        return "Keep"

    # 5. Default
    return "Analyze"
276
+
277
+ # === Plot Generation Function for Gradio Interface ===
278
def _render_plot(draw, figsize, error_label):
    """
    Draw one chart on a fresh figure and return it as an RGBA uint8 array.

    Args:
        draw: Zero-argument callable that plots onto the current figure.
        figsize: (width, height) passed to plt.figure.
        error_label: Short chart name used in the error message.

    Returns:
        numpy array of shape (height, width, 4), or None when drawing failed.
        The figure is always closed, also on failure.
    """
    fig = plt.figure(figsize=figsize)
    try:
        draw()
        fig.canvas.draw()
        # NOTE(review): buffer_rgba() is provided by Agg-based canvases -
        # confirm the deployment backend is Agg-based.
        # Copy so the pixels outlive the closed figure.
        return np.asarray(fig.canvas.buffer_rgba()).copy()
    except Exception as e:
        print(f"Error generating {error_label} plot: {e}")
        return None
    finally:
        plt.close(fig)


def _draw_decision_breakdown():
    """Bar chart: number of trucks per decision, most frequent first."""
    sns.countplot(data=global_df, x='Decision', palette='viridis', order=global_df['Decision'].value_counts().index)
    plt.title('Decision Breakdown for the Fleet')
    plt.xlabel('Decision')
    plt.ylabel('Number of Trucks')
    plt.grid(axis='y', linestyle='--', alpha=0.7)


def _draw_repairs_by_ownership():
    """Box plot: total repair spend grouped by ownership type."""
    sns.boxplot(data=global_df, x='ownership_type', y='total_repairs', palette='coolwarm')
    plt.title('Total Repairs by Ownership Type')
    plt.xlabel('Ownership Type')
    plt.ylabel('Total Repairs ($)')
    plt.xticks(rotation=45, ha='right')
    plt.grid(axis='y', linestyle='--', alpha=0.7)
    plt.tight_layout()


def _draw_miles_distribution():
    """Histogram with KDE of miles driven in the last 10 weeks."""
    sns.histplot(data=global_df, x='last_10w_miles', bins=30, kde=True, color='skyblue')
    plt.title('Distribution of Last 10 Weeks Miles')
    plt.xlabel('Last 10 Weeks Miles')
    plt.ylabel('Number of Trucks')
    plt.grid(axis='y', linestyle='--', alpha=0.7)


def _draw_fmv_vs_purchase():
    """Scatter plot: fair market value against purchase amount, coloured by decision."""
    sns.scatterplot(data=global_df, x='purchase_amount', y='fair_market_value', hue='Decision', palette='deep', alpha=0.7)
    plt.title('Fair Market Value vs. Purchase Amount by Decision')
    plt.xlabel('Purchase Amount ($)')
    plt.ylabel('Fair Market Value ($)')
    plt.grid(linestyle='--', alpha=0.7)
    plt.tight_layout()


def generate_plots():
    """
    Render the four fleet charts from global_df for the Image outputs.

    Returns:
        List of four items, in the order Decision Breakdown, Total Repairs by
        Ownership Type, Last 10 Weeks Miles Distribution, Fair Market Value
        vs. Purchase Amount. Each item is an RGBA numpy array, or None if
        that chart failed (the error is printed), so the list always matches
        the four output components.

    Raises:
        gr.Error: if the fleet data has not been loaded or is empty.
    """
    if global_df is None or global_df.empty:
        # A single string cannot fill four image outputs; report the problem
        # as a Gradio error instead.
        raise gr.Error("Data not loaded or is empty. Please ensure input files are present and valid.")

    return [
        _render_plot(_draw_decision_breakdown, (8, 6), "Decision Breakdown"),
        _render_plot(_draw_repairs_by_ownership, (12, 7), "Total Repairs"),
        _render_plot(_draw_miles_distribution, (10, 6), "Miles Distribution"),
        _render_plot(_draw_fmv_vs_purchase, (10, 7), "FMV vs Purchase"),
    ]
351
+
352
+ # --- Initial Data Loading and Processing Call ---
353
+ # This will run once when the Gradio app starts up
354
# Runs once at import time so the globals are populated before the UI is built.
initial_data_processing()

# --- Gradio Interface Definition ---

# Ownership choices come from the loaded data; the hard-coded list is only
# used when no data could be loaded.
_ownership_choices = global_ownership_types if global_ownership_types else ["OWNER OPERATOR OWNED", "OPERATING LEASE", "FINANCED", "LEASE PURCHASE", "RENTAL", "FMV LEASE", "NAN"]
# The preselected value must be one of the choices: the data-derived list is
# not guaranteed to contain "OWNER OPERATOR OWNED", so fall back to the first
# available choice in that case.
_default_ownership = "OWNER OPERATOR OWNED" if "OWNER OPERATOR OWNED" in _ownership_choices else _ownership_choices[0]

# Inputs for the Decision Predictor tab, in the parameter order of predict_decision.
decision_inputs = [
    gr.Number(label="Total Repairs ($)", value=0.0),
    gr.Number(label="Last 10 Weeks Miles", value=0.0),
    gr.Number(label="Odometer Difference (odo_diff)", value=0.0),
    gr.Number(label="Cost Per Mile (CPM)", value=0.0),
    gr.Number(label="Purchase Amount ($)", value=0.0),
    gr.Number(label="Fair Market Value ($)", value=0.0),
    gr.Number(label="Monthly Payment ($)", value=0.0),
    gr.Dropdown(label="Ownership Type", choices=_ownership_choices, value=_default_ownership),
    gr.Textbox(label="Make (e.g., FORD)", value="FORD")
]
370
+
371
+ # Create the Gradio Interface
372
with gr.Blocks() as demo:
    gr.Markdown("# Truck Evaluation Application")
    gr.Markdown("Use this app to predict truck decisions and visualize fleet data.")

    # Tab 1: single-truck rule evaluation.
    with gr.Tab("Decision Predictor"):
        gr.Markdown("## Predict Truck Decision")
        gr.Markdown("Enter the details for a single truck to get a decision.")

        # The input components were created before the Blocks context, so
        # they are attached to the layout here.
        with gr.Row():
            for component in decision_inputs:
                component.render()

        predict_button = gr.Button("Get Decision")
        decision_output = gr.Textbox(label="Decision", interactive=False)

        predict_button.click(predict_decision, decision_inputs, decision_output)

    # Tab 2: fleet-wide charts.
    with gr.Tab("Data Visualizations"):
        gr.Markdown("## Fleet Data Visualizations")
        gr.Markdown("Explore insights from your truck data.")

        plot_button = gr.Button("Generate Plots")

        # One image slot per chart returned by generate_plots, in the same order.
        plot_outputs = [
            gr.Image(label=chart_label, interactive=False, visible=True)
            for chart_label in (
                "Decision Breakdown",
                "Total Repairs by Ownership Type",
                "Last 10 Weeks Miles Distribution",
                "Fair Market Value vs. Purchase Amount",
            )
        ]

        plot_button.click(generate_plots, [], plot_outputs)

# Start the server only when executed as a script.
if __name__ == "__main__":
    demo.launch()