ans123 committed on
Commit
b009ea4
·
verified ·
1 Parent(s): 38cf4b2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +78 -57
app.py CHANGED
@@ -7,14 +7,13 @@ import io
7
  import base64
8
 
9
  # --- Global Variables to store processed data ---
10
- # These will be populated once when the Gradio app starts
11
  global_df = None
12
  global_brand_resale = None
13
  global_brand_resale_mean = 0
14
  global_fair_market_value_mean = 0
15
  global_purchase_amount_mean = 0
16
  global_monthly_payment_mean = 0
17
- global_ownership_types = [] # To populate the dropdown for ownership type
18
 
19
  # === Truck ID Cleaner ===
20
  def clean_truck_id(val):
@@ -74,7 +73,6 @@ def load_and_clean_data():
74
  return finance, maintenance, distance, odometer, stub, paper
75
  except FileNotFoundError as e:
76
  print(f"Error: One or more input files not found. Please ensure all Excel files are in the same directory as the script. Missing file: {e.filename}")
77
- # In a Gradio app, sys.exit() would stop the server. Instead, return None or raise a specific error.
78
  raise gr.Error(f"Required file not found: {e.filename}. Please upload all necessary Excel files.")
79
  except Exception as e:
80
  print(f"An unexpected error occurred during data loading: {e}")
@@ -138,7 +136,16 @@ def initial_data_processing():
138
 
139
  # --- Standardize 'ownership_type' ---
140
  df['ownership_type'] = df['ownership_type'].astype(str).str.strip().str.upper()
141
- global_ownership_types = df['ownership_type'].unique().tolist() # Store for Gradio dropdown
 
 
 
 
 
 
 
 
 
142
 
143
  # --- Handle NaNs for decision-making columns ---
144
  df["total_repairs"] = df["total_repairs"].fillna(0)
@@ -148,11 +155,6 @@ def initial_data_processing():
148
 
149
  df["odo_diff"] = df["odo_diff"].fillna(0).apply(lambda x: 0 if x < 0 else x)
150
 
151
- # Calculate means for imputation, handling potential NaN means if column is all NaN
152
- global_fair_market_value_mean = df['fair_market_value'].mean()
153
- global_purchase_amount_mean = df['purchase_amount'].mean()
154
- global_monthly_payment_mean = df['monthly_payment'].mean()
155
-
156
  df["avg_resale_value"] = df["avg_resale_value"].fillna(global_brand_resale_mean if not pd.isna(global_brand_resale_mean) else 0)
157
  df["fair_market_value"] = df["fair_market_value"].fillna(global_fair_market_value_mean if not pd.isna(global_fair_market_value_mean) else 0)
158
  df["purchase_amount"] = df["purchase_amount"].fillna(global_purchase_amount_mean if not pd.isna(global_purchase_amount_mean) else 0)
@@ -200,16 +202,16 @@ def initial_data_processing():
200
 
201
  df["Decision"] = df.apply(make_decision_for_df, axis=1)
202
 
203
- global_df = df # Store the fully processed DataFrame globally for plotting
204
  print("Initial data processing complete. Data loaded for Gradio app.")
205
 
206
  except gr.Error as e:
207
  print(f"Gradio Error during initial data processing: {e}")
208
- # Allow the app to start but indicate data is not ready
209
- global_df = pd.DataFrame() # Empty DataFrame to prevent errors in plotting
210
  except Exception as e:
211
  print(f"Unexpected error during initial data processing: {e}")
212
- global_df = pd.DataFrame() # Empty DataFrame
213
 
214
 
215
  # === Decision Prediction Function for Gradio Interface ===
@@ -218,26 +220,29 @@ def predict_decision(total_repairs, last_10w_miles, odo_diff, cpm, purchase_amou
218
  Predicts the decision for a single truck based on user inputs.
219
  Uses globally pre-calculated means for missing values if inputs are None.
220
  """
221
- # Ensure inputs are numeric where expected, handle potential None/empty string from Gradio
222
  total_repairs = float(total_repairs) if total_repairs is not None else 0.0
223
  last_10w_miles = float(last_10w_miles) if last_10w_miles is not None else 0.0
224
  odo_diff = float(odo_diff) if odo_diff is not None else 0.0
225
  cpm = float(cpm) if cpm is not None else 0.0
226
 
227
- # Use global means for financial values if user input is None
228
- purchase_amount = float(purchase_amount) if purchase_amount is not None else global_purchase_amount_mean
229
- fair_market_value = float(fair_market_value) if fair_market_value is not None else global_fair_market_value_mean
230
- monthly_payment = float(monthly_payment) if monthly_payment is not None else global_monthly_payment_mean
231
 
232
  ownership_type_str = ownership_type_str.strip().upper() if ownership_type_str is not None else "UNKNOWN"
233
  make = make.strip().upper() if make is not None else "UNKNOWN"
234
 
235
  # For avg_resale_value, try to get it from the pre-calculated global_brand_resale, else use global mean
236
- avg_resale_value_lookup = global_brand_resale.loc[global_brand_resale['truck_brand'] == make, 'avg_resale_value'].values if global_brand_resale is not None else []
237
- if len(avg_resale_value_lookup) > 0:
238
- avg_resale_value = avg_resale_value_lookup[0]
239
- else:
240
- avg_resale_value = global_brand_resale_mean # Use overall mean if brand not found or data not loaded
 
 
 
241
 
242
  # Apply the same logic as make_decision, but directly with the input variables
243
  # 1. Scrap:
@@ -280,9 +285,10 @@ def generate_plots():
280
  Generates various plots from the processed global_df and returns them as base64 encoded images.
281
  """
282
  if global_df is None or global_df.empty:
283
- return "Error: Data not loaded or is empty. Please ensure input files are present and valid."
 
284
 
285
- plot_outputs = []
286
 
287
  # Plot 1: Decision Breakdown
288
  try:
@@ -295,26 +301,33 @@ def generate_plots():
295
  buf = io.BytesIO()
296
  plt.savefig(buf, format='png')
297
  plt.close()
298
- plot_outputs.append(gr.Image(value=buf.getvalue(), label="Decision Breakdown")._data)
299
  except Exception as e:
300
- plot_outputs.append(f"Error generating Decision Breakdown plot: {e}")
 
301
 
302
  # Plot 2: Total Repairs by Ownership Type
303
  try:
304
  plt.figure(figsize=(12, 7))
305
- sns.boxplot(data=global_df, x='ownership_type', y='total_repairs', palette='coolwarm')
306
- plt.title('Total Repairs by Ownership Type')
307
- plt.xlabel('Ownership Type')
308
- plt.ylabel('Total Repairs ($)')
309
- plt.xticks(rotation=45, ha='right')
310
- plt.grid(axis='y', linestyle='--', alpha=0.7)
311
- plt.tight_layout()
312
- buf = io.BytesIO()
313
- plt.savefig(buf, format='png')
314
- plt.close()
315
- plot_outputs.append(gr.Image(value=buf.getvalue(), label="Total Repairs by Ownership Type")._data)
 
 
 
 
 
316
  except Exception as e:
317
- plot_outputs.append(f"Error generating Total Repairs plot: {e}")
 
318
 
319
  # Plot 3: Last 10 Weeks Miles Distribution
320
  try:
@@ -327,35 +340,42 @@ def generate_plots():
327
  buf = io.BytesIO()
328
  plt.savefig(buf, format='png')
329
  plt.close()
330
- plot_outputs.append(gr.Image(value=buf.getvalue(), label="Last 10 Weeks Miles Distribution")._data)
331
  except Exception as e:
332
- plot_outputs.append(f"Error generating Miles Distribution plot: {e}")
 
333
 
334
  # Plot 4: Fair Market Value vs. Purchase Amount
335
  try:
336
  plt.figure(figsize=(10, 7))
337
- sns.scatterplot(data=global_df, x='purchase_amount', y='fair_market_value', hue='Decision', palette='deep', alpha=0.7)
338
- plt.title('Fair Market Value vs. Purchase Amount by Decision')
339
- plt.xlabel('Purchase Amount ($)')
340
- plt.ylabel('Fair Market Value ($)')
341
- plt.grid(linestyle='--', alpha=0.7)
342
- plt.tight_layout()
343
- buf = io.BytesIO()
344
- plt.savefig(buf, format='png')
345
- plt.close()
346
- plot_outputs.append(gr.Image(value=buf.getvalue(), label="Fair Market Value vs. Purchase Amount")._data)
 
 
 
 
 
347
  except Exception as e:
348
- plot_outputs.append(f"Error generating FMV vs Purchase plot: {e}")
 
349
 
350
- return plot_outputs
351
 
352
  # --- Initial Data Loading and Processing Call ---
353
- # This will run once when the Gradio app starts up
354
  initial_data_processing()
355
 
356
  # --- Gradio Interface Definition ---
357
 
358
  # Define inputs for the Decision Predictor tab
 
359
  decision_inputs = [
360
  gr.Number(label="Total Repairs ($)", value=0.0),
361
  gr.Number(label="Last 10 Weeks Miles", value=0.0),
@@ -364,7 +384,7 @@ decision_inputs = [
364
  gr.Number(label="Purchase Amount ($)", value=0.0),
365
  gr.Number(label="Fair Market Value ($)", value=0.0),
366
  gr.Number(label="Monthly Payment ($)", value=0.0),
367
- gr.Dropdown(label="Ownership Type", choices=global_ownership_types if global_ownership_types else ["OWNER OPERATOR OWNED", "OPERATING LEASE", "FINANCED", "LEASE PURCHASE", "RENTAL", "FMV LEASE", "NAN"], value="OWNER OPERATOR OWNED"),
368
  gr.Textbox(label="Make (e.g., FORD)", value="FORD")
369
  ]
370
 
@@ -397,7 +417,8 @@ with gr.Blocks() as demo:
397
  plot_button = gr.Button("Generate Plots")
398
 
399
  # Output components for plots
400
- plot_outputs = [
 
401
  gr.Image(label="Decision Breakdown", interactive=False, visible=True),
402
  gr.Image(label="Total Repairs by Ownership Type", interactive=False, visible=True),
403
  gr.Image(label="Last 10 Weeks Miles Distribution", interactive=False, visible=True),
@@ -407,7 +428,7 @@ with gr.Blocks() as demo:
407
  plot_button.click(
408
  fn=generate_plots,
409
  inputs=[],
410
- outputs=plot_outputs
411
  )
412
 
413
  # Launch the Gradio app
 
7
  import base64
8
 
9
  # --- Global Variables to store processed data ---
 
10
  global_df = None
11
  global_brand_resale = None
12
  global_brand_resale_mean = 0
13
  global_fair_market_value_mean = 0
14
  global_purchase_amount_mean = 0
15
  global_monthly_payment_mean = 0
16
+ global_ownership_types = []
17
 
18
  # === Truck ID Cleaner ===
19
  def clean_truck_id(val):
 
73
  return finance, maintenance, distance, odometer, stub, paper
74
  except FileNotFoundError as e:
75
  print(f"Error: One or more input files not found. Please ensure all Excel files are in the same directory as the script. Missing file: {e.filename}")
 
76
  raise gr.Error(f"Required file not found: {e.filename}. Please upload all necessary Excel files.")
77
  except Exception as e:
78
  print(f"An unexpected error occurred during data loading: {e}")
 
136
 
137
  # --- Standardize 'ownership_type' ---
138
  df['ownership_type'] = df['ownership_type'].astype(str).str.strip().str.upper()
139
+ global_ownership_types = df['ownership_type'].unique().tolist()
140
+ # Ensure 'NAN' is handled if it appears due to missing ownership types
141
+ if 'NAN' in global_ownership_types:
142
+ global_ownership_types.remove('NAN')
143
+ global_ownership_types.sort() # Sort for better display in dropdown
144
+
145
+ # Calculate means for imputation, handling potential NaN means if column is all NaN
146
+ global_fair_market_value_mean = df['fair_market_value'].mean()
147
+ global_purchase_amount_mean = df['purchase_amount'].mean()
148
+ global_monthly_payment_mean = df['monthly_payment'].mean()
149
 
150
  # --- Handle NaNs for decision-making columns ---
151
  df["total_repairs"] = df["total_repairs"].fillna(0)
 
155
 
156
  df["odo_diff"] = df["odo_diff"].fillna(0).apply(lambda x: 0 if x < 0 else x)
157
 
 
 
 
 
 
158
  df["avg_resale_value"] = df["avg_resale_value"].fillna(global_brand_resale_mean if not pd.isna(global_brand_resale_mean) else 0)
159
  df["fair_market_value"] = df["fair_market_value"].fillna(global_fair_market_value_mean if not pd.isna(global_fair_market_value_mean) else 0)
160
  df["purchase_amount"] = df["purchase_amount"].fillna(global_purchase_amount_mean if not pd.isna(global_purchase_amount_mean) else 0)
 
202
 
203
  df["Decision"] = df.apply(make_decision_for_df, axis=1)
204
 
205
+ global_df = df.copy() # Make a copy to avoid SettingWithCopyWarning if modified later
206
  print("Initial data processing complete. Data loaded for Gradio app.")
207
 
208
  except gr.Error as e:
209
  print(f"Gradio Error during initial data processing: {e}")
210
+ # If an error occurs, ensure global_df is an empty DataFrame to prevent further errors
211
+ global_df = pd.DataFrame()
212
  except Exception as e:
213
  print(f"Unexpected error during initial data processing: {e}")
214
+ global_df = pd.DataFrame()
215
 
216
 
217
  # === Decision Prediction Function for Gradio Interface ===
 
220
  Predicts the decision for a single truck based on user inputs.
221
  Uses globally pre-calculated means for missing values if inputs are None.
222
  """
223
+ # Handle potentially None inputs from Gradio and ensure numeric types
224
  total_repairs = float(total_repairs) if total_repairs is not None else 0.0
225
  last_10w_miles = float(last_10w_miles) if last_10w_miles is not None else 0.0
226
  odo_diff = float(odo_diff) if odo_diff is not None else 0.0
227
  cpm = float(cpm) if cpm is not None else 0.0
228
 
229
+ # Use global means for financial values if user input is None, and ensure they are float
230
+ purchase_amount = float(purchase_amount) if purchase_amount is not None else (global_purchase_amount_mean if not pd.isna(global_purchase_amount_mean) else 0.0)
231
+ fair_market_value = float(fair_market_value) if fair_market_value is not None else (global_fair_market_value_mean if not pd.isna(global_fair_market_value_mean) else 0.0)
232
+ monthly_payment = float(monthly_payment) if monthly_payment is not None else (global_monthly_payment_mean if not pd.isna(global_monthly_payment_mean) else 0.0)
233
 
234
  ownership_type_str = ownership_type_str.strip().upper() if ownership_type_str is not None else "UNKNOWN"
235
  make = make.strip().upper() if make is not None else "UNKNOWN"
236
 
237
  # For avg_resale_value, try to get it from the pre-calculated global_brand_resale, else use global mean
238
+ avg_resale_value = 0.0 # Default if global_brand_resale is not loaded
239
+ if global_brand_resale is not None:
240
+ avg_resale_value_lookup = global_brand_resale.loc[global_brand_resale['truck_brand'] == make, 'avg_resale_value'].values
241
+ if len(avg_resale_value_lookup) > 0:
242
+ avg_resale_value = avg_resale_value_lookup[0]
243
+ else:
244
+ avg_resale_value = global_brand_resale_mean if not pd.isna(global_brand_resale_mean) else 0.0
245
+
246
 
247
  # Apply the same logic as make_decision, but directly with the input variables
248
  # 1. Scrap:
 
285
  Generates various plots from the processed global_df and returns them as base64 encoded images.
286
  """
287
  if global_df is None or global_df.empty:
288
+ # Return a list of None values for the images if data is not loaded
289
+ return [None, None, None, None]
290
 
291
+ plot_buffers = [] # Store image bytes here
292
 
293
  # Plot 1: Decision Breakdown
294
  try:
 
301
  buf = io.BytesIO()
302
  plt.savefig(buf, format='png')
303
  plt.close()
304
+ plot_buffers.append(buf.getvalue())
305
  except Exception as e:
306
+ print(f"Error generating Decision Breakdown plot: {e}")
307
+ plot_buffers.append(None) # Append None if plot generation fails
308
 
309
  # Plot 2: Total Repairs by Ownership Type
310
  try:
311
  plt.figure(figsize=(12, 7))
312
+ # Filter out NaN/None ownership types if any remain for plotting robustness
313
+ plot_df = global_df[global_df['ownership_type'].notna() & (global_df['ownership_type'] != 'NAN')]
314
+ if not plot_df.empty:
315
+ sns.boxplot(data=plot_df, x='ownership_type', y='total_repairs', palette='coolwarm')
316
+ plt.title('Total Repairs by Ownership Type')
317
+ plt.xlabel('Ownership Type')
318
+ plt.ylabel('Total Repairs ($)')
319
+ plt.xticks(rotation=45, ha='right')
320
+ plt.grid(axis='y', linestyle='--', alpha=0.7)
321
+ plt.tight_layout()
322
+ buf = io.BytesIO()
323
+ plt.savefig(buf, format='png')
324
+ plt.close()
325
+ plot_buffers.append(buf.getvalue())
326
+ else:
327
+ plot_buffers.append(None)
328
  except Exception as e:
329
+ print(f"Error generating Total Repairs plot: {e}")
330
+ plot_buffers.append(None)
331
 
332
  # Plot 3: Last 10 Weeks Miles Distribution
333
  try:
 
340
  buf = io.BytesIO()
341
  plt.savefig(buf, format='png')
342
  plt.close()
343
+ plot_buffers.append(buf.getvalue())
344
  except Exception as e:
345
+ print(f"Error generating Miles Distribution plot: {e}")
346
+ plot_buffers.append(None)
347
 
348
  # Plot 4: Fair Market Value vs. Purchase Amount
349
  try:
350
  plt.figure(figsize=(10, 7))
351
+ # Ensure columns are numeric and handle potential NaNs for plotting
352
+ plot_df = global_df.dropna(subset=['purchase_amount', 'fair_market_value', 'Decision'])
353
+ if not plot_df.empty:
354
+ sns.scatterplot(data=plot_df, x='purchase_amount', y='fair_market_value', hue='Decision', palette='deep', alpha=0.7)
355
+ plt.title('Fair Market Value vs. Purchase Amount by Decision')
356
+ plt.xlabel('Purchase Amount ($)')
357
+ plt.ylabel('Fair Market Value ($)')
358
+ plt.grid(linestyle='--', alpha=0.7)
359
+ plt.tight_layout()
360
+ buf = io.BytesIO()
361
+ plt.savefig(buf, format='png')
362
+ plt.close()
363
+ plot_buffers.append(buf.getvalue())
364
+ else:
365
+ plot_buffers.append(None)
366
  except Exception as e:
367
+ print(f"Error generating FMV vs Purchase plot: {e}")
368
+ plot_buffers.append(None)
369
 
370
+ return plot_buffers
371
 
372
  # --- Initial Data Loading and Processing Call ---
 
373
  initial_data_processing()
374
 
375
  # --- Gradio Interface Definition ---
376
 
377
  # Define inputs for the Decision Predictor tab
378
+ # Use the dynamically populated global_ownership_types for the dropdown choices
379
  decision_inputs = [
380
  gr.Number(label="Total Repairs ($)", value=0.0),
381
  gr.Number(label="Last 10 Weeks Miles", value=0.0),
 
384
  gr.Number(label="Purchase Amount ($)", value=0.0),
385
  gr.Number(label="Fair Market Value ($)", value=0.0),
386
  gr.Number(label="Monthly Payment ($)", value=0.0),
387
+ gr.Dropdown(label="Ownership Type", choices=global_ownership_types, value=global_ownership_types[0] if global_ownership_types else "OWNER OPERATOR OWNED"),
388
  gr.Textbox(label="Make (e.g., FORD)", value="FORD")
389
  ]
390
 
 
417
  plot_button = gr.Button("Generate Plots")
418
 
419
  # Output components for plots
420
+ # These are just placeholders; the generate_plots function will return the actual image bytes
421
+ plot_outputs_components = [
422
  gr.Image(label="Decision Breakdown", interactive=False, visible=True),
423
  gr.Image(label="Total Repairs by Ownership Type", interactive=False, visible=True),
424
  gr.Image(label="Last 10 Weeks Miles Distribution", interactive=False, visible=True),
 
428
  plot_button.click(
429
  fn=generate_plots,
430
  inputs=[],
431
+ outputs=plot_outputs_components
432
  )
433
 
434
  # Launch the Gradio app