gauravlochab committed on
Commit
398c34c
·
1 Parent(s): b028096

chore: missing adjusted apr data

Browse files
Files changed (1) hide show
  1. app.py +480 -15
app.py CHANGED
@@ -267,12 +267,158 @@ def fetch_apr_data_from_db():
267
  # Log that we're skipping zero or -100 values
268
  logger.debug(f"Skipping value for agent {agent_name} ({attr['agent_id']}): {apr_data['apr']} (zero or -100)")
269
 
270
- # Convert list of dictionaries to DataFrame
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
271
  if not apr_data_list:
272
  logger.error("No valid APR data extracted")
273
  global_df = pd.DataFrame([])
274
  return global_df
275
 
 
276
  global_df = pd.DataFrame(apr_data_list)
277
 
278
  # Log the resulting dataframe
@@ -291,9 +437,7 @@ def fetch_apr_data_from_db():
291
  avg_diff = (valid_rows['apr'] - valid_rows['adjusted_apr']).mean()
292
  max_diff = (valid_rows['apr'] - valid_rows['adjusted_apr']).max()
293
  min_diff = (valid_rows['apr'] - valid_rows['adjusted_apr']).min()
294
- logger.info(f"APR vs Adjusted APR difference: avg={avg_diff:.2f}, max={max_diff:.2f}, min={min_diff:.2f}")
295
- else:
296
- logger.info("No adjusted APR values found in the data")
297
 
298
  # All values are APR type (excluding zero and -100 values)
299
  logger.info("All values are APR type (excluding zero and -100 values)")
@@ -304,6 +448,10 @@ def fetch_apr_data_from_db():
304
  for idx, row in global_df.iterrows():
305
  logger.debug(f"Row {idx}: {row.to_dict()}")
306
 
 
 
 
 
307
  return global_df
308
 
309
  except requests.exceptions.RequestException as e:
@@ -312,10 +460,146 @@ def fetch_apr_data_from_db():
312
  return global_df
313
  except Exception as e:
314
  logger.error(f"Error fetching APR data: {e}")
315
- logger.exception("Exception details:")
316
  global_df = pd.DataFrame([])
317
  return global_df
318
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
319
  def generate_apr_visualizations():
320
  """Generate APR visualizations with real data only (no dummy data)"""
321
  global global_df
@@ -650,6 +934,9 @@ def create_combined_time_series_graph(df):
650
  avg_apr_data_with_ma['moving_avg'] = None # 3-day window for APR
651
  avg_apr_data_with_ma['adjusted_moving_avg'] = None # 3-day window for adjusted APR
652
 
 
 
 
653
  # Calculate the moving averages for each timestamp
654
  for i, row in avg_apr_data_with_ma.iterrows():
655
  current_time = row['timestamp']
@@ -667,9 +954,18 @@ def create_combined_time_series_graph(df):
667
  logger.debug(f"APR time window {window_start} to {current_time}: {len(window_data)} points, avg={window_data['apr'].mean()}")
668
 
669
  # Calculate adjusted APR moving average if data exists
670
- if 'adjusted_apr' in window_data.columns and window_data['adjusted_apr'].notna().any():
671
- avg_apr_data_with_ma.at[i, 'adjusted_moving_avg'] = window_data['adjusted_apr'].mean()
672
- logger.debug(f"Adjusted APR time window {window_start} to {current_time}: {len(window_data)} points, avg={window_data['adjusted_apr'].mean()}")
 
 
 
 
 
 
 
 
 
673
  else:
674
  # If no data points in the window, use the current value
675
  avg_apr_data_with_ma.at[i, 'moving_avg'] = row['apr']
@@ -776,11 +1072,18 @@ def create_combined_time_series_graph(df):
776
 
777
  # Add adjusted APR moving average line if it exists
778
  if 'adjusted_moving_avg' in avg_apr_data_with_ma.columns and avg_apr_data_with_ma['adjusted_moving_avg'].notna().any():
779
- y_values_adj_ma = avg_apr_data_with_ma['adjusted_moving_avg'].tolist()
 
 
 
 
 
 
 
780
 
781
  # Create hover template for the adjusted APR moving average line
782
  hover_data_adj = []
783
- for idx, row in avg_apr_data_with_ma.iterrows():
784
  timestamp = row['timestamp']
785
  if pd.notna(row['adjusted_moving_avg']):
786
  hover_data_adj.append(
@@ -793,7 +1096,7 @@ def create_combined_time_series_graph(df):
793
 
794
  fig.add_trace(
795
  go.Scatter(
796
- x=x_values_ma,
797
  y=y_values_adj_ma,
798
  mode='lines', # Only lines for moving average
799
  line=dict(color='green', width=4), # Thicker solid line for adjusted APR
@@ -803,7 +1106,9 @@ def create_combined_time_series_graph(df):
803
  visible=True # Visible by default
804
  )
805
  )
806
- logger.info(f"Added 3-day moving average Adjusted APR trace with {len(x_values_ma)} points")
 
 
807
 
808
  # Removed cumulative APR as requested
809
  logger.info("Cumulative APR graph line has been removed as requested")
@@ -1728,10 +2033,43 @@ def dashboard():
1728
 
1729
  # Function to update the graph without parameters (for refresh button)
1730
  def refresh_graph():
1731
- return update_apr_graph(apr_toggle.value, adjusted_apr_toggle.value)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1732
 
1733
- # Set up the button click event
1734
- refresh_btn.click(fn=refresh_graph, inputs=None, outputs=[combined_graph])
 
 
 
 
1735
 
1736
  # Set up the toggle switch events
1737
  apr_toggle.change(
@@ -1751,3 +2089,130 @@ def dashboard():
1751
  # Launch the dashboard
1752
  if __name__ == "__main__":
1753
  dashboard().launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
267
  # Log that we're skipping zero or -100 values
268
  logger.debug(f"Skipping value for agent {agent_name} ({attr['agent_id']}): {apr_data['apr']} (zero or -100)")
269
 
270
+ logger.info(f"Extracted {len(apr_data_list)} valid APR data points")
271
+
272
+ # Added debug for adjusted APR data after May 10th
273
+ may_10_2025 = datetime(2025, 5, 10)
274
+ after_may_10 = [d for d in apr_data_list if d['timestamp'] >= may_10_2025]
275
+ with_adjusted_after_may_10 = [d for d in after_may_10 if d['adjusted_apr'] is not None]
276
+
277
+ logger.info(f"Data points after May 10th, 2025: {len(after_may_10)}")
278
+ logger.info(f"Data points with adjusted_apr after May 10th, 2025: {len(with_adjusted_after_may_10)}")
279
+
280
+ # Log detailed information about when data began
281
+ first_adjusted = None
282
+ if with_adjusted_after_may_10:
283
+ first_adjusted_after = min(with_adjusted_after_may_10, key=lambda x: x['timestamp'])
284
+ logger.info(f"First adjusted_apr after May 10th: {first_adjusted_after['timestamp']} (Agent: {first_adjusted_after['agent_id']})")
285
+
286
+ # Check all data for first adjusted_apr
287
+ all_with_adjusted = [d for d in apr_data_list if d['adjusted_apr'] is not None]
288
+ if all_with_adjusted:
289
+ first_adjusted = min(all_with_adjusted, key=lambda x: x['timestamp'])
290
+ logger.info(f"First adjusted_apr ever: {first_adjusted['timestamp']} (Agent: {first_adjusted['agent_id']})")
291
+ last_adjusted = max(all_with_adjusted, key=lambda x: x['timestamp'])
292
+ logger.info(f"Last adjusted_apr ever: {last_adjusted['timestamp']} (Agent: {last_adjusted['agent_id']})")
293
+
294
+ # Calculate overall coverage
295
+ adjusted_ratio = len(all_with_adjusted) / len(apr_data_list) * 100
296
+ logger.info(f"Overall adjusted_apr coverage: {adjusted_ratio:.2f}% ({len(all_with_adjusted)}/{len(apr_data_list)} records)")
297
+
298
+ # Log per-agent adjusted APR statistics
299
+ agent_stats = {}
300
+ for record in apr_data_list:
301
+ agent_id = record['agent_id']
302
+ has_adjusted = record['adjusted_apr'] is not None
303
+
304
+ if agent_id not in agent_stats:
305
+ agent_stats[agent_id] = {'total': 0, 'adjusted': 0}
306
+
307
+ agent_stats[agent_id]['total'] += 1
308
+ if has_adjusted:
309
+ agent_stats[agent_id]['adjusted'] += 1
310
+
311
+ # Log stats for agents with meaningful data
312
+ for agent_id, stats in agent_stats.items():
313
+ if stats['total'] > 0:
314
+ coverage = (stats['adjusted'] / stats['total']) * 100
315
+ if coverage > 0: # Only log agents that have at least some adjusted data
316
+ logger.info(f"Agent {agent_id}: {coverage:.2f}% adjusted coverage ({stats['adjusted']}/{stats['total']} records)")
317
+
318
+ # Check for gaps in adjusted APR data
319
+ for agent_id in agent_stats:
320
+ # Get all records for this agent
321
+ agent_records = [r for r in apr_data_list if r['agent_id'] == agent_id]
322
+ # Sort by timestamp
323
+ agent_records.sort(key=lambda x: x['timestamp'])
324
+
325
+ # Find where adjusted APR starts and if there are gaps
326
+ has_adjusted = False
327
+ gap_count = 0
328
+ streak_length = 0
329
+ for record in agent_records:
330
+ if record['adjusted_apr'] is not None:
331
+ if not has_adjusted:
332
+ has_adjusted = True
333
+ logger.info(f"Agent {agent_id}: First adjusted APR at {record['timestamp']}")
334
+ streak_length += 1
335
+ elif has_adjusted:
336
+ # We had adjusted data but now it's missing
337
+ gap_count += 1
338
+ if streak_length > 0:
339
+ logger.warning(f"Agent {agent_id}: Gap in adjusted APR data after {streak_length} consecutive records")
340
+ streak_length = 0
341
+
342
+ if gap_count > 0:
343
+ logger.warning(f"Agent {agent_id}: Found {gap_count} gaps in adjusted APR data")
344
+ elif has_adjusted:
345
+ logger.info(f"Agent {agent_id}: Continuous adjusted APR data with no gaps")
346
+
347
+ # Provide summary statistics
348
+ agents_with_data = sum(1 for stats in agent_stats.values() if stats['adjusted'] > 0)
349
+ agents_with_gaps = sum(1 for agent_id in agent_stats if
350
+ any(apr_data_list[i]['agent_id'] == agent_id and apr_data_list[i]['adjusted_apr'] is not None and
351
+ i+1 < len(apr_data_list) and apr_data_list[i+1]['agent_id'] == agent_id and
352
+ apr_data_list[i+1]['adjusted_apr'] is None
353
+ for i in range(len(apr_data_list)-1)))
354
+
355
+ logger.info(f"ADJUSTED APR SUMMARY: {agents_with_data}/{len(agent_stats)} agents have adjusted APR data")
356
+ if agents_with_gaps > 0:
357
+ logger.warning(f"ATTENTION: {agents_with_gaps} agents have gaps in their adjusted APR data")
358
+ logger.warning("These gaps may cause discontinuities in the adjusted APR graph")
359
+ else:
360
+ logger.info("No gaps detected in adjusted APR data - graph should be continuous")
361
+
362
+ if len(with_adjusted_after_may_10) == 0 and len(after_may_10) > 0:
363
+ logger.warning("No adjusted_apr values found after May 10th, 2025 despite having APR data")
364
+
365
+ # Log agent IDs with missing adjusted_apr after May 10th
366
+ agents_after_may_10 = set(d['agent_id'] for d in after_may_10)
367
+ logger.info(f"Agents with data after May 10th: {agents_after_may_10}")
368
+
369
+ # Check these same agents before May 10th
370
+ before_may_10 = [d for d in apr_data_list if d['timestamp'] < may_10_2025]
371
+ agents_with_adjusted_before = {d['agent_id'] for d in before_may_10 if d['adjusted_apr'] is not None}
372
+
373
+ # Agents that had adjusted_apr before but not after
374
+ missing_adjusted = agents_with_adjusted_before.intersection(agents_after_may_10)
375
+ if missing_adjusted:
376
+ logger.warning(f"Agents that had adjusted_apr before May 10th but not after: {missing_adjusted}")
377
+
378
+ # Find the last valid adjusted_apr date for these agents
379
+ for agent_id in missing_adjusted:
380
+ agent_data = [d for d in before_may_10 if d['agent_id'] == agent_id and d['adjusted_apr'] is not None]
381
+ if agent_data:
382
+ last_entry = max(agent_data, key=lambda d: d['timestamp'])
383
+ logger.info(f"Agent {agent_id}: Last adjusted_apr on {last_entry['timestamp']} with value {last_entry['adjusted_apr']}")
384
+
385
+ # Look at the first entry after the cutoff without adjusted_apr
386
+ agent_after = [d for d in after_may_10 if d['agent_id'] == agent_id]
387
+ if agent_after:
388
+ first_after = min(agent_after, key=lambda d: d['timestamp'])
389
+ logger.info(f"Agent {agent_id}: First entry after cutoff on {first_after['timestamp']} missing adjusted_apr")
390
+
391
+ # If the agent data has the 'adjusted_apr_key' field, log that info
392
+ if 'adjusted_apr_key' in first_after:
393
+ logger.info(f"Agent {agent_id}: Key used for adjusted_apr: {first_after['adjusted_apr_key']}")
394
+
395
+ # Add debug logic to check for any adjusted_apr after May 10th and which agents have it
396
+ elif len(with_adjusted_after_may_10) > 0:
397
+ logger.info("Found adjusted_apr values after May 10th, 2025")
398
+
399
+ # Group by agent and log
400
+ agent_counts = {}
401
+ for item in with_adjusted_after_may_10:
402
+ agent_id = item['agent_id']
403
+ if agent_id in agent_counts:
404
+ agent_counts[agent_id] += 1
405
+ else:
406
+ agent_counts[agent_id] = 1
407
+
408
+ logger.info(f"Agents with adjusted_apr after May 10th: {agent_counts}")
409
+
410
+ # Log adjusted_apr keys used
411
+ keys_used = {item.get('adjusted_apr_key') for item in with_adjusted_after_may_10 if 'adjusted_apr_key' in item}
412
+ if keys_used:
413
+ logger.info(f"Keys used for adjusted_apr after May 10th: {keys_used}")
414
+
415
+ # Convert to DataFrame
416
  if not apr_data_list:
417
  logger.error("No valid APR data extracted")
418
  global_df = pd.DataFrame([])
419
  return global_df
420
 
421
+ # Convert list of dictionaries to DataFrame
422
  global_df = pd.DataFrame(apr_data_list)
423
 
424
  # Log the resulting dataframe
 
437
  avg_diff = (valid_rows['apr'] - valid_rows['adjusted_apr']).mean()
438
  max_diff = (valid_rows['apr'] - valid_rows['adjusted_apr']).max()
439
  min_diff = (valid_rows['apr'] - valid_rows['adjusted_apr']).min()
440
+ logger.info(f"APR vs. adjusted APR difference: avg={avg_diff:.2f}, min={min_diff:.2f}, max={max_diff:.2f}")
 
 
441
 
442
  # All values are APR type (excluding zero and -100 values)
443
  logger.info("All values are APR type (excluding zero and -100 values)")
 
448
  for idx, row in global_df.iterrows():
449
  logger.debug(f"Row {idx}: {row.to_dict()}")
450
 
451
+ # Add this at the end, right before returning the global_df
452
+ logger.info("Analyzing adjusted_apr data availability...")
453
+ log_adjusted_apr_availability(global_df)
454
+
455
  return global_df
456
 
457
  except requests.exceptions.RequestException as e:
 
460
  return global_df
461
  except Exception as e:
462
  logger.error(f"Error fetching APR data: {e}")
463
+ logger.exception("Exception traceback:")
464
  global_df = pd.DataFrame([])
465
  return global_df
466
 
467
def log_adjusted_apr_availability(df):
    """
    Analyze and log detailed information about adjusted_apr data availability.

    Diagnostics only: nothing is returned and ``df`` is not modified. The
    function logs an overall summary, a comparison of coverage before and
    after the May 10th, 2025 cutoff, and a per-agent breakdown that includes
    gaps of more than 24 hours between consecutive adjusted_apr data points.

    Args:
        df: DataFrame containing the APR data. The ``adjusted_apr``,
            ``timestamp``, ``agent_id`` and ``agent_name`` columns are read.
    """
    if df is None or df.empty or 'adjusted_apr' not in df.columns:
        logger.warning("No adjusted_apr data available for analysis")
        return

    # This helper is invoked by fetch_apr_data_from_db() just before it returns
    # its result, so an exception raised here would propagate into that
    # caller's error handling. Bail out with a warning instead of a KeyError.
    missing_columns = [
        col for col in ('timestamp', 'agent_id', 'agent_name') if col not in df.columns
    ]
    if missing_columns:
        logger.warning(f"Cannot analyze adjusted_apr availability, missing columns: {missing_columns}")
        return

    # Get only rows with valid adjusted_apr values
    has_adjusted = df[df['adjusted_apr'].notna()]
    if has_adjusted.empty:
        logger.warning("No valid adjusted_apr values found in the dataset")
        return

    # 1. When did adjusted_apr data start?
    first_adjusted = has_adjusted['timestamp'].min()
    last_adjusted = has_adjusted['timestamp'].max()
    logger.info(f"ADJUSTED APR SUMMARY: First data point: {first_adjusted}")
    logger.info(f"ADJUSTED APR SUMMARY: Last data point: {last_adjusted}")
    logger.info(f"ADJUSTED APR SUMMARY: Data spans {(last_adjusted - first_adjusted).days} days")

    # Calculate coverage percentage (df is known to be non-empty here)
    total_records = len(df)
    records_with_adjusted = len(has_adjusted)
    coverage_pct = (records_with_adjusted / total_records) * 100
    logger.info(f"ADJUSTED APR SUMMARY: {records_with_adjusted} out of {total_records} records have adjusted_apr ({coverage_pct:.2f}%)")

    # 2. How many agents are providing adjusted_apr?
    agents_with_adjusted = has_adjusted['agent_id'].unique()
    logger.info(f"ADJUSTED APR SUMMARY: {len(agents_with_adjusted)} agents providing adjusted_apr")
    logger.info(f"ADJUSTED APR SUMMARY: Agents providing adjusted_apr: {list(agents_with_adjusted)}")

    # 3. May 10th cutoff analysis
    # Build the cutoff in the same timezone as the timestamp column (None for
    # naive data): comparing a tz-aware column with a naive datetime raises.
    timestamp_tz = getattr(df['timestamp'].dtype, 'tz', None)
    may_10_2025 = pd.Timestamp("2025-05-10", tz=timestamp_tz)
    before_cutoff = df[df['timestamp'] < may_10_2025]
    after_cutoff = df[df['timestamp'] >= may_10_2025]

    # The before/after comparison is only meaningful when both sides have rows.
    compare_cutoff = not before_cutoff.empty and not after_cutoff.empty

    if compare_cutoff:
        before_with_adjusted = before_cutoff['adjusted_apr'].notna().sum()
        before_pct = (before_with_adjusted / len(before_cutoff)) * 100

        after_with_adjusted = after_cutoff['adjusted_apr'].notna().sum()
        after_pct = (after_with_adjusted / len(after_cutoff)) * 100

        logger.info(f"ADJUSTED APR SUMMARY: Before May 10th: {before_with_adjusted}/{len(before_cutoff)} records with adjusted_apr ({before_pct:.2f}%)")
        logger.info(f"ADJUSTED APR SUMMARY: After May 10th: {after_with_adjusted}/{len(after_cutoff)} records with adjusted_apr ({after_pct:.2f}%)")

        # Check which agents had data before and after
        agents_before = set(before_cutoff[before_cutoff['adjusted_apr'].notna()]['agent_id'].unique())
        agents_after = set(after_cutoff[after_cutoff['adjusted_apr'].notna()]['agent_id'].unique())

        missing_after = agents_before - agents_after
        if missing_after:
            logger.warning(f"ADJUSTED APR SUMMARY: {len(missing_after)} agents stopped providing adjusted_apr after May 10th: {list(missing_after)}")

        new_after = agents_after - agents_before
        if new_after:
            logger.info(f"ADJUSTED APR SUMMARY: {len(new_after)} agents started providing adjusted_apr after May 10th: {list(new_after)}")

    # 4. Per-agent data availability
    # groupby(sort=False) visits agents in order of first appearance and splits
    # the frame once instead of re-filtering it for every agent.
    logger.info("=== DETAILED AGENT ANALYSIS ===")
    for agent_id, agent_data in df.groupby('agent_id', sort=False):
        agent_name = agent_data['agent_name'].iloc[0]

        # Get the valid adjusted_apr values for this agent
        agent_adjusted = agent_data[agent_data['adjusted_apr'].notna()]
        if agent_adjusted.empty:
            logger.info(f"Agent {agent_name} (ID: {agent_id}): No adjusted_apr data available")
            continue

        # Date range of all of the agent's records vs. only the adjusted ones
        agent_start = agent_data['timestamp'].min()
        agent_end = agent_data['timestamp'].max()
        adjusted_start = agent_adjusted['timestamp'].min()
        adjusted_end = agent_adjusted['timestamp'].max()

        total_agent_records = len(agent_data)
        agent_with_adjusted = len(agent_adjusted)
        coverage_pct = (agent_with_adjusted / total_agent_records) * 100

        logger.info(f"Agent {agent_name} (ID: {agent_id}): {agent_with_adjusted}/{total_agent_records} records with adjusted_apr ({coverage_pct:.2f}%)")
        logger.info(f"Agent {agent_name} (ID: {agent_id}): APR data from {agent_start} to {agent_end}")
        logger.info(f"Agent {agent_name} (ID: {agent_id}): Adjusted APR data from {adjusted_start} to {adjusted_end}")

        # Did this agent start or stop providing adjusted_apr around May 10th?
        if compare_cutoff:
            adjusted_before = agent_adjusted[agent_adjusted['timestamp'] < may_10_2025]
            adjusted_after = agent_adjusted[agent_adjusted['timestamp'] >= may_10_2025]
            has_before = not adjusted_before.empty
            has_after = not adjusted_after.empty

            if has_before and not has_after:
                last_date = adjusted_before['timestamp'].max()
                logger.warning(f"Agent {agent_name} (ID: {agent_id}): Stopped providing adjusted_apr after May 10th. Last data point: {last_date}")
            elif has_after and not has_before:
                first_date = adjusted_after['timestamp'].min()
                logger.info(f"Agent {agent_name} (ID: {agent_id}): Started providing adjusted_apr after May 10th. First data point: {first_date}")

        # Check for gaps in adjusted_apr (periods of 24+ hours without data)
        if len(agent_adjusted) < 2:
            continue

        sorted_times = agent_adjusted['timestamp'].sort_values()
        # shift() pairs every data point with its predecessor, so no label-based
        # positional lookup is needed (that lookup fails on a non-unique index).
        previous_times = sorted_times.shift()
        gap_mask = ((sorted_times - previous_times) > pd.Timedelta(hours=24)).to_numpy()
        gap_total = int(gap_mask.sum())

        if gap_total:
            logger.info(f"Agent {agent_name} (ID: {agent_id}): Found {gap_total} gaps in adjusted_apr data")

            for gap_start, gap_end in zip(previous_times[gap_mask], sorted_times[gap_mask]):
                gap_duration = gap_end - gap_start
                logger.info(f"Agent {agent_name} (ID: {agent_id}): Missing adjusted_apr from {gap_start} to {gap_end} ({gap_duration.days} days, {gap_duration.seconds//3600} hours)")
602
+
603
  def generate_apr_visualizations():
604
  """Generate APR visualizations with real data only (no dummy data)"""
605
  global global_df
 
934
  avg_apr_data_with_ma['moving_avg'] = None # 3-day window for APR
935
  avg_apr_data_with_ma['adjusted_moving_avg'] = None # 3-day window for adjusted APR
936
 
937
+ # Keep track of the last valid adjusted_moving_avg value to handle gaps
938
+ last_valid_adjusted_moving_avg = None
939
+
940
  # Calculate the moving averages for each timestamp
941
  for i, row in avg_apr_data_with_ma.iterrows():
942
  current_time = row['timestamp']
 
954
  logger.debug(f"APR time window {window_start} to {current_time}: {len(window_data)} points, avg={window_data['apr'].mean()}")
955
 
956
  # Calculate adjusted APR moving average if data exists
957
+ has_adjusted_apr = 'adjusted_apr' in window_data.columns and window_data['adjusted_apr'].notna().any()
958
+ if has_adjusted_apr:
959
+ adjusted_avg = window_data['adjusted_apr'].dropna().mean()
960
+ avg_apr_data_with_ma.at[i, 'adjusted_moving_avg'] = adjusted_avg
961
+ last_valid_adjusted_moving_avg = adjusted_avg
962
+ logger.debug(f"Adjusted APR time window {window_start} to {current_time}: {len(window_data)} points, avg={adjusted_avg}")
963
+ else:
964
+ # If we don't have adjusted_apr data in this window but had some previously,
965
+ # use the last valid value to maintain continuity in the graph
966
+ if last_valid_adjusted_moving_avg is not None:
967
+ avg_apr_data_with_ma.at[i, 'adjusted_moving_avg'] = last_valid_adjusted_moving_avg
968
+ logger.debug(f"No adjusted APR data in window, using last valid value: {last_valid_adjusted_moving_avg}")
969
  else:
970
  # If no data points in the window, use the current value
971
  avg_apr_data_with_ma.at[i, 'moving_avg'] = row['apr']
 
1072
 
1073
  # Add adjusted APR moving average line if it exists
1074
  if 'adjusted_moving_avg' in avg_apr_data_with_ma.columns and avg_apr_data_with_ma['adjusted_moving_avg'].notna().any():
1075
+ # Create a copy of the dataframe with forward-filled adjusted_moving_avg values
1076
+ # to ensure the line continues even when we have missing data
1077
+ filled_avg_apr_data = avg_apr_data_with_ma.copy()
1078
+ filled_avg_apr_data['adjusted_moving_avg'] = filled_avg_apr_data['adjusted_moving_avg'].fillna(method='ffill')
1079
+
1080
+ # Use the filled dataframe for the adjusted APR line
1081
+ x_values_adj = filled_avg_apr_data['timestamp'].tolist()
1082
+ y_values_adj_ma = filled_avg_apr_data['adjusted_moving_avg'].tolist()
1083
 
1084
  # Create hover template for the adjusted APR moving average line
1085
  hover_data_adj = []
1086
+ for idx, row in filled_avg_apr_data.iterrows():
1087
  timestamp = row['timestamp']
1088
  if pd.notna(row['adjusted_moving_avg']):
1089
  hover_data_adj.append(
 
1096
 
1097
  fig.add_trace(
1098
  go.Scatter(
1099
+ x=x_values_adj,
1100
  y=y_values_adj_ma,
1101
  mode='lines', # Only lines for moving average
1102
  line=dict(color='green', width=4), # Thicker solid line for adjusted APR
 
1106
  visible=True # Visible by default
1107
  )
1108
  )
1109
+ logger.info(f"Added 3-day moving average Adjusted APR trace with {len(x_values_adj)} points (with forward-filling for missing values)")
1110
+ else:
1111
+ logger.warning("No adjusted APR moving average data available to plot")
1112
 
1113
  # Removed cumulative APR as requested
1114
  logger.info("Cumulative APR graph line has been removed as requested")
 
2033
 
2034
  # Function to update the graph without parameters (for refresh button)
2035
  def refresh_graph():
2036
+ """Refresh APR data from the database and update the visualization"""
2037
+ try:
2038
+ # Fetch new APR data
2039
+ logger.info("Manually refreshing APR data...")
2040
+ fetch_apr_data_from_db()
2041
+
2042
+ # Verify data was fetched successfully
2043
+ if global_df is None or len(global_df) == 0:
2044
+ logger.error("Failed to fetch APR data")
2045
+ return combined_graph.value, "Error: Failed to fetch APR data. Check the logs for details."
2046
+
2047
+ # Log info about fetched data with focus on adjusted_apr
2048
+ may_10_2025 = datetime(2025, 5, 10)
2049
+ if 'timestamp' in global_df and 'adjusted_apr' in global_df:
2050
+ after_may_10 = global_df[global_df['timestamp'] >= may_10_2025]
2051
+ with_adjusted_after_may_10 = after_may_10[after_may_10['adjusted_apr'].notna()]
2052
+
2053
+ logger.info(f"Data points after May 10th, 2025: {len(after_may_10)}")
2054
+ logger.info(f"Data points with adjusted_apr after May 10th, 2025: {len(with_adjusted_after_may_10)}")
2055
+
2056
+ # Generate new visualization
2057
+ logger.info("Generating new APR visualization...")
2058
+ new_graph = update_apr_graph(apr_toggle.value, adjusted_apr_toggle.value)
2059
+ return new_graph, "APR data refreshed successfully"
2060
+ except Exception as e:
2061
+ logger.error(f"Error refreshing APR data: {e}")
2062
+ return combined_graph.value, f"Error: {str(e)}"
2063
+
2064
+ # Add a text area for status messages
2065
+ status_text = gr.Textbox(label="Status", value="Ready", interactive=False)
2066
 
2067
+ # Set up the button click event for refresh
2068
+ refresh_btn.click(
2069
+ fn=refresh_graph,
2070
+ inputs=[],
2071
+ outputs=[combined_graph, status_text]
2072
+ )
2073
 
2074
  # Set up the toggle switch events
2075
  apr_toggle.change(
 
2089
  # Launch the dashboard
2090
  if __name__ == "__main__":
2091
  dashboard().launch()
2092
+
2093
def generate_adjusted_apr_report():
    """
    Generate a detailed report about adjusted_apr data availability and save it to a file.

    The report is built from the module-level ``global_df`` and written to
    ``adjusted_apr_report.txt`` in the current working directory. The whole
    report is composed in memory first, so a failure while computing the
    statistics does not leave a truncated file behind.

    Returns:
        The path to the generated report file, or ``None`` when ``global_df``
        is missing, empty, or lacks the columns the report reads.
    """
    # NOTE(review): in this file the definition appears after the
    # `if __name__ == "__main__":` launch call - confirm it is defined early
    # enough for whatever is meant to call it.

    # Work on one snapshot so a concurrent refresh cannot swap the frame mid-report.
    df = global_df

    if df is None or df.empty or 'adjusted_apr' not in df.columns:
        logger.warning("No adjusted_apr data available for report generation")
        return None

    missing_columns = [
        col for col in ('timestamp', 'agent_id', 'agent_name') if col not in df.columns
    ]
    if missing_columns:
        logger.warning(f"Cannot generate adjusted_apr report, missing columns: {missing_columns}")
        return None

    report_path = "adjusted_apr_report.txt"
    lines = ["======== ADJUSTED APR DATA AVAILABILITY REPORT ========\n\n"]

    # Summary statistics (df is known to be non-empty here)
    total_records = len(df)
    has_adjusted = df[df['adjusted_apr'].notna()]
    records_with_adjusted = len(has_adjusted)
    pct_with_adjusted = (records_with_adjusted / total_records) * 100

    lines.append(f"Total APR records: {total_records}\n")
    lines.append(f"Records with adjusted_apr: {records_with_adjusted} ({pct_with_adjusted:.2f}%)\n\n")

    # First and last data points
    if records_with_adjusted > 0:
        first_date = has_adjusted['timestamp'].min()
        last_date = has_adjusted['timestamp'].max()
        lines.append(f"First adjusted_apr record: {first_date}\n")
        lines.append(f"Last adjusted_apr record: {last_date}\n")
        lines.append(f"Date range: {(last_date - first_date).days} days\n\n")

    # Agent statistics
    lines.append("===== AGENT STATISTICS =====\n\n")

    # One pass per agent collects both the counts and the adjusted date range.
    agent_stats = []
    for agent_id, agent_data in df.groupby('agent_id', sort=False):
        agent_adjusted = agent_data[agent_data['adjusted_apr'].notna()]
        total_agent_records = len(agent_data)
        agent_with_adjusted = len(agent_adjusted)

        agent_stats.append({
            'agent_id': agent_id,
            'agent_name': agent_data['agent_name'].iloc[0],
            'total_records': total_agent_records,
            'with_adjusted': agent_with_adjusted,
            'coverage_pct': (agent_with_adjusted / total_agent_records) * 100,
            'first_adjusted': agent_adjusted['timestamp'].min() if agent_with_adjusted else None,
            'last_adjusted': agent_adjusted['timestamp'].max() if agent_with_adjusted else None,
        })

    # Sort by coverage percentage (descending); the sort is stable, so agents
    # with equal coverage keep their order of first appearance.
    agent_stats.sort(key=lambda stats: stats['coverage_pct'], reverse=True)

    for agent in agent_stats:
        lines.append(f"Agent: {agent['agent_name']} (ID: {agent['agent_id']})\n")
        lines.append(f" Records: {agent['total_records']}\n")
        lines.append(f" With adjusted_apr: {agent['with_adjusted']} ({agent['coverage_pct']:.2f}%)\n")

        # If agent has adjusted data, show date range
        if agent['with_adjusted']:
            lines.append(f" First adjusted_apr: {agent['first_adjusted']}\n")
            lines.append(f" Last adjusted_apr: {agent['last_adjusted']}\n")

        lines.append("\n")

    # Check for May 10th cutoff issue
    lines.append("===== MAY 10TH CUTOFF ANALYSIS =====\n\n")
    # Build the cutoff in the same timezone as the timestamp column (None for
    # naive data): comparing a tz-aware column with a naive datetime raises.
    timestamp_tz = getattr(df['timestamp'].dtype, 'tz', None)
    may_10_2025 = pd.Timestamp("2025-05-10", tz=timestamp_tz)

    before_cutoff = df[df['timestamp'] < may_10_2025]
    after_cutoff = df[df['timestamp'] >= may_10_2025]
    adjusted_before = before_cutoff[before_cutoff['adjusted_apr'].notna()]
    adjusted_after = after_cutoff[after_cutoff['adjusted_apr'].notna()]

    # Calculate coverage before and after
    before_total = len(before_cutoff)
    before_with_adjusted = len(adjusted_before)
    before_pct = (before_with_adjusted / before_total) * 100 if before_total > 0 else 0

    after_total = len(after_cutoff)
    after_with_adjusted = len(adjusted_after)
    after_pct = (after_with_adjusted / after_total) * 100 if after_total > 0 else 0

    lines.append("Before May 10th, 2025:\n")
    lines.append(f" Records: {before_total}\n")
    lines.append(f" With adjusted_apr: {before_with_adjusted} ({before_pct:.2f}%)\n\n")

    lines.append("After May 10th, 2025:\n")
    lines.append(f" Records: {after_total}\n")
    lines.append(f" With adjusted_apr: {after_with_adjusted} ({after_pct:.2f}%)\n\n")

    # Check for agents that had data before but not after
    if before_total > 0 and after_total > 0:
        agents_before = set(adjusted_before['agent_id'].unique())
        agents_after = set(adjusted_after['agent_id'].unique())

        missing_after = agents_before - agents_after
        new_after = agents_after - agents_before

        if missing_after:
            lines.append(f"Agents with adjusted_apr before May 10th but not after: {list(missing_after)}\n")

            # For each missing agent, show the last date with adjusted_apr
            for agent_id in missing_after:
                agent_rows = adjusted_before[adjusted_before['agent_id'] == agent_id]
                if not agent_rows.empty:
                    last_date = agent_rows['timestamp'].max()
                    agent_name = agent_rows['agent_name'].iloc[0]
                    lines.append(f" {agent_name} (ID: {agent_id}): Last adjusted_apr on {last_date}\n")

        if new_after:
            lines.append(f"\nAgents with adjusted_apr after May 10th but not before: {list(new_after)}\n")

    # Explicit UTF-8 so non-ASCII agent names do not depend on the locale's
    # default encoding; a single writelines() call batches the output.
    with open(report_path, "w", encoding="utf-8") as f:
        f.writelines(lines)

    logger.info(f"Adjusted APR report generated: {report_path}")
    return report_path