codelion committed on
Commit 485d05c · verified · 1 Parent(s): d38b65e

Update app.py

Files changed (1)
  1. app.py +50 -7
app.py CHANGED
@@ -238,15 +238,58 @@ def visualize_logprobs(json_input, chunk=0, chunk_size=100):
 def analyze_confidence_signature(logprobs, tokens):
     if not logprobs or not tokens:
         return "No data for confidence signature analysis.", None
-    top_probs = [lps[0][1] if lps and lps[0][1] is not None else -float('inf') for lps in logprobs]  # Extract top probability, handle empty or None
+
+    # Extract top probabilities
+    top_probs = [lps[0][1] if lps and lps[0][1] is not None else -float('inf') for lps in logprobs]
     if not any(p != -float('inf') for p in top_probs):
         return "No valid log probabilities for confidence analysis.", None
-    moving_avg = np.convolve(top_probs, np.ones(20) / 20, mode='valid')  # 20-token window
-    drops = np.where(np.diff(moving_avg) < -0.15)[0]  # Significant drops
-    if not drops.size:
-        return "No significant confidence drops detected.", None
-    drop_positions = [(i, tokens[i + 19] if i + 19 < len(tokens) else "End of trace") for i in drops]  # Adjust for convolution window
-    return "Significant confidence drops detected at positions:", drop_positions
+
+    # Use a larger window for smoother trends
+    window_size = 30  # Increased from 20
+    moving_avg = np.convolve(top_probs, np.ones(window_size) / window_size, mode='valid')
+
+    # Calculate drop magnitudes
+    drops = np.diff(moving_avg)
+
+    # Use adaptive thresholding - only flag drops in the bottom 5% of all changes
+    drop_threshold = np.percentile(drops, 5)  # More selective
+    significant_drops = np.where(drops < drop_threshold)[0]
+
+    # Cluster nearby drops (within 10 tokens) to avoid reporting multiple points in the same reasoning shift
+    if len(significant_drops) > 0:
+        clustered_drops = [significant_drops[0]]
+        for drop in significant_drops[1:]:
+            if drop - clustered_drops[-1] > 10:  # At least 10 tokens apart
+                clustered_drops.append(drop)
+    else:
+        clustered_drops = []
+
+    # Look for context markers near drops
+    filtered_drops = []
+    reasoning_markers = ["therefore", "thus", "so", "hence", "wait", "but", "however", "actually"]
+
+    for drop in clustered_drops:
+        # Adjust index for convolution window
+        token_idx = drop + window_size - 1
+
+        # Check surrounding context (10 tokens before and after)
+        start_idx = max(0, token_idx - 10)
+        end_idx = min(len(tokens), token_idx + 10)
+        context = " ".join(tokens[start_idx:end_idx])
+
+        # Only keep drops near reasoning transition markers
+        if any(marker in context.lower() for marker in reasoning_markers):
+            drop_magnitude = drops[drop]
+            filtered_drops.append((token_idx, drop_magnitude, tokens[token_idx] if token_idx < len(tokens) else "End of trace"))
+
+    # Sort by drop magnitude (largest drops first)
+    filtered_drops.sort(key=lambda x: x[1])
+
+    if not filtered_drops:
+        return "No significant confidence shifts at reasoning transitions detected.", None
+
+    # Return at most 3 most significant drops
+    return "Significant confidence shifts detected at reasoning transitions:", filtered_drops[:3]

 def detect_interpretation_pivots(logprobs, tokens):
     if not logprobs or not tokens:
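
For reference, a minimal sanity-check sketch of the updated function (not part of the commit). It assumes the function is importable from app.py with numpy in scope, and that each element of logprobs is a list of (token, logprob) pairs with the top candidate first, which is what lps[0][1] reads; the import path and the make_synthetic_trace helper are hypothetical.

import numpy as np
from app import analyze_confidence_signature  # hypothetical import path

def make_synthetic_trace(n=200, shift_at=120):
    # High confidence before `shift_at`, a sharp collapse after it, with the
    # reasoning marker "however" placed at the shift so the context filter fires.
    rng = np.random.default_rng(0)
    tokens = ["tok"] * n
    tokens[shift_at] = "however"
    logprobs = []
    for i in range(n):
        base = -0.1 if i < shift_at else -2.0
        logprobs.append([(tokens[i], base + rng.normal(0, 0.05))])
    return logprobs, tokens

logprobs, tokens = make_synthetic_trace()
message, drops = analyze_confidence_signature(logprobs, tokens)
print(message)
for token_idx, magnitude, token in (drops or []):
    print(f"  token {token_idx} ({token!r}): moving-average drop {magnitude:.3f}")

On a trace like this, the collapse at shift_at produces the most negative moving-average differences, so the 5th-percentile threshold flags them and the marker filter keeps the cluster nearest "however". Unlike the old fixed -0.15 cutoff, the percentile threshold adapts to each trace's own variability.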