codelion committed
Commit 7fa46e2 · verified · 1 Parent(s): c655f91

Update app.py

Files changed (1)
  1. app.py +76 -8
app.py CHANGED

@@ -308,25 +308,93 @@ def detect_interpretation_pivots(logprobs, tokens):
         return "No interpretation pivots detected.", None
     return "Interpretation pivots detected:", pivots
 
-def calculate_decision_entropy(logprobs):
+def calculate_decision_entropy(logprobs, tokens=None):
     if not logprobs:
         return "No data for entropy spike detection.", None
+
+    # Calculate entropy at each position
     entropies = []
     for lps in logprobs:
-        if not lps:
+        if not lps or len(lps) < 2:  # Need at least two tokens for meaningful entropy
             entropies.append(0.0)
             continue
-        probs = [math.exp(p) for _, p in lps if p is not None]  # Convert log probs to probabilities, handle None
+
+        # Only use top-5 tokens for entropy calculation to reduce noise
+        top_k = min(5, len(lps))
+        probs = [math.exp(p) for _, p in lps[:top_k] if p is not None]
+
+        # Normalize probabilities to sum to 1
         if not probs or sum(probs) == 0:
             entropies.append(0.0)
             continue
-        entropy = -sum(p * math.log(p) for p in probs if p > 0)
+
+        prob_sum = sum(probs)
+        normalized_probs = [p / prob_sum for p in probs]
+
+        entropy = -sum(p * math.log(p) for p in normalized_probs if p > 0)
         entropies.append(entropy)
-    baseline = np.percentile(entropies, 75) if entropies else 0.0
-    spikes = [i for i, e in enumerate(entropies) if e > baseline * 1.5 and baseline > 0]
+
+    # Smooth entropy values with a moving average
+    window_size = 15
+    if len(entropies) >= window_size:
+        smoothed_entropies = np.convolve(entropies, np.ones(window_size) / window_size, mode='valid')
+    else:
+        smoothed_entropies = np.array(entropies)  # ndarray, so .size and the vectorized comparison below work
+
+    # More selective threshold - 90th percentile and 2x multiplier
+    baseline = np.percentile(smoothed_entropies, 90) if smoothed_entropies.size > 0 else 0.0
+
+    # Find significant spikes (much more selective)
+    spikes = []
+    if baseline > 0:
+        raw_spikes = np.where(smoothed_entropies > baseline * 2.0)[0]
+
+        # Cluster nearby spikes (within 20 tokens)
+        if raw_spikes.size > 0:
+            spikes = [raw_spikes[0]]
+            for spike in raw_spikes[1:]:
+                if spike - spikes[-1] > 20:
+                    spikes.append(spike)
+
+    # If we have token information, check context around spikes
+    if tokens and spikes:
+        context_spikes = []
+        decision_markers = ["therefore", "thus", "so", "hence", "because",
+                            "wait", "but", "however", "actually", "instead"]
+
+        for spike in spikes:
+            # Adjust index for convolution window if using smoothed values
+            spike_idx = spike + window_size // 2 if len(entropies) >= window_size else spike
+
+            if spike_idx >= len(tokens):
+                continue
+
+            # Check surrounding context (15 tokens before and after)
+            start_idx = max(0, spike_idx - 15)
+            end_idx = min(len(tokens), spike_idx + 15)
+
+            if end_idx <= start_idx:
+                continue
+
+            context = " ".join(tokens[start_idx:end_idx])
+
+            # Only keep spikes near reasoning transitions
+            if any(marker in context.lower() for marker in decision_markers):
+                # spike already indexes the smoothed array, so no window offset here
+                entropy_value = smoothed_entropies[spike] if len(entropies) >= window_size else entropies[spike]
+                context_spikes.append((spike_idx, entropy_value, tokens[spike_idx] if spike_idx < len(tokens) else "End"))
+
+        spikes = context_spikes
+
+    # Return at most 3 most significant spikes
     if not spikes:
-        return "No entropy spikes detected at decision points.", None
-    return "Entropy spikes detected at positions:", spikes
+        return "No significant entropy spikes detected at decision points.", None
+
+    # Sort by entropy value (highest first) if we have context information
+    if tokens and spikes:
+        spikes.sort(key=lambda x: x[1], reverse=True)
+        return "Significant entropy spikes detected at positions:", spikes[:3]
+
+    return "Entropy spikes detected at positions:", spikes[:3]
 
 def analyze_conclusion_competition(logprobs, tokens):
     if not logprobs or not tokens:
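
For intuition, the per-position entropy this commit introduces can be reproduced by hand. A minimal sketch, assuming one illustrative top-5 list of (token, logprob) pairs at a single decoding position (the tokens and values are made up):

import math

# Hypothetical top-5 alternatives at one position: (token, logprob)
lps = [("therefore", -0.7), ("so", -1.2), ("but", -2.3), ("hence", -3.0), ("thus", -3.5)]

top_k = min(5, len(lps))
probs = [math.exp(p) for _, p in lps[:top_k] if p is not None]

# Renormalize over the top-k, as the updated function does
prob_sum = sum(probs)
normalized_probs = [p / prob_sum for p in probs]

entropy = -sum(p * math.log(p) for p in normalized_probs if p > 0)
print(round(entropy, 3))  # ~1.2 nats; a uniform top-5 gives ln(5) ≈ 1.609, a one-hot distribution gives 0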
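
The new smoothing and spike-detection stage can be sanity-checked in isolation. A small sketch on synthetic entropy values, reusing the commit's window size (15), percentile (90th), multiplier (2x), and 20-position clustering; the flat baseline and injected spike are assumptions for the demo:

import numpy as np

# Synthetic per-position entropies: flat at 0.25 nats, with one injected spike
entropies = [0.25] * 200
entropies[120:123] = [2.5, 2.7, 2.6]

window_size = 15
smoothed = np.convolve(entropies, np.ones(window_size) / window_size, mode='valid')
baseline = np.percentile(smoothed, 90)              # stays at the flat level here
raw_spikes = np.where(smoothed > baseline * 2.0)[0]

# Cluster raw spike indices that fall within 20 positions of each other
clustered = [raw_spikes[0]] if raw_spikes.size else []
for s in raw_spikes[1:]:
    if s - clustered[-1] > 20:
        clustered.append(s)

print(clustered)  # a single cluster, starting near index 107 of the smoothed array

Because 'valid' convolution shortens the array by window_size - 1, the function maps a smoothed index back to a token position by adding window_size // 2.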
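
Finally, a hypothetical end-to-end call, assuming the function as shown above is in scope along with app.py's math and numpy imports. The logprobs shape (one top-k list of (token, logprob) alternatives per generated token) matches what the function indexes; the tokens and values are fabricated to put one high-entropy region next to a "however" marker:

import math

# Fabricated sequence: 300 tokens, confident everywhere except positions 150-154
tokens = ["step"] * 150 + ["however"] + ["step"] * 149
logprobs = []
for i in range(300):
    if 150 <= i <= 154:
        # near-uniform top-5 -> entropy close to ln(5)
        logprobs.append([(f"t{j}", math.log(0.2)) for j in range(5)])
    else:
        # one dominant token -> entropy close to 0
        logprobs.append([("t0", -0.01), ("t1", -5.0), ("t2", -6.0), ("t3", -7.0), ("t4", -8.0)])

message, spikes = calculate_decision_entropy(logprobs, tokens)
print(message, spikes)  # each hit is (token_position, entropy_value, token); at most 3, highest entropy first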