Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -8,8 +8,6 @@ import math
|
|
8 |
import logging
|
9 |
import numpy as np
|
10 |
import plotly.graph_objects as go
|
11 |
-
import asyncio
|
12 |
-
import threading
|
13 |
|
14 |
# Set up logging
|
15 |
logging.basicConfig(level=logging.DEBUG)
|
@@ -30,7 +28,7 @@ def parse_input(json_input):
|
|
30 |
def ensure_float(value):
|
31 |
if value is None:
|
32 |
logger.debug("Replacing None logprob with 0.0")
|
33 |
-
return 0.0 # Default to 0.0 for None
|
34 |
if isinstance(value, str):
|
35 |
try:
|
36 |
return float(value)
|
@@ -52,8 +50,8 @@ def get_token(entry):
|
|
52 |
def create_empty_figure(title):
|
53 |
return go.Figure().update_layout(title=title, xaxis_title="", yaxis_title="", showlegend=False)
|
54 |
|
55 |
-
# Precompute the next chunk
|
56 |
-
|
57 |
try:
|
58 |
data = parse_input(json_input)
|
59 |
content = data.get("content", []) if isinstance(data, dict) else data
|
@@ -71,10 +69,7 @@ async def precompute_chunk(json_input, chunk_size, current_chunk):
|
|
71 |
if logprob >= -100000: # Include all entries with default 0.0
|
72 |
tokens.append(get_token(entry))
|
73 |
logprobs.append(logprob)
|
74 |
-
top_probs = entry.get("top_logprobs", {})
|
75 |
-
if top_probs is None:
|
76 |
-
logger.debug("top_logprobs is None for token: %s, using empty dict", get_token(entry))
|
77 |
-
top_probs = {}
|
78 |
finite_top_probs = []
|
79 |
for key, value in top_probs.items():
|
80 |
float_value = ensure_float(value)
|
@@ -92,28 +87,11 @@ async def precompute_chunk(json_input, chunk_size, current_chunk):
|
|
92 |
if start_idx >= len(tokens):
|
93 |
return None, None, None
|
94 |
|
95 |
-
|
96 |
-
paginated_logprobs = logprobs[start_idx:end_idx]
|
97 |
-
paginated_alternatives = top_alternatives[start_idx:end_idx]
|
98 |
-
|
99 |
-
return paginated_tokens, paginated_logprobs, paginated_alternatives
|
100 |
except Exception as e:
|
101 |
logger.error("Precomputation failed for chunk %d: %s", current_chunk + 1, str(e))
|
102 |
return None, None, None
|
103 |
|
104 |
-
# Synchronous wrapper for precomputation using threading
|
105 |
-
def precompute_next_chunk_sync(json_input, current_chunk):
|
106 |
-
loop = asyncio.new_event_loop()
|
107 |
-
asyncio.set_event_loop(loop)
|
108 |
-
try:
|
109 |
-
result = loop.run_until_complete(precompute_chunk(json_input, 100, current_chunk))
|
110 |
-
except Exception as e:
|
111 |
-
logger.error("Precomputation error: %s", str(e))
|
112 |
-
result = None, None, None
|
113 |
-
finally:
|
114 |
-
loop.close()
|
115 |
-
return result
|
116 |
-
|
117 |
# Function to process and visualize a chunk of log probs with dynamic top_logprobs
|
118 |
def visualize_logprobs(json_input, chunk=0, chunk_size=100):
|
119 |
try:
|
@@ -260,14 +238,14 @@ def visualize_logprobs(json_input, chunk=0, chunk_size=100):
|
|
260 |
def analyze_confidence_signature(logprobs, tokens):
|
261 |
if not logprobs or not tokens:
|
262 |
return "No data for confidence signature analysis.", None
|
263 |
-
top_probs = [lps[0][1] if lps and lps[0][1] is not None else -float('inf') for lps in logprobs] #
|
264 |
if not any(p != -float('inf') for p in top_probs):
|
265 |
return "No valid log probabilities for confidence analysis.", None
|
266 |
moving_avg = np.convolve(top_probs, np.ones(20) / 20, mode='valid') # 20-token window
|
267 |
drops = np.where(np.diff(moving_avg) < -0.15)[0] # Significant drops
|
268 |
if not drops.size:
|
269 |
return "No significant confidence drops detected.", None
|
270 |
-
drop_positions = [(i, tokens[i + 19] if i + 19 < len(tokens) else "End of trace") for i in drops]
|
271 |
return "Significant confidence drops detected at positions:", drop_positions
|
272 |
|
273 |
def detect_interpretation_pivots(logprobs, tokens):
|
@@ -420,7 +398,7 @@ def analyze_full_trace(json_input):
|
|
420 |
try:
|
421 |
with gr.Blocks(title="Log Probability Visualizer") as app:
|
422 |
gr.Markdown("# Log Probability Visualizer")
|
423 |
-
gr.Markdown("Paste your JSON log prob data below to analyze reasoning traces or visualize tokens in chunks of 100. Fixed filter ≥ -100000, dynamic number of top_logprobs, handles missing or null fields.
|
424 |
|
425 |
with gr.Tabs():
|
426 |
with gr.Tab("Trace Analysis"):
|
@@ -475,6 +453,9 @@ try:
|
|
475 |
outputs=[plot_output, table_output, text_output, alt_viz_output, drops_output, total_chunks_output, chunk],
|
476 |
)
|
477 |
|
|
|
|
|
|
|
478 |
def update_chunk(json_input, current_chunk, action, precomputed_next=None):
|
479 |
total_chunks = visualize_logprobs(json_input, 0)[5] # Get total chunks
|
480 |
if action == "prev" and current_chunk > 0:
|
@@ -500,7 +481,8 @@ try:
|
|
500 |
|
501 |
def trigger_precomputation(json_input, current_chunk):
|
502 |
try:
|
503 |
-
|
|
|
504 |
except Exception as e:
|
505 |
logger.error("Precomputation trigger failed: %s", str(e))
|
506 |
return gr.update(value=current_chunk)
|
|
|
8 |
import logging
|
9 |
import numpy as np
|
10 |
import plotly.graph_objects as go
|
|
|
|
|
11 |
|
12 |
# Set up logging
|
13 |
logging.basicConfig(level=logging.DEBUG)
|
|
|
28 |
def ensure_float(value):
|
29 |
if value is None:
|
30 |
logger.debug("Replacing None logprob with 0.0")
|
31 |
+
return 0.0 # Default to 0.0 for None
|
32 |
if isinstance(value, str):
|
33 |
try:
|
34 |
return float(value)
|
|
|
50 |
def create_empty_figure(title):
    """Build a placeholder Plotly figure that carries only a title.

    Args:
        title: Text passed to the layout as the figure title.

    Returns:
        The figure after ``update_layout`` has set the title, blanked both
        axis titles and turned the legend off.
    """
    placeholder = go.Figure()
    layout_options = {
        "title": title,
        "xaxis_title": "",
        "yaxis_title": "",
        "showlegend": False,
    }
    # update_layout returns the figure it was called on, so returning its
    # result is the same object as `placeholder`.
    return placeholder.update_layout(**layout_options)
|
52 |
|
53 |
+
# Precompute the next chunk (synchronous for Hugging Face Spaces)
|
54 |
+
def precompute_chunk(json_input, chunk_size, current_chunk):
|
55 |
try:
|
56 |
data = parse_input(json_input)
|
57 |
content = data.get("content", []) if isinstance(data, dict) else data
|
|
|
69 |
if logprob >= -100000: # Include all entries with default 0.0
|
70 |
tokens.append(get_token(entry))
|
71 |
logprobs.append(logprob)
|
72 |
+
top_probs = entry.get("top_logprobs", {}) or {}
|
|
|
|
|
|
|
73 |
finite_top_probs = []
|
74 |
for key, value in top_probs.items():
|
75 |
float_value = ensure_float(value)
|
|
|
87 |
if start_idx >= len(tokens):
|
88 |
return None, None, None
|
89 |
|
90 |
+
return tokens[start_idx:end_idx], logprobs[start_idx:end_idx], top_alternatives[start_idx:end_idx]
|
|
|
|
|
|
|
|
|
91 |
except Exception as e:
|
92 |
logger.error("Precomputation failed for chunk %d: %s", current_chunk + 1, str(e))
|
93 |
return None, None, None
|
94 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
95 |
# Function to process and visualize a chunk of log probs with dynamic top_logprobs
|
96 |
def visualize_logprobs(json_input, chunk=0, chunk_size=100):
|
97 |
try:
|
|
|
238 |
def analyze_confidence_signature(logprobs, tokens, window=20, drop_threshold=-0.15):
    """Locate sharp drops in the moving average of the top log probability.

    Args:
        logprobs: Sequence with one item per token. Each item is indexed as
            ``item[0][1]`` to read the top value; an empty/None item or a
            None value is treated as missing.
        tokens: Sequence of tokens used to label each reported position.
        window: Number of consecutive entries averaged together (default 20).
        drop_threshold: A step between consecutive moving-average values
            below this number counts as a drop (default -0.15).

    Returns:
        A ``(message, positions)`` tuple. ``positions`` is ``None`` when there
        is nothing to report; otherwise it is a list of ``(index, token)``
        pairs where ``index`` is the position in the moving-average series
        and ``token`` is ``tokens[index + window - 1]`` (or the string
        "End of trace" when that index is past the end of ``tokens``).

    Raises:
        ValueError: If ``window`` is smaller than 1.
    """
    if window < 1:
        raise ValueError("window must be at least 1")
    if not logprobs or not tokens:
        return "No data for confidence signature analysis.", None

    # Missing entries are represented by -inf so they never look confident.
    missing = -float('inf')
    top_probs = [
        lps[0][1] if lps and lps[0][1] is not None else missing
        for lps in logprobs
    ]
    if not any(p != missing for p in top_probs):
        return "No valid log probabilities for confidence analysis.", None

    # With fewer entries than the window there is no complete window to
    # average. np.convolve(mode='valid') would silently swap its operands in
    # that case and yield a flat, meaningless series, so report "no drops"
    # explicitly instead.
    if len(top_probs) < window:
        return "No significant confidence drops detected.", None

    # -inf placeholders can produce (-inf) - (-inf) = NaN inside np.diff; the
    # NaN compares False against the threshold, so only the warning is muted.
    with np.errstate(invalid='ignore'):
        moving_avg = np.convolve(top_probs, np.ones(window) / window, mode='valid')
        drops = np.where(np.diff(moving_avg) < drop_threshold)[0]
    if not drops.size:
        return "No significant confidence drops detected.", None

    offset = window - 1  # shift from window start to the window's last token
    drop_positions = []
    for raw_index in drops:
        index = int(raw_index)  # plain int instead of numpy.int64
        label = tokens[index + offset] if index + offset < len(tokens) else "End of trace"
        drop_positions.append((index, label))
    return "Significant confidence drops detected at positions:", drop_positions
|
250 |
|
251 |
def detect_interpretation_pivots(logprobs, tokens):
|
|
|
398 |
try:
|
399 |
with gr.Blocks(title="Log Probability Visualizer") as app:
|
400 |
gr.Markdown("# Log Probability Visualizer")
|
401 |
+
gr.Markdown("Paste your JSON log prob data below to analyze reasoning traces or visualize tokens in chunks of 100. Fixed filter ≥ -100000, dynamic number of top_logprobs, handles missing or null fields.")
|
402 |
|
403 |
with gr.Tabs():
|
404 |
with gr.Tab("Trace Analysis"):
|
|
|
453 |
outputs=[plot_output, table_output, text_output, alt_viz_output, drops_output, total_chunks_output, chunk],
|
454 |
)
|
455 |
|
456 |
+
def precompute_next_chunk(json_input, current_chunk):
    """Run ``precompute_chunk`` for ``current_chunk`` with a page size of 100.

    Returns whatever ``precompute_chunk`` returns for those arguments.
    """
    page_size = 100  # fixed chunk size used by this wrapper
    return precompute_chunk(json_input, chunk_size=page_size, current_chunk=current_chunk)
|
458 |
+
|
459 |
def update_chunk(json_input, current_chunk, action, precomputed_next=None):
|
460 |
total_chunks = visualize_logprobs(json_input, 0)[5] # Get total chunks
|
461 |
if action == "prev" and current_chunk > 0:
|
|
|
481 |
|
482 |
def trigger_precomputation(json_input, current_chunk):
|
483 |
try:
|
484 |
+
precomputed = precompute_next_chunk(json_input, current_chunk)
|
485 |
+
precomputed_next.value = precomputed # Update state directly
|
486 |
except Exception as e:
|
487 |
logger.error("Precomputation trigger failed: %s", str(e))
|
488 |
return gr.update(value=current_chunk)
|