codelion committed on
Commit
b766b6b
·
verified ·
1 Parent(s): cf7578d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -43
app.py CHANGED
@@ -10,7 +10,6 @@ import logging
10
  import numpy as np
11
  import plotly.graph_objects as go
12
  from plotly.subplots import make_subplots
13
- from scipy import stats
14
 
15
  # Set up logging
16
  logging.basicConfig(level=logging.DEBUG)
@@ -60,7 +59,7 @@ def ensure_float(value):
60
  return None
61
 
62
  # Function to process and visualize log probs with interactive Plotly plots
63
- def visualize_logprobs(json_input, prob_filter=-1e9, page_size=50, page=0):
64
  try:
65
  # Parse the input (handles both JSON and Python dictionaries)
66
  data = parse_input(json_input)
@@ -73,13 +72,13 @@ def visualize_logprobs(json_input, prob_filter=-1e9, page_size=50, page=0):
73
  else:
74
  raise ValueError("Input must be a list or dictionary with 'content' key")
75
 
76
- # Extract tokens, log probs, and top alternatives, skipping None or non-finite values
77
  tokens = []
78
  logprobs = []
79
  top_alternatives = [] # List to store top 3 log probs (selected token + 2 alternatives)
80
  for entry in content:
81
  logprob = ensure_float(entry.get("logprob", None))
82
- if logprob is not None and math.isfinite(logprob) and logprob >= prob_filter:
83
  tokens.append(entry["token"])
84
  logprobs.append(logprob)
85
  # Get top_logprobs, default to empty dict if None
@@ -103,7 +102,8 @@ def visualize_logprobs(json_input, prob_filter=-1e9, page_size=50, page=0):
103
  if not logprobs or not tokens:
104
  return (gr.update(value="No finite log probabilities or tokens to visualize after filtering"), None, None, None, 1, 0)
105
 
106
- # Paginate data for large inputs
 
107
  total_pages = max(1, (len(logprobs) + page_size - 1) // page_size)
108
  start_idx = page * page_size
109
  end_idx = min((page + 1) * page_size, len(logprobs))
@@ -146,33 +146,11 @@ def visualize_logprobs(json_input, prob_filter=-1e9, page_size=50, page=0):
146
  hovertemplate='<b>%{customdata}</b><extra></extra>'
147
  )
148
 
149
- # 3. Anomaly Detection (Interactive Plotly)
150
- if not paginated_logprobs:
151
- anomaly_fig = go.Figure()
152
- anomaly_fig.add_trace(go.Scatter(x=[], y=[], mode='markers+lines', name='Log Prob', marker_color='blue'))
153
- else:
154
- z_scores = np.abs(stats.zscore(paginated_logprobs))
155
- outliers = z_scores > 2 # Threshold for outliers
156
- anomaly_fig = go.Figure()
157
- anomaly_fig.add_trace(go.Scatter(x=list(range(len(paginated_logprobs))), y=paginated_logprobs, mode='markers+lines', name='Log Prob', marker_color='blue'))
158
- anomaly_fig.add_trace(go.Scatter(x=np.where(outliers)[0], y=[paginated_logprobs[i] for i in np.where(outliers)[0]], mode='markers', name='Outliers', marker_color='red'))
159
- anomaly_fig.update_layout(
160
- title="Log Probabilities with Outliers",
161
- xaxis_title="Token Position",
162
- yaxis_title="Log Probability",
163
- hovermode="closest",
164
- clickmode='event+select'
165
- )
166
- anomaly_fig.update_traces(
167
- customdata=[f"Token: {tok}, Log Prob: {prob:.4f}, Position: {i+start_idx}, Outlier: {out}" for i, (tok, prob, out) in enumerate(zip(paginated_tokens, paginated_logprobs, outliers))],
168
- hovertemplate='<b>%{customdata}</b><extra></extra>'
169
- )
170
-
171
  # Create DataFrame for the table (paginated)
172
  table_data = []
173
  for i, entry in enumerate(content[start_idx:end_idx]):
174
  logprob = ensure_float(entry.get("logprob", None))
175
- if logprob is not None and math.isfinite(logprob) and logprob >= prob_filter and "top_logprobs" in entry and entry["top_logprobs"] is not None:
176
  token = entry["token"]
177
  top_logprobs = entry["top_logprobs"]
178
  # Ensure all values in top_logprobs are floats
@@ -230,9 +208,8 @@ def visualize_logprobs(json_input, prob_filter=-1e9, page_size=50, page=0):
230
  colored_text_html = "No finite log probabilities to display."
231
 
232
  # Top 3 Token Log Probabilities (paginated)
233
- alt_viz_html = ""
234
  if paginated_logprobs and paginated_alternatives:
235
- alt_viz_fig = go.Figure()
236
  for i, (token, probs) in enumerate(zip(paginated_tokens, paginated_alternatives)):
237
  for j, (alt_tok, prob) in enumerate(probs):
238
  alt_viz_fig.add_trace(go.Bar(x=[f"{token} (Pos {i+start_idx})"], y=[prob], name=f"{alt_tok}", marker_color=['blue', 'green', 'red'][j]))
@@ -252,17 +229,17 @@ def visualize_logprobs(json_input, prob_filter=-1e9, page_size=50, page=0):
252
  else:
253
  alt_viz_html = "No finite log probabilities to display."
254
 
255
- return (main_fig, df, colored_text_html, alt_viz_html, drops_fig, anomaly_fig, total_pages, page)
256
 
257
  except Exception as e:
258
  logger.error("Visualization failed: %s", str(e))
259
- return (gr.update(value=f"Error: {str(e)}"), None, "No finite log probabilities to display.", None, gr.update(value="No data for probability drops."), gr.update(value="No data for anomalies."), 1, 0)
260
 
261
  # Gradio interface with interactive layout and pagination
262
  with gr.Blocks(title="Log Probability Visualizer") as app:
263
  gr.Markdown("# Log Probability Visualizer")
264
  gr.Markdown(
265
- "Paste your JSON or Python dictionary log prob data below to visualize the tokens and their probabilities. Use the filter and pagination to navigate large inputs."
266
  )
267
 
268
  with gr.Row():
@@ -273,8 +250,6 @@ with gr.Blocks(title="Log Probability Visualizer") as app:
273
  placeholder="Paste your JSON (e.g., {\"content\": [...]}) or Python dict (e.g., {'content': [...]}) here...",
274
  )
275
  with gr.Column(scale=1):
276
- prob_filter = gr.Slider(minimum=-1e9, maximum=0, value=-1e9, label="Log Probability Filter (≥)")
277
- page_size = gr.Number(value=50, label="Page Size", precision=0, minimum=10, maximum=1000)
278
  page = gr.Number(value=0, label="Page Number", precision=0, minimum=0)
279
 
280
  with gr.Row():
@@ -282,18 +257,17 @@ with gr.Blocks(title="Log Probability Visualizer") as app:
282
  drops_output = gr.Plot(label="Probability Drops (Click for Details)")
283
 
284
  with gr.Row():
285
- anomaly_output = gr.Plot(label="Anomaly Detection (Click for Details)")
286
  table_output = gr.Dataframe(label="Token Log Probabilities and Top Alternatives")
 
287
 
288
  with gr.Row():
289
  text_output = gr.HTML(label="Colored Text (Confidence Visualization)")
290
- alt_viz_output = gr.HTML(label="Top 3 Token Log Probabilities")
291
 
292
  btn = gr.Button("Visualize")
293
  btn.click(
294
  fn=visualize_logprobs,
295
- inputs=[json_input, prob_filter, page_size, page],
296
- outputs=[plot_output, table_output, text_output, alt_viz_output, drops_output, anomaly_output, gr.State(), gr.State()],
297
  )
298
 
299
  # Pagination controls
@@ -303,24 +277,24 @@ with gr.Blocks(title="Log Probability Visualizer") as app:
303
  total_pages_output = gr.Number(label="Total Pages", interactive=False)
304
  current_page_output = gr.Number(label="Current Page", interactive=False)
305
 
306
- def update_page(json_input, prob_filter, page_size, current_page, action):
307
  if action == "prev" and current_page > 0:
308
  current_page -= 1
309
  elif action == "next":
310
- total_pages = visualize_logprobs(json_input, prob_filter, page_size, 0)[6] # Get total pages
311
  if current_page < total_pages - 1:
312
  current_page += 1
313
  return gr.update(value=current_page), gr.update(value=total_pages)
314
 
315
  prev_btn.click(
316
  fn=update_page,
317
- inputs=[json_input, prob_filter, page_size, page, gr.State()],
318
  outputs=[page, total_pages_output]
319
  )
320
 
321
  next_btn.click(
322
  fn=update_page,
323
- inputs=[json_input, prob_filter, page_size, page, gr.State()],
324
  outputs=[page, total_pages_output]
325
  )
326
 
 
10
  import numpy as np
11
  import plotly.graph_objects as go
12
  from plotly.subplots import make_subplots
 
13
 
14
  # Set up logging
15
  logging.basicConfig(level=logging.DEBUG)
 
59
  return None
60
 
61
  # Function to process and visualize log probs with interactive Plotly plots
62
+ def visualize_logprobs(json_input, page=0):
63
  try:
64
  # Parse the input (handles both JSON and Python dictionaries)
65
  data = parse_input(json_input)
 
72
  else:
73
  raise ValueError("Input must be a list or dictionary with 'content' key")
74
 
75
+ # Extract tokens, log probs, and top alternatives, skipping None or non-finite values with fixed filter of -100000
76
  tokens = []
77
  logprobs = []
78
  top_alternatives = [] # List to store top 3 log probs (selected token + 2 alternatives)
79
  for entry in content:
80
  logprob = ensure_float(entry.get("logprob", None))
81
+ if logprob is not None and math.isfinite(logprob) and logprob >= -100000:
82
  tokens.append(entry["token"])
83
  logprobs.append(logprob)
84
  # Get top_logprobs, default to empty dict if None
 
102
  if not logprobs or not tokens:
103
  return (gr.update(value="No finite log probabilities or tokens to visualize after filtering"), None, None, None, 1, 0)
104
 
105
+ # Paginate data for large inputs (fixed page size of 1000)
106
+ page_size = 1000
107
  total_pages = max(1, (len(logprobs) + page_size - 1) // page_size)
108
  start_idx = page * page_size
109
  end_idx = min((page + 1) * page_size, len(logprobs))
 
146
  hovertemplate='<b>%{customdata}</b><extra></extra>'
147
  )
148
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
149
  # Create DataFrame for the table (paginated)
150
  table_data = []
151
  for i, entry in enumerate(content[start_idx:end_idx]):
152
  logprob = ensure_float(entry.get("logprob", None))
153
+ if logprob is not None and math.isfinite(logprob) and logprob >= -100000 and "top_logprobs" in entry and entry["top_logprobs"] is not None:
154
  token = entry["token"]
155
  top_logprobs = entry["top_logprobs"]
156
  # Ensure all values in top_logprobs are floats
 
208
  colored_text_html = "No finite log probabilities to display."
209
 
210
  # Top 3 Token Log Probabilities (paginated)
211
+ alt_viz_fig = go.Figure()
212
  if paginated_logprobs and paginated_alternatives:
 
213
  for i, (token, probs) in enumerate(zip(paginated_tokens, paginated_alternatives)):
214
  for j, (alt_tok, prob) in enumerate(probs):
215
  alt_viz_fig.add_trace(go.Bar(x=[f"{token} (Pos {i+start_idx})"], y=[prob], name=f"{alt_tok}", marker_color=['blue', 'green', 'red'][j]))
 
229
  else:
230
  alt_viz_html = "No finite log probabilities to display."
231
 
232
+ return (main_fig, df, colored_text_html, alt_viz_html, drops_fig, total_pages, page)
233
 
234
  except Exception as e:
235
  logger.error("Visualization failed: %s", str(e))
236
+ return (gr.update(value=f"Error: {str(e)}"), None, "No finite log probabilities to display.", None, gr.update(value="No data for probability drops."), 1, 0)
237
 
238
  # Gradio interface with interactive layout and pagination
239
  with gr.Blocks(title="Log Probability Visualizer") as app:
240
  gr.Markdown("# Log Probability Visualizer")
241
  gr.Markdown(
242
+ "Paste your JSON or Python dictionary log prob data below to visualize the tokens and their probabilities. Use pagination to navigate large inputs (fixed filter ≥ -100000, 1000 tokens per page)."
243
  )
244
 
245
  with gr.Row():
 
250
  placeholder="Paste your JSON (e.g., {\"content\": [...]}) or Python dict (e.g., {'content': [...]}) here...",
251
  )
252
  with gr.Column(scale=1):
 
 
253
  page = gr.Number(value=0, label="Page Number", precision=0, minimum=0)
254
 
255
  with gr.Row():
 
257
  drops_output = gr.Plot(label="Probability Drops (Click for Details)")
258
 
259
  with gr.Row():
 
260
  table_output = gr.Dataframe(label="Token Log Probabilities and Top Alternatives")
261
+ alt_viz_output = gr.Plot(label="Top 3 Token Log Probabilities (Click for Details)")
262
 
263
  with gr.Row():
264
  text_output = gr.HTML(label="Colored Text (Confidence Visualization)")
 
265
 
266
  btn = gr.Button("Visualize")
267
  btn.click(
268
  fn=visualize_logprobs,
269
+ inputs=[json_input, page],
270
+ outputs=[plot_output, table_output, text_output, alt_viz_output, drops_output, gr.State(), gr.State()],
271
  )
272
 
273
  # Pagination controls
 
277
  total_pages_output = gr.Number(label="Total Pages", interactive=False)
278
  current_page_output = gr.Number(label="Current Page", interactive=False)
279
 
280
+ def update_page(json_input, current_page, action):
281
  if action == "prev" and current_page > 0:
282
  current_page -= 1
283
  elif action == "next":
284
+ total_pages = visualize_logprobs(json_input, 0)[5] # Get total pages
285
  if current_page < total_pages - 1:
286
  current_page += 1
287
  return gr.update(value=current_page), gr.update(value=total_pages)
288
 
289
  prev_btn.click(
290
  fn=update_page,
291
+ inputs=[json_input, page, gr.State()],
292
  outputs=[page, total_pages_output]
293
  )
294
 
295
  next_btn.click(
296
  fn=update_page,
297
+ inputs=[json_input, page, gr.State()],
298
  outputs=[page, total_pages_output]
299
  )
300