codelion committed on
Commit
7d5d680
·
verified ·
1 Parent(s): b8e291e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -6
app.py CHANGED
@@ -58,11 +58,18 @@ def ensure_float(value):
58
  return float(value)
59
  return 0.0 # Default for any other type
60
 
 
 
 
 
 
 
 
61
  # Function to create an empty Plotly figure
62
  def create_empty_figure(title):
63
  return go.Figure().update_layout(title=title, xaxis_title="", yaxis_title="", showlegend=False)
64
 
65
- # Function to process and visualize the full log probs with dynamic top_logprobs, handling None
66
  def visualize_logprobs(json_input):
67
  try:
68
  # Parse the input (handles both JSON and Python dictionaries)
@@ -83,12 +90,13 @@ def visualize_logprobs(json_input):
83
  for entry in content:
84
  logprob = ensure_float(entry.get("logprob", None))
85
  if math.isfinite(logprob) and logprob >= -100000:
86
- tokens.append(entry["token"])
 
87
  logprobs.append(logprob)
88
  # Get top_logprobs, default to empty dict if None
89
  top_probs = entry.get("top_logprobs", {})
90
  if top_probs is None:
91
- logger.debug("top_logprobs is None for token: %s, using empty dict", entry["token"])
92
  top_probs = {} # Default to empty dict for None
93
  # Ensure all values in top_logprobs are floats and create a list of tuples
94
  finite_top_probs = []
@@ -146,7 +154,7 @@ def visualize_logprobs(json_input):
146
  for i, entry in enumerate(content):
147
  logprob = ensure_float(entry.get("logprob", None))
148
  if math.isfinite(logprob) and logprob >= -100000 and "top_logprobs" in entry:
149
- token = entry["token"]
150
  top_logprobs = entry.get("top_logprobs", {})
151
  if top_logprobs is None:
152
  logger.debug("top_logprobs is None for token: %s, using empty dict", token)
@@ -225,11 +233,11 @@ def visualize_logprobs(json_input):
225
  logger.error("Visualization failed: %s", str(e))
226
  return (create_empty_figure("Log Probabilities of Generated Tokens"), None, "No finite log probabilities to display.", create_empty_figure("Top Token Log Probabilities"), create_empty_figure("Significant Probability Drops"))
227
 
228
- # Gradio interface with full dataset visualization and dynamic top_logprobs
229
  with gr.Blocks(title="Log Probability Visualizer") as app:
230
  gr.Markdown("# Log Probability Visualizer")
231
  gr.Markdown(
232
- "Paste your JSON or Python dictionary log prob data below to visualize all tokens at once. Fixed filter ≥ -100000, dynamic number of top_logprobs."
233
  )
234
 
235
  with gr.Row():
 
58
  return float(value)
59
  return 0.0 # Default for any other type
60
 
61
+ # Function to get an entry's token value (falls back to "Unknown", with a warning, if missing)
62
+ def get_token(entry):
63
+ token = entry.get("token", "Unknown")
64
+ if token == "Unknown":
65
+ logger.warning("Missing 'token' key for entry: %s, using 'Unknown'", entry)
66
+ return token
67
+
68
  # Function to create an empty Plotly figure
69
  def create_empty_figure(title):
70
  return go.Figure().update_layout(title=title, xaxis_title="", yaxis_title="", showlegend=False)
71
 
72
+ # Function to process and visualize the full log probs with dynamic top_logprobs, handling missing tokens
73
  def visualize_logprobs(json_input):
74
  try:
75
  # Parse the input (handles both JSON and Python dictionaries)
 
90
  for entry in content:
91
  logprob = ensure_float(entry.get("logprob", None))
92
  if math.isfinite(logprob) and logprob >= -100000:
93
+ token = get_token(entry) # Safely get token, defaulting to "Unknown" if missing
94
+ tokens.append(token)
95
  logprobs.append(logprob)
96
  # Get top_logprobs, default to empty dict if None
97
  top_probs = entry.get("top_logprobs", {})
98
  if top_probs is None:
99
+ logger.debug("top_logprobs is None for token: %s, using empty dict", token)
100
  top_probs = {} # Default to empty dict for None
101
  # Ensure all values in top_logprobs are floats and create a list of tuples
102
  finite_top_probs = []
 
154
  for i, entry in enumerate(content):
155
  logprob = ensure_float(entry.get("logprob", None))
156
  if math.isfinite(logprob) and logprob >= -100000 and "top_logprobs" in entry:
157
+ token = get_token(entry) # Safely get token, defaulting to "Unknown" if missing
158
  top_logprobs = entry.get("top_logprobs", {})
159
  if top_logprobs is None:
160
  logger.debug("top_logprobs is None for token: %s, using empty dict", token)
 
233
  logger.error("Visualization failed: %s", str(e))
234
  return (create_empty_figure("Log Probabilities of Generated Tokens"), None, "No finite log probabilities to display.", create_empty_figure("Top Token Log Probabilities"), create_empty_figure("Significant Probability Drops"))
235
 
236
+ # Gradio interface with full dataset visualization, dynamic top_logprobs, and handling missing tokens
237
  with gr.Blocks(title="Log Probability Visualizer") as app:
238
  gr.Markdown("# Log Probability Visualizer")
239
  gr.Markdown(
240
+ "Paste your JSON or Python dictionary log prob data below to visualize all tokens at once. Fixed filter ≥ -100000, dynamic number of top_logprobs, handles missing 'token'."
241
  )
242
 
243
  with gr.Row():