codelion committed on
Commit
0d41503
·
verified ·
1 Parent(s): 7d5d680

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -18
app.py CHANGED
@@ -23,9 +23,9 @@ def parse_input(json_input):
23
  logger.debug("Successfully parsed as JSON")
24
  return data
25
  except json.JSONDecodeError as e:
26
- logger.error("JSON parsing failed: %s", str(e))
27
  try:
28
- # If JSON fails, try to parse as Python literal (e.g., with single quotes)
29
  data = ast.literal_eval(json_input)
30
  logger.debug("Successfully parsed as Python literal")
31
  # Convert Python dictionary to JSON-compatible format (replace single quotes with double quotes)
@@ -40,8 +40,8 @@ def parse_input(json_input):
40
  logger.debug("Converted to JSON-compatible format")
41
  return converted_data
42
  except (SyntaxError, ValueError) as e:
43
- logger.error("Python literal parsing failed: %s", str(e))
44
- raise ValueError(f"Malformed input: {str(e)}. Ensure property names are in double quotes (e.g., \"content\") or correct Python dictionary format.")
45
 
46
  # Function to ensure a value is a float, converting from string if necessary
47
  def ensure_float(value):
@@ -69,27 +69,32 @@ def get_token(entry):
69
  def create_empty_figure(title):
70
  return go.Figure().update_layout(title=title, xaxis_title="", yaxis_title="", showlegend=False)
71
 
72
- # Function to process and visualize the full log probs with dynamic top_logprobs, handling missing tokens
73
  def visualize_logprobs(json_input):
74
  try:
75
- # Parse the input (handles both JSON and Python dictionaries)
76
  data = parse_input(json_input)
77
 
78
- # Ensure data is a list or dictionary with 'content'
79
  if isinstance(data, dict) and "content" in data:
80
  content = data["content"]
 
 
81
  elif isinstance(data, list):
82
- content = data
83
  else:
84
- raise ValueError("Input must be a list or dictionary with 'content' key")
85
 
86
  # Extract tokens, log probs, and top alternatives, skipping non-finite values with fixed filter of -100000
87
  tokens = []
88
  logprobs = []
89
  top_alternatives = [] # List to store all top_logprobs (dynamic length)
90
  for entry in content:
 
 
 
91
  logprob = ensure_float(entry.get("logprob", None))
92
- if math.isfinite(logprob) and logprob >= -100000:
93
  token = get_token(entry) # Safely get token, defaulting to "Unknown" if missing
94
  tokens.append(token)
95
  logprobs.append(logprob)
@@ -110,9 +115,9 @@ def visualize_logprobs(json_input):
110
  else:
111
  logger.debug("Skipping entry with logprob: %s (type: %s)", entry.get("logprob"), type(entry.get("logprob", None)))
112
 
113
- # Check if there's valid data after filtering
114
  if not logprobs or not tokens:
115
- return (create_empty_figure("Log Probabilities of Generated Tokens"), None, "No finite log probabilities to display.", create_empty_figure("Top Token Log Probabilities"), create_empty_figure("Significant Probability Drops"))
116
 
117
  # 1. Main Log Probability Plot (Interactive Plotly)
118
  main_fig = go.Figure()
@@ -152,8 +157,10 @@ def visualize_logprobs(json_input):
152
  table_data = []
153
  max_alternatives = max(len(alts) for alts in top_alternatives) if top_alternatives else 0
154
  for i, entry in enumerate(content):
 
 
155
  logprob = ensure_float(entry.get("logprob", None))
156
- if math.isfinite(logprob) and logprob >= -100000 and "top_logprobs" in entry:
157
  token = get_token(entry) # Safely get token, defaulting to "Unknown" if missing
158
  top_logprobs = entry.get("top_logprobs", {})
159
  if top_logprobs is None:
@@ -206,7 +213,7 @@ def visualize_logprobs(json_input):
206
  colored_text += " "
207
  colored_text_html = f"<p>{colored_text}</p>"
208
  else:
209
- colored_text_html = "No finite log probabilities to display."
210
 
211
  # Top Token Log Probabilities (Interactive Plotly, dynamic length)
212
  alt_viz_fig = create_empty_figure("Top Token Log Probabilities") if not logprobs or not top_alternatives else go.Figure()
@@ -230,21 +237,21 @@ def visualize_logprobs(json_input):
230
  return (main_fig, df, colored_text_html, alt_viz_fig, drops_fig)
231
 
232
  except Exception as e:
233
- logger.error("Visualization failed: %s", str(e))
234
  return (create_empty_figure("Log Probabilities of Generated Tokens"), None, "No finite log probabilities to display.", create_empty_figure("Top Token Log Probabilities"), create_empty_figure("Significant Probability Drops"))
235
 
236
- # Gradio interface with full dataset visualization, dynamic top_logprobs, and handling missing tokens
237
  with gr.Blocks(title="Log Probability Visualizer") as app:
238
  gr.Markdown("# Log Probability Visualizer")
239
  gr.Markdown(
240
- "Paste your JSON or Python dictionary log prob data below to visualize all tokens at once. Fixed filter ≥ -100000, dynamic number of top_logprobs, handles missing 'token'."
241
  )
242
 
243
  with gr.Row():
244
  json_input = gr.Textbox(
245
  label="JSON Input",
246
  lines=10,
247
- placeholder="Paste your JSON (e.g., {\"content\": [...]}) or Python dict (e.g., {'content': [...]}) here...",
248
  )
249
 
250
  with gr.Row():
 
23
  logger.debug("Successfully parsed as JSON")
24
  return data
25
  except json.JSONDecodeError as e:
26
+ logger.error("JSON parsing failed: %s (Input: %s)", str(e), json_input[:100] + "..." if len(json_input) > 100 else json_input)
27
  try:
28
+ # If JSON fails, try to parse as Python literal (e.g., with single quotes), but only for JSON-like strings
29
  data = ast.literal_eval(json_input)
30
  logger.debug("Successfully parsed as Python literal")
31
  # Convert Python dictionary to JSON-compatible format (replace single quotes with double quotes)
 
40
  logger.debug("Converted to JSON-compatible format")
41
  return converted_data
42
  except (SyntaxError, ValueError) as e:
43
+ logger.error("Python literal parsing failed: %s (Input: %s)", str(e), json_input[:100] + "..." if len(json_input) > 100 else json_input)
44
+ raise ValueError(f"Malformed input: {str(e)}. Ensure property names are in double quotes (e.g., \"content\") and the format matches JSON (e.g., {{\"content\": [...]}}).")
45
 
46
  # Function to ensure a value is a float, converting from string if necessary
47
  def ensure_float(value):
 
69
  def create_empty_figure(title):
70
  return go.Figure().update_layout(title=title, xaxis_title="", yaxis_title="", showlegend=False)
71
 
72
+ # Function to process and visualize the full log probs with dynamic top_logprobs, handling missing tokens and JSON structure
73
  def visualize_logprobs(json_input):
74
  try:
75
+ # Parse the input (handles JSON only, as specified)
76
  data = parse_input(json_input)
77
 
78
+ # Ensure data is a dictionary with 'content' key containing a list
79
  if isinstance(data, dict) and "content" in data:
80
  content = data["content"]
81
+ if not isinstance(content, list):
82
+ raise ValueError("Content must be a list of entries")
83
  elif isinstance(data, list):
84
+ content = data # Handle direct list input (though only JSON is expected)
85
  else:
86
+ raise ValueError("Input must be a dictionary with 'content' key or a list of entries")
87
 
88
  # Extract tokens, log probs, and top alternatives, skipping non-finite values with fixed filter of -100000
89
  tokens = []
90
  logprobs = []
91
  top_alternatives = [] # List to store all top_logprobs (dynamic length)
92
  for entry in content:
93
+ if not isinstance(entry, dict):
94
+ logger.warning("Skipping non-dictionary entry: %s", entry)
95
+ continue
96
  logprob = ensure_float(entry.get("logprob", None))
97
+ if logprob >= -100000: # Include all entries with default 0.0, removing math.isfinite check
98
  token = get_token(entry) # Safely get token, defaulting to "Unknown" if missing
99
  tokens.append(token)
100
  logprobs.append(logprob)
 
115
  else:
116
  logger.debug("Skipping entry with logprob: %s (type: %s)", entry.get("logprob"), type(entry.get("logprob", None)))
117
 
118
+ # Check if there's valid data after filtering (including default 0.0)
119
  if not logprobs or not tokens:
120
+ return (create_empty_figure("Log Probabilities of Generated Tokens"), None, "No tokens to display.", create_empty_figure("Top Token Log Probabilities"), create_empty_figure("Significant Probability Drops"))
121
 
122
  # 1. Main Log Probability Plot (Interactive Plotly)
123
  main_fig = go.Figure()
 
157
  table_data = []
158
  max_alternatives = max(len(alts) for alts in top_alternatives) if top_alternatives else 0
159
  for i, entry in enumerate(content):
160
+ if not isinstance(entry, dict):
161
+ continue
162
  logprob = ensure_float(entry.get("logprob", None))
163
+ if logprob >= -100000 and "top_logprobs" in entry: # Include all entries with default 0.0
164
  token = get_token(entry) # Safely get token, defaulting to "Unknown" if missing
165
  top_logprobs = entry.get("top_logprobs", {})
166
  if top_logprobs is None:
 
213
  colored_text += " "
214
  colored_text_html = f"<p>{colored_text}</p>"
215
  else:
216
+ colored_text_html = "No tokens to display."
217
 
218
  # Top Token Log Probabilities (Interactive Plotly, dynamic length)
219
  alt_viz_fig = create_empty_figure("Top Token Log Probabilities") if not logprobs or not top_alternatives else go.Figure()
 
237
  return (main_fig, df, colored_text_html, alt_viz_fig, drops_fig)
238
 
239
  except Exception as e:
240
+ logger.error("Visualization failed: %s (Input: %s)", str(e), json_input[:100] + "..." if len(json_input) > 100 else json_input)
241
  return (create_empty_figure("Log Probabilities of Generated Tokens"), None, "No finite log probabilities to display.", create_empty_figure("Top Token Log Probabilities"), create_empty_figure("Significant Probability Drops"))
242
 
243
+ # Gradio interface with full dataset visualization, dynamic top_logprobs, and robust JSON handling
244
  with gr.Blocks(title="Log Probability Visualizer") as app:
245
  gr.Markdown("# Log Probability Visualizer")
246
  gr.Markdown(
247
+ "Paste your JSON log prob data below to visualize all tokens at once. Fixed filter ≥ -100000, dynamic number of top_logprobs, handles missing or null fields."
248
  )
249
 
250
  with gr.Row():
251
  json_input = gr.Textbox(
252
  label="JSON Input",
253
  lines=10,
254
+ placeholder="Paste your JSON (e.g., {\"content\": [{\"bytes\": [44], \"logprob\": 0.0, \"token\": \",\", \"top_logprobs\": {\" so\": -13.8046875, \".\": -13.8046875, \",\": -13.640625}}]}).",
255
  )
256
 
257
  with gr.Row():