Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -23,9 +23,9 @@ def parse_input(json_input):
|
|
23 |
logger.debug("Successfully parsed as JSON")
|
24 |
return data
|
25 |
except json.JSONDecodeError as e:
|
26 |
-
logger.error("JSON parsing failed: %s", str(e))
|
27 |
try:
|
28 |
-
# If JSON fails, try to parse as Python literal (e.g., with single quotes)
|
29 |
data = ast.literal_eval(json_input)
|
30 |
logger.debug("Successfully parsed as Python literal")
|
31 |
# Convert Python dictionary to JSON-compatible format (replace single quotes with double quotes)
|
@@ -40,8 +40,8 @@ def parse_input(json_input):
|
|
40 |
logger.debug("Converted to JSON-compatible format")
|
41 |
return converted_data
|
42 |
except (SyntaxError, ValueError) as e:
|
43 |
-
logger.error("Python literal parsing failed: %s", str(e))
|
44 |
-
raise ValueError(f"Malformed input: {str(e)}. Ensure property names are in double quotes (e.g., \"content\")
|
45 |
|
46 |
# Function to ensure a value is a float, converting from string if necessary
|
47 |
def ensure_float(value):
|
@@ -69,27 +69,32 @@ def get_token(entry):
|
|
69 |
def create_empty_figure(title):
    """Return a blank Plotly figure that carries only the given title.

    Axis titles are set to empty strings and the legend is hidden, so the
    figure can stand in as a placeholder when there is nothing to plot.
    """
    placeholder = go.Figure()
    # update_layout mutates the figure in place; the same object is returned.
    placeholder.update_layout(title=title, xaxis_title="", yaxis_title="", showlegend=False)
    return placeholder
|
71 |
|
72 |
-
# Function to process and visualize the full log probs with dynamic top_logprobs, handling missing tokens
|
73 |
def visualize_logprobs(json_input):
|
74 |
try:
|
75 |
-
# Parse the input (handles
|
76 |
data = parse_input(json_input)
|
77 |
|
78 |
-
# Ensure data is a
|
79 |
if isinstance(data, dict) and "content" in data:
|
80 |
content = data["content"]
|
|
|
|
|
81 |
elif isinstance(data, list):
|
82 |
-
content = data
|
83 |
else:
|
84 |
-
raise ValueError("Input must be a
|
85 |
|
86 |
# Extract tokens, log probs, and top alternatives, skipping non-finite values with fixed filter of -100000
|
87 |
tokens = []
|
88 |
logprobs = []
|
89 |
top_alternatives = [] # List to store all top_logprobs (dynamic length)
|
90 |
for entry in content:
|
|
|
|
|
|
|
91 |
logprob = ensure_float(entry.get("logprob", None))
|
92 |
-
if
|
93 |
token = get_token(entry) # Safely get token, defaulting to "Unknown" if missing
|
94 |
tokens.append(token)
|
95 |
logprobs.append(logprob)
|
@@ -110,9 +115,9 @@ def visualize_logprobs(json_input):
|
|
110 |
else:
|
111 |
logger.debug("Skipping entry with logprob: %s (type: %s)", entry.get("logprob"), type(entry.get("logprob", None)))
|
112 |
|
113 |
-
# Check if there's valid data after filtering
|
114 |
if not logprobs or not tokens:
|
115 |
-
return (create_empty_figure("Log Probabilities of Generated Tokens"), None, "No
|
116 |
|
117 |
# 1. Main Log Probability Plot (Interactive Plotly)
|
118 |
main_fig = go.Figure()
|
@@ -152,8 +157,10 @@ def visualize_logprobs(json_input):
|
|
152 |
table_data = []
|
153 |
max_alternatives = max(len(alts) for alts in top_alternatives) if top_alternatives else 0
|
154 |
for i, entry in enumerate(content):
|
|
|
|
|
155 |
logprob = ensure_float(entry.get("logprob", None))
|
156 |
-
if
|
157 |
token = get_token(entry) # Safely get token, defaulting to "Unknown" if missing
|
158 |
top_logprobs = entry.get("top_logprobs", {})
|
159 |
if top_logprobs is None:
|
@@ -206,7 +213,7 @@ def visualize_logprobs(json_input):
|
|
206 |
colored_text += " "
|
207 |
colored_text_html = f"<p>{colored_text}</p>"
|
208 |
else:
|
209 |
-
colored_text_html = "No
|
210 |
|
211 |
# Top Token Log Probabilities (Interactive Plotly, dynamic length)
|
212 |
alt_viz_fig = create_empty_figure("Top Token Log Probabilities") if not logprobs or not top_alternatives else go.Figure()
|
@@ -230,21 +237,21 @@ def visualize_logprobs(json_input):
|
|
230 |
return (main_fig, df, colored_text_html, alt_viz_fig, drops_fig)
|
231 |
|
232 |
except Exception as e:
|
233 |
-
logger.error("Visualization failed: %s", str(e))
|
234 |
return (create_empty_figure("Log Probabilities of Generated Tokens"), None, "No finite log probabilities to display.", create_empty_figure("Top Token Log Probabilities"), create_empty_figure("Significant Probability Drops"))
|
235 |
|
236 |
-
# Gradio interface with full dataset visualization, dynamic top_logprobs, and
|
237 |
with gr.Blocks(title="Log Probability Visualizer") as app:
|
238 |
gr.Markdown("# Log Probability Visualizer")
|
239 |
gr.Markdown(
|
240 |
-
"Paste your JSON
|
241 |
)
|
242 |
|
243 |
with gr.Row():
|
244 |
json_input = gr.Textbox(
|
245 |
label="JSON Input",
|
246 |
lines=10,
|
247 |
-
placeholder="Paste your JSON (e.g., {\"content\": [
|
248 |
)
|
249 |
|
250 |
with gr.Row():
|
|
|
23 |
logger.debug("Successfully parsed as JSON")
|
24 |
return data
|
25 |
except json.JSONDecodeError as e:
|
26 |
+
logger.error("JSON parsing failed: %s (Input: %s)", str(e), json_input[:100] + "..." if len(json_input) > 100 else json_input)
|
27 |
try:
|
28 |
+
# If JSON fails, try to parse as Python literal (e.g., with single quotes), but only for JSON-like strings
|
29 |
data = ast.literal_eval(json_input)
|
30 |
logger.debug("Successfully parsed as Python literal")
|
31 |
# Convert Python dictionary to JSON-compatible format (replace single quotes with double quotes)
|
|
|
40 |
logger.debug("Converted to JSON-compatible format")
|
41 |
return converted_data
|
42 |
except (SyntaxError, ValueError) as e:
|
43 |
+
logger.error("Python literal parsing failed: %s (Input: %s)", str(e), json_input[:100] + "..." if len(json_input) > 100 else json_input)
|
44 |
+
raise ValueError(f"Malformed input: {str(e)}. Ensure property names are in double quotes (e.g., \"content\") and the format matches JSON (e.g., {{\"content\": [...]}}).")
|
45 |
|
46 |
# Function to ensure a value is a float, converting from string if necessary
|
47 |
def ensure_float(value):
|
|
|
69 |
def create_empty_figure(title):
    """Return a blank Plotly figure that carries only the given title.

    Axis titles are set to empty strings and the legend is hidden, so the
    figure can stand in as a placeholder when there is nothing to plot.
    """
    placeholder = go.Figure()
    # update_layout mutates the figure in place; the same object is returned.
    placeholder.update_layout(title=title, xaxis_title="", yaxis_title="", showlegend=False)
    return placeholder
|
71 |
|
72 |
+
# Function to process and visualize the full log probs with dynamic top_logprobs, handling missing tokens and JSON structure
|
73 |
def visualize_logprobs(json_input):
|
74 |
try:
|
75 |
+
# Parse the input (strict JSON first; parse_input falls back to a Python literal if JSON decoding fails)
|
76 |
data = parse_input(json_input)
|
77 |
|
78 |
+
# Accept either a dictionary whose 'content' key holds a list, or a bare list of entries
|
79 |
if isinstance(data, dict) and "content" in data:
|
80 |
content = data["content"]
|
81 |
+
if not isinstance(content, list):
|
82 |
+
raise ValueError("Content must be a list of entries")
|
83 |
elif isinstance(data, list):
|
84 |
+
content = data # Handle direct list input (though only JSON is expected)
|
85 |
else:
|
86 |
+
raise ValueError("Input must be a dictionary with 'content' key or a list of entries")
|
87 |
|
88 |
# Extract tokens, log probs, and top alternatives, keeping only entries whose logprob passes the fixed filter (>= -100000)
|
89 |
tokens = []
|
90 |
logprobs = []
|
91 |
top_alternatives = [] # List to store all top_logprobs (dynamic length)
|
92 |
for entry in content:
|
93 |
+
if not isinstance(entry, dict):
|
94 |
+
logger.warning("Skipping non-dictionary entry: %s", entry)
|
95 |
+
continue
|
96 |
logprob = ensure_float(entry.get("logprob", None))
|
97 |
+
if logprob >= -100000: # Include all entries with default 0.0, removing math.isfinite check
|
98 |
token = get_token(entry) # Safely get token, defaulting to "Unknown" if missing
|
99 |
tokens.append(token)
|
100 |
logprobs.append(logprob)
|
|
|
115 |
else:
|
116 |
logger.debug("Skipping entry with logprob: %s (type: %s)", entry.get("logprob"), type(entry.get("logprob", None)))
|
117 |
|
118 |
+
# Check if there's valid data after filtering (including default 0.0)
|
119 |
if not logprobs or not tokens:
|
120 |
+
return (create_empty_figure("Log Probabilities of Generated Tokens"), None, "No tokens to display.", create_empty_figure("Top Token Log Probabilities"), create_empty_figure("Significant Probability Drops"))
|
121 |
|
122 |
# 1. Main Log Probability Plot (Interactive Plotly)
|
123 |
main_fig = go.Figure()
|
|
|
157 |
table_data = []
|
158 |
max_alternatives = max(len(alts) for alts in top_alternatives) if top_alternatives else 0
|
159 |
for i, entry in enumerate(content):
|
160 |
+
if not isinstance(entry, dict):
|
161 |
+
continue
|
162 |
logprob = ensure_float(entry.get("logprob", None))
|
163 |
+
if logprob >= -100000 and "top_logprobs" in entry: # Include all entries with default 0.0
|
164 |
token = get_token(entry) # Safely get token, defaulting to "Unknown" if missing
|
165 |
top_logprobs = entry.get("top_logprobs", {})
|
166 |
if top_logprobs is None:
|
|
|
213 |
colored_text += " "
|
214 |
colored_text_html = f"<p>{colored_text}</p>"
|
215 |
else:
|
216 |
+
colored_text_html = "No tokens to display."
|
217 |
|
218 |
# Top Token Log Probabilities (Interactive Plotly, dynamic length)
|
219 |
alt_viz_fig = create_empty_figure("Top Token Log Probabilities") if not logprobs or not top_alternatives else go.Figure()
|
|
|
237 |
return (main_fig, df, colored_text_html, alt_viz_fig, drops_fig)
|
238 |
|
239 |
except Exception as e:
|
240 |
+
logger.error("Visualization failed: %s (Input: %s)", str(e), json_input[:100] + "..." if len(json_input) > 100 else json_input)
|
241 |
return (create_empty_figure("Log Probabilities of Generated Tokens"), None, "No finite log probabilities to display.", create_empty_figure("Top Token Log Probabilities"), create_empty_figure("Significant Probability Drops"))
|
242 |
|
243 |
+
# Gradio interface with full dataset visualization, dynamic top_logprobs, and robust JSON handling
|
244 |
with gr.Blocks(title="Log Probability Visualizer") as app:
|
245 |
gr.Markdown("# Log Probability Visualizer")
|
246 |
gr.Markdown(
|
247 |
+
"Paste your JSON log prob data below to visualize all tokens at once. Fixed filter ≥ -100000, dynamic number of top_logprobs, handles missing or null fields."
|
248 |
)
|
249 |
|
250 |
with gr.Row():
|
251 |
json_input = gr.Textbox(
|
252 |
label="JSON Input",
|
253 |
lines=10,
|
254 |
+
placeholder="Paste your JSON (e.g., {\"content\": [{\"bytes\": [44], \"logprob\": 0.0, \"token\": \",\", \"top_logprobs\": {\" so\": -13.8046875, \".\": -13.8046875, \",\": -13.640625}}]}).",
|
255 |
)
|
256 |
|
257 |
with gr.Row():
|