Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -58,11 +58,18 @@ def ensure_float(value):
|
|
58 |
return float(value)
|
59 |
return 0.0 # Default for any other type
|
60 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
61 |
# Function to create an empty Plotly figure
|
62 |
def create_empty_figure(title):
    """Build a blank Plotly figure that carries only a title.

    Args:
        title: Value passed to the layout as the figure title.

    Returns:
        Whatever ``update_layout`` returns for a fresh ``go.Figure()`` after
        the title is set, both axis titles are blanked and the legend is
        hidden.
    """
    placeholder = go.Figure()
    # Collect the layout settings first so the call below stays short.
    layout_options = {
        "title": title,
        "xaxis_title": "",
        "yaxis_title": "",
        "showlegend": False,
    }
    return placeholder.update_layout(**layout_options)
|
64 |
|
65 |
-
# Function to process and visualize the full log probs with dynamic top_logprobs, handling
|
66 |
def visualize_logprobs(json_input):
|
67 |
try:
|
68 |
# Parse the input (handles both JSON and Python dictionaries)
|
@@ -83,12 +90,13 @@ def visualize_logprobs(json_input):
|
|
83 |
for entry in content:
|
84 |
logprob = ensure_float(entry.get("logprob", None))
|
85 |
if math.isfinite(logprob) and logprob >= -100000:
|
86 |
-
|
|
|
87 |
logprobs.append(logprob)
|
88 |
# Get top_logprobs, default to empty dict if None
|
89 |
top_probs = entry.get("top_logprobs", {})
|
90 |
if top_probs is None:
|
91 |
-
logger.debug("top_logprobs is None for token: %s, using empty dict",
|
92 |
top_probs = {} # Default to empty dict for None
|
93 |
# Ensure all values in top_logprobs are floats and create a list of tuples
|
94 |
finite_top_probs = []
|
@@ -146,7 +154,7 @@ def visualize_logprobs(json_input):
|
|
146 |
for i, entry in enumerate(content):
|
147 |
logprob = ensure_float(entry.get("logprob", None))
|
148 |
if math.isfinite(logprob) and logprob >= -100000 and "top_logprobs" in entry:
|
149 |
-
token = entry
|
150 |
top_logprobs = entry.get("top_logprobs", {})
|
151 |
if top_logprobs is None:
|
152 |
logger.debug("top_logprobs is None for token: %s, using empty dict", token)
|
@@ -225,11 +233,11 @@ def visualize_logprobs(json_input):
|
|
225 |
logger.error("Visualization failed: %s", str(e))
|
226 |
return (create_empty_figure("Log Probabilities of Generated Tokens"), None, "No finite log probabilities to display.", create_empty_figure("Top Token Log Probabilities"), create_empty_figure("Significant Probability Drops"))
|
227 |
|
228 |
-
# Gradio interface with full dataset visualization and
|
229 |
with gr.Blocks(title="Log Probability Visualizer") as app:
|
230 |
gr.Markdown("# Log Probability Visualizer")
|
231 |
gr.Markdown(
|
232 |
-
"Paste your JSON or Python dictionary log prob data below to visualize all tokens at once. Fixed filter ≥ -100000, dynamic number of top_logprobs."
|
233 |
)
|
234 |
|
235 |
with gr.Row():
|
|
|
58 |
return float(value)
|
59 |
return 0.0 # Default for any other type
|
60 |
|
61 |
+
# Function to get or generate a token value (default to "Unknown" if missing)
|
62 |
+
def get_token(entry):
    """Return the entry's ``"token"`` value, or ``"Unknown"`` when the key is absent.

    Args:
        entry: Object exposing ``.get`` (presumably a dict for one token
            record -- confirm against callers); only its ``"token"`` key is
            read.

    Returns:
        The value stored under ``"token"``, or the string ``"Unknown"`` when
        there is no such key.
    """
    # A private sentinel tells "key absent" apart from a token whose text is
    # literally "Unknown", so the warning fires only for a truly missing key.
    missing = object()
    token = entry.get("token", missing)
    if token is missing:
        logger.warning("Missing 'token' key for entry: %s, using 'Unknown'", entry)
        return "Unknown"
    return token
|
67 |
+
|
68 |
# Function to create an empty Plotly figure
|
69 |
def create_empty_figure(title):
    """Build a blank Plotly figure that carries only a title.

    Args:
        title: Value passed to the layout as the figure title.

    Returns:
        Whatever ``update_layout`` returns for a fresh ``go.Figure()`` after
        the title is set, both axis titles are blanked and the legend is
        hidden.
    """
    placeholder = go.Figure()
    # Collect the layout settings first so the call below stays short.
    layout_options = {
        "title": title,
        "xaxis_title": "",
        "yaxis_title": "",
        "showlegend": False,
    }
    return placeholder.update_layout(**layout_options)
|
71 |
|
72 |
+
# Function to process and visualize the full log probs with dynamic top_logprobs, handling missing tokens
|
73 |
def visualize_logprobs(json_input):
|
74 |
try:
|
75 |
# Parse the input (handles both JSON and Python dictionaries)
|
|
|
90 |
for entry in content:
|
91 |
logprob = ensure_float(entry.get("logprob", None))
|
92 |
if math.isfinite(logprob) and logprob >= -100000:
|
93 |
+
token = get_token(entry) # Safely get token, defaulting to "Unknown" if missing
|
94 |
+
tokens.append(token)
|
95 |
logprobs.append(logprob)
|
96 |
# Get top_logprobs, default to empty dict if None
|
97 |
top_probs = entry.get("top_logprobs", {})
|
98 |
if top_probs is None:
|
99 |
+
logger.debug("top_logprobs is None for token: %s, using empty dict", token)
|
100 |
top_probs = {} # Default to empty dict for None
|
101 |
# Ensure all values in top_logprobs are floats and create a list of tuples
|
102 |
finite_top_probs = []
|
|
|
154 |
for i, entry in enumerate(content):
|
155 |
logprob = ensure_float(entry.get("logprob", None))
|
156 |
if math.isfinite(logprob) and logprob >= -100000 and "top_logprobs" in entry:
|
157 |
+
token = get_token(entry) # Safely get token, defaulting to "Unknown" if missing
|
158 |
top_logprobs = entry.get("top_logprobs", {})
|
159 |
if top_logprobs is None:
|
160 |
logger.debug("top_logprobs is None for token: %s, using empty dict", token)
|
|
|
233 |
logger.error("Visualization failed: %s", str(e))
|
234 |
return (create_empty_figure("Log Probabilities of Generated Tokens"), None, "No finite log probabilities to display.", create_empty_figure("Top Token Log Probabilities"), create_empty_figure("Significant Probability Drops"))
|
235 |
|
236 |
+
# Gradio interface with full dataset visualization, dynamic top_logprobs, and handling missing tokens
|
237 |
with gr.Blocks(title="Log Probability Visualizer") as app:
|
238 |
gr.Markdown("# Log Probability Visualizer")
|
239 |
gr.Markdown(
|
240 |
+
"Paste your JSON or Python dictionary log prob data below to visualize all tokens at once. Fixed filter ≥ -100000, dynamic number of top_logprobs, handles missing 'token'."
|
241 |
)
|
242 |
|
243 |
with gr.Row():
|