Spaces:

codelion
/

LogProbsVisualizer

Running

App Files Files Community

codelion committed on Feb 26

Commit

0d41503

verified ·

1 Parent(s): 7d5d680

Update app.py

Browse files

Files changed (1) hide show

app.py +25 -18

app.py CHANGED Viewed

@@ -23,9 +23,9 @@ def parse_input(json_input):
         logger.debug("Successfully parsed as JSON")
         return data
     except json.JSONDecodeError as e:
-        logger.error("JSON parsing failed: %s", str(e))
         try:
-            # If JSON fails, try to parse as Python literal (e.g., with single quotes)
             data = ast.literal_eval(json_input)
             logger.debug("Successfully parsed as Python literal")
             # Convert Python dictionary to JSON-compatible format (replace single quotes with double quotes)
@@ -40,8 +40,8 @@ def parse_input(json_input):
             logger.debug("Converted to JSON-compatible format")
             return converted_data
         except (SyntaxError, ValueError) as e:
-            logger.error("Python literal parsing failed: %s", str(e))
-            raise ValueError(f"Malformed input: {str(e)}. Ensure property names are in double quotes (e.g., \"content\") or correct Python dictionary format.")
 # Function to ensure a value is a float, converting from string if necessary
 def ensure_float(value):
@@ -69,27 +69,32 @@ def get_token(entry):
 # Function to create a placeholder figure: builds go.Figure() and sets only the
 # given title, with blank x/y axis titles and the legend hidden.
 # NOTE(review): `go` is presumably plotly.graph_objects (import not visible here) - confirm.
 def create_empty_figure(title):
     return go.Figure().update_layout(title=title, xaxis_title="", yaxis_title="", showlegend=False)
-# Function to process and visualize the full log probs with dynamic top_logprobs, handling missing tokens
 def visualize_logprobs(json_input):
     try:
-        # Parse the input (handles both JSON and Python dictionaries)
         data = parse_input(json_input)
-        # Ensure data is a list or dictionary with 'content'
         if isinstance(data, dict) and "content" in data:
             content = data["content"]
         elif isinstance(data, list):
-            content = data
         else:
-            raise ValueError("Input must be a list or dictionary with 'content' key")
         # Extract tokens, log probs, and top alternatives, skipping non-finite values with fixed filter of -100000
         tokens = []
         logprobs = []
         top_alternatives = []  # List to store all top_logprobs (dynamic length)
         for entry in content:
             logprob = ensure_float(entry.get("logprob", None))
-            if math.isfinite(logprob) and logprob >= -100000:
                 token = get_token(entry)  # Safely get token, defaulting to "Unknown" if missing
                 tokens.append(token)
                 logprobs.append(logprob)
@@ -110,9 +115,9 @@ def visualize_logprobs(json_input):
             else:
                 logger.debug("Skipping entry with logprob: %s (type: %s)", entry.get("logprob"), type(entry.get("logprob", None)))
-        # Check if there's valid data after filtering
         if not logprobs or not tokens:
-            return (create_empty_figure("Log Probabilities of Generated Tokens"), None, "No finite log probabilities to display.", create_empty_figure("Top Token Log Probabilities"), create_empty_figure("Significant Probability Drops"))
         # 1. Main Log Probability Plot (Interactive Plotly)
         main_fig = go.Figure()
@@ -152,8 +157,10 @@ def visualize_logprobs(json_input):
         table_data = []
         max_alternatives = max(len(alts) for alts in top_alternatives) if top_alternatives else 0
         for i, entry in enumerate(content):
             logprob = ensure_float(entry.get("logprob", None))
-            if math.isfinite(logprob) and logprob >= -100000 and "top_logprobs" in entry:
                 token = get_token(entry)  # Safely get token, defaulting to "Unknown" if missing
                 top_logprobs = entry.get("top_logprobs", {})
                 if top_logprobs is None:
@@ -206,7 +213,7 @@ def visualize_logprobs(json_input):
                     colored_text += " "
             colored_text_html = f"<p>{colored_text}</p>"
         else:
-            colored_text_html = "No finite log probabilities to display."
         # Top Token Log Probabilities (Interactive Plotly, dynamic length)
         alt_viz_fig = create_empty_figure("Top Token Log Probabilities") if not logprobs or not top_alternatives else go.Figure()
@@ -230,21 +237,21 @@ def visualize_logprobs(json_input):
         return (main_fig, df, colored_text_html, alt_viz_fig, drops_fig)
     except Exception as e:
-        logger.error("Visualization failed: %s", str(e))
         return (create_empty_figure("Log Probabilities of Generated Tokens"), None, "No finite log probabilities to display.", create_empty_figure("Top Token Log Probabilities"), create_empty_figure("Significant Probability Drops"))
-# Gradio interface with full dataset visualization, dynamic top_logprobs, and handling missing tokens
 with gr.Blocks(title="Log Probability Visualizer") as app:
     gr.Markdown("# Log Probability Visualizer")
     gr.Markdown(
-        "Paste your JSON or Python dictionary log prob data below to visualize all tokens at once. Fixed filter ≥ -100000, dynamic number of top_logprobs, handles missing 'token'."
     )
     with gr.Row():
         json_input = gr.Textbox(
             label="JSON Input",
             lines=10,
-            placeholder="Paste your JSON (e.g., {\"content\": [...]}) or Python dict (e.g., {'content': [...]}) here...",
         )
     with gr.Row():

         logger.debug("Successfully parsed as JSON")
         return data
     except json.JSONDecodeError as e:
+        logger.error("JSON parsing failed: %s (Input: %s)", str(e), json_input[:100] + "..." if len(json_input) > 100 else json_input)
         try:
+            # If JSON fails, try to parse as Python literal (e.g., with single quotes), but only for JSON-like strings
             data = ast.literal_eval(json_input)
             logger.debug("Successfully parsed as Python literal")
             # Convert Python dictionary to JSON-compatible format (replace single quotes with double quotes)
             logger.debug("Converted to JSON-compatible format")
             return converted_data
         except (SyntaxError, ValueError) as e:
+            logger.error("Python literal parsing failed: %s (Input: %s)", str(e), json_input[:100] + "..." if len(json_input) > 100 else json_input)
+            raise ValueError(f"Malformed input: {str(e)}. Ensure property names are in double quotes (e.g., \"content\") and the format matches JSON (e.g., {{\"content\": [...]}}).")
 # Function to ensure a value is a float, converting from string if necessary
 def ensure_float(value):
 # Function to create a placeholder figure: builds go.Figure() and sets only the
 # given title, with blank x/y axis titles and the legend hidden.
 # NOTE(review): `go` is presumably plotly.graph_objects (import not visible here) - confirm.
 def create_empty_figure(title):
     return go.Figure().update_layout(title=title, xaxis_title="", yaxis_title="", showlegend=False)
+# Function to process and visualize the full log probs with dynamic top_logprobs, handling missing tokens and JSON structure
 def visualize_logprobs(json_input):
     try:
+        # Parse the input (handles JSON only, as specified)
         data = parse_input(json_input)
+        # Ensure data is a dictionary with 'content' key containing a list
         if isinstance(data, dict) and "content" in data:
             content = data["content"]
+            if not isinstance(content, list):
+                raise ValueError("Content must be a list of entries")
         elif isinstance(data, list):
+            content = data  # Handle direct list input (though only JSON is expected)
         else:
+            raise ValueError("Input must be a dictionary with 'content' key or a list of entries")
         # Extract tokens, log probs, and top alternatives, skipping non-finite values with fixed filter of -100000
         tokens = []
         logprobs = []
         top_alternatives = []  # List to store all top_logprobs (dynamic length)
         for entry in content:
+            if not isinstance(entry, dict):
+                logger.warning("Skipping non-dictionary entry: %s", entry)
+                continue
             logprob = ensure_float(entry.get("logprob", None))
+            if logprob >= -100000:  # Include all entries with default 0.0, removing math.isfinite check
                 token = get_token(entry)  # Safely get token, defaulting to "Unknown" if missing
                 tokens.append(token)
                 logprobs.append(logprob)
             else:
                 logger.debug("Skipping entry with logprob: %s (type: %s)", entry.get("logprob"), type(entry.get("logprob", None)))
+        # Check if there's valid data after filtering (including default 0.0)
         if not logprobs or not tokens:
+            return (create_empty_figure("Log Probabilities of Generated Tokens"), None, "No tokens to display.", create_empty_figure("Top Token Log Probabilities"), create_empty_figure("Significant Probability Drops"))
         # 1. Main Log Probability Plot (Interactive Plotly)
         main_fig = go.Figure()
         table_data = []
         max_alternatives = max(len(alts) for alts in top_alternatives) if top_alternatives else 0
         for i, entry in enumerate(content):
+            if not isinstance(entry, dict):
+                continue
             logprob = ensure_float(entry.get("logprob", None))
+            if logprob >= -100000 and "top_logprobs" in entry:  # Include all entries with default 0.0
                 token = get_token(entry)  # Safely get token, defaulting to "Unknown" if missing
                 top_logprobs = entry.get("top_logprobs", {})
                 if top_logprobs is None:
                     colored_text += " "
             colored_text_html = f"<p>{colored_text}</p>"
         else:
+            colored_text_html = "No tokens to display."
         # Top Token Log Probabilities (Interactive Plotly, dynamic length)
         alt_viz_fig = create_empty_figure("Top Token Log Probabilities") if not logprobs or not top_alternatives else go.Figure()
         return (main_fig, df, colored_text_html, alt_viz_fig, drops_fig)
     except Exception as e:
+        logger.error("Visualization failed: %s (Input: %s)", str(e), json_input[:100] + "..." if len(json_input) > 100 else json_input)
         return (create_empty_figure("Log Probabilities of Generated Tokens"), None, "No finite log probabilities to display.", create_empty_figure("Top Token Log Probabilities"), create_empty_figure("Significant Probability Drops"))
+# Gradio interface with full dataset visualization, dynamic top_logprobs, and robust JSON handling
 with gr.Blocks(title="Log Probability Visualizer") as app:
     gr.Markdown("# Log Probability Visualizer")
     gr.Markdown(
+        "Paste your JSON log prob data below to visualize all tokens at once. Fixed filter ≥ -100000, dynamic number of top_logprobs, handles missing or null fields."
     )
     with gr.Row():
         json_input = gr.Textbox(
             label="JSON Input",
             lines=10,
+            placeholder="Paste your JSON (e.g., {\"content\": [{\"bytes\": [44], \"logprob\": 0.0, \"token\": \",\", \"top_logprobs\": {\" so\": -13.8046875, \".\": -13.8046875, \"，\": -13.640625}}]}).",
         )
     with gr.Row():