Spaces:

codelion
/

LogProbsVisualizer

Running

App Files Community

codelion committed on Feb 26

Commit

527fd08

verified ·

1 Parent(s): d8a969c

Update app.py

Browse files

Files changed (1) hide show

app.py +44 -12

app.py CHANGED Viewed

@@ -6,17 +6,26 @@ import io
 import base64
 import math
 import ast
 # Function to safely parse JSON or Python dictionary input
 def parse_input(json_input):
     try:
         # Try to parse as JSON first
         data = json.loads(json_input)
         return data
     except json.JSONDecodeError as e:
         try:
             # If JSON fails, try to parse as Python literal (e.g., with single quotes)
             data = ast.literal_eval(json_input)
             # Convert Python dictionary to JSON-compatible format (replace single quotes with double quotes)
             def dict_to_json(obj):
                 if isinstance(obj, dict):
@@ -25,10 +34,27 @@ def parse_input(json_input):
                     return [dict_to_json(item) for item in obj]
                 else:
                     return obj
-            return dict_to_json(data)
         except (SyntaxError, ValueError) as e:
             raise ValueError(f"Malformed input: {str(e)}. Ensure property names are in double quotes (e.g., \"content\") or correct Python dictionary format.")
 # Function to process and visualize log probs
 def visualize_logprobs(json_input):
     try:
@@ -47,32 +73,37 @@ def visualize_logprobs(json_input):
         tokens = []
         logprobs = []
         for entry in content:
-            if (
-                "logprob" in entry
-                and entry["logprob"] is not None
-                and math.isfinite(entry["logprob"])
-            ):
                 tokens.append(entry["token"])
-                logprobs.append(entry["logprob"])
         # Prepare table data, handling None in top_logprobs
         table_data = []
         for entry in content:
             # Only include entries with finite logprob and non-None top_logprobs
             if (
-                "logprob" in entry
-                and entry["logprob"] is not None
-                and math.isfinite(entry["logprob"])
                 and "top_logprobs" in entry
                 and entry["top_logprobs"] is not None
             ):
                 token = entry["token"]
-                logprob = entry["logprob"]
                 top_logprobs = entry["top_logprobs"]
                 # Extract top 3 alternatives from top_logprobs
                 top_3 = sorted(
-                    top_logprobs.items(), key=lambda x: x[1], reverse=True
                 )[:3]
                 row = [token, f"{logprob:.4f}"]
                 for alt_token, alt_logprob in top_3:
@@ -149,6 +180,7 @@ def visualize_logprobs(json_input):
         return img_html, df, colored_text_html
     except Exception as e:
         return f"Error: {str(e)}", None, None
 # Gradio interface

 import base64
 import math
 import ast
+import logging
+# Set up logging
+logging.basicConfig(level=logging.DEBUG)
+logger = logging.getLogger(__name__)
 # Function to safely parse JSON or Python dictionary input
 def parse_input(json_input):
+    logger.debug("Attempting to parse input: %s", json_input)
     try:
         # Try to parse as JSON first
         data = json.loads(json_input)
+        logger.debug("Successfully parsed as JSON")
         return data
     except json.JSONDecodeError as e:
+        logger.error("JSON parsing failed: %s", str(e))
         try:
             # If JSON fails, try to parse as Python literal (e.g., with single quotes)
             data = ast.literal_eval(json_input)
+            logger.debug("Successfully parsed as Python literal")
             # Convert Python dictionary to JSON-compatible format (replace single quotes with double quotes)
             def dict_to_json(obj):
                 if isinstance(obj, dict):
                     return [dict_to_json(item) for item in obj]
                 else:
                     return obj
+            converted_data = dict_to_json(data)
+            logger.debug("Converted to JSON-compatible format")
+            return converted_data
         except (SyntaxError, ValueError) as e:
+            logger.error("Python literal parsing failed: %s", str(e))
             raise ValueError(f"Malformed input: {str(e)}. Ensure property names are in double quotes (e.g., \"content\") or correct Python dictionary format.")
+# Function to ensure a value is a float, converting from string if necessary
+def ensure_float(value):
+    if value is None:
+        return None
+    if isinstance(value, str):
+        try:
+            return float(value)
+        except ValueError:
+            logger.error("Failed to convert string '%s' to float", value)
+            return None
+    if isinstance(value, (int, float)):
+        return float(value)
+    return None
 # Function to process and visualize log probs
 def visualize_logprobs(json_input):
     try:
         tokens = []
         logprobs = []
         for entry in content:
+            logprob = ensure_float(entry.get("logprob", None))
+            if logprob is not None and math.isfinite(logprob):
                 tokens.append(entry["token"])
+                logprobs.append(logprob)
+            else:
+                logger.debug("Skipping entry with logprob: %s (type: %s)", entry.get("logprob"), type(entry.get("logprob", None)))
         # Prepare table data, handling None in top_logprobs
         table_data = []
         for entry in content:
+            logprob = ensure_float(entry.get("logprob", None))
             # Only include entries with finite logprob and non-None top_logprobs
             if (
+                logprob is not None
+                and math.isfinite(logprob)
                 and "top_logprobs" in entry
                 and entry["top_logprobs"] is not None
             ):
                 token = entry["token"]
+                logger.debug("Processing token: %s, logprob: %s (type: %s)", token, logprob, type(logprob))
                 top_logprobs = entry["top_logprobs"]
+                # Ensure all values in top_logprobs are floats
+                finite_top_logprobs = {}
+                for key, value in top_logprobs.items():
+                    float_value = ensure_float(value)
+                    if float_value is not None and math.isfinite(float_value):
+                        finite_top_logprobs[key] = float_value
                 # Extract top 3 alternatives from top_logprobs
                 top_3 = sorted(
+                    finite_top_logprobs.items(), key=lambda x: x[1], reverse=True
                 )[:3]
                 row = [token, f"{logprob:.4f}"]
                 for alt_token, alt_logprob in top_3:
         return img_html, df, colored_text_html
     except Exception as e:
+        logger.error("Visualization failed: %s", str(e))
         return f"Error: {str(e)}", None, None
 # Gradio interface