import gradio as gr
import json
import matplotlib.pyplot as plt
import pandas as pd
import io
import base64
import math
import ast
import logging
import numpy as np
import plotly.graph_objects as go
# Set up logging: configure the root logger at DEBUG level and create the
# module-level logger used by the functions below.
# NOTE(review): at DEBUG level parse_input logs the full raw input text --
# confirm this verbosity is intended outside of development.
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)
# Function to safely parse JSON or Python dictionary input
def _to_json_compatible(obj):
    """Recursively coerce a parsed Python literal into JSON-style containers.

    Dictionary keys are converted to strings and tuples become lists; lists
    are rebuilt element by element and every other value is returned as is.
    """
    if isinstance(obj, dict):
        return {str(key): _to_json_compatible(value) for key, value in obj.items()}
    if isinstance(obj, (list, tuple)):
        return [_to_json_compatible(item) for item in obj]
    return obj


def parse_input(json_input):
    """Parse text as JSON, falling back to a Python literal.

    Args:
        json_input: Text holding either a JSON document or a Python literal
            (for example a dictionary written with single quotes).

    Returns:
        The decoded data. Results obtained through the Python-literal
        fallback have their dictionary keys converted to strings and their
        tuples converted to lists.

    Raises:
        ValueError: If the input is neither valid JSON nor a valid Python
            literal. The underlying parser error is attached as the cause.
    """
    logger.debug("Attempting to parse input: %s", json_input)
    try:
        # Try to parse as JSON first. TypeError covers non-text input such
        # as None, which json.loads rejects before it even starts decoding.
        data = json.loads(json_input)
    except (json.JSONDecodeError, TypeError) as json_err:
        logger.error("JSON parsing failed: %s", json_err)
    else:
        logger.debug("Successfully parsed as JSON")
        return data

    try:
        # If JSON fails, try a Python literal (e.g., single-quoted strings).
        # literal_eval only evaluates literals, never arbitrary expressions.
        data = ast.literal_eval(json_input)
    except (SyntaxError, ValueError, TypeError, MemoryError, RecursionError) as literal_err:
        # literal_eval may raise any of these on malformed input (for
        # instance TypeError for an unhashable dict key); normalise them all
        # to the single ValueError that callers are told to expect.
        logger.error("Python literal parsing failed: %s", literal_err)
        raise ValueError(
            f"Malformed input: {literal_err}. Ensure property names are in double quotes (e.g., \"content\") or correct Python dictionary format."
        ) from literal_err

    logger.debug("Successfully parsed as Python literal")
    converted_data = _to_json_compatible(data)
    logger.debug("Converted to JSON-compatible format")
    return converted_data
# Function to ensure a value is a float, converting from string if necessary
def ensure_float(value):
    """Coerce a value to ``float``, substituting 0.0 when that is not possible.

    Args:
        value: The value to convert; may be ``None``, a string, a number, or
            any other object.

    Returns:
        ``float(value)`` for ints, floats and strings that parse as a float;
        ``0.0`` for ``None``, for strings that do not parse, and for every
        other type.
    """
    # Numbers pass straight through.
    if isinstance(value, (int, float)):
        return float(value)

    # Strings are converted when they parse; otherwise the failure is logged.
    if isinstance(value, str):
        try:
            return float(value)
        except ValueError:
            logger.error("Failed to convert string '%s' to float", value)
            return 0.0

    # None is replaced by 0.0 so the entry can still be visualized.
    if value is None:
        logger.debug("Replacing None logprob with 0.0")

    # Shared fallback for None and for any unsupported type.
    return 0.0
# Function to get or generate a token value (default to "Unknown" if missing)
# Sentinel default: lets get_token tell an absent "token" key apart from any
# real token value, including a token whose text happens to be "Unknown".
_TOKEN_MISSING = object()


def get_token(entry):
    """Return the entry's ``"token"`` value, or ``"Unknown"`` if the key is absent.

    Args:
        entry: A mapping describing one generated token; it must provide
            ``.get``.

    Returns:
        The value stored under ``"token"`` (returned unchanged, whatever it
        is), or the string ``"Unknown"`` when the key is not present. A
        warning is logged only in the missing-key case.
    """
    token = entry.get("token", _TOKEN_MISSING)
    if token is _TOKEN_MISSING:
        logger.warning("Missing 'token' key for entry: %s, using 'Unknown'", entry)
        return "Unknown"
    return token
# Function to create an empty Plotly figure
def create_empty_figure(title):
    """Build a Plotly figure that has no traces, only a title.

    Args:
        title: Text used as the figure title.

    Returns:
        A ``go.Figure`` with the given title, empty axis titles and the
        legend hidden.
    """
    layout_options = {
        "title": title,
        "xaxis_title": "",
        "yaxis_title": "",
        "showlegend": False,
    }
    placeholder = go.Figure()
    # update_layout returns the figure it was called on, so its result is
    # returned directly, exactly as a chained call would.
    return placeholder.update_layout(**layout_options)
# Function to process and visualize the full log probs with dynamic top_logprobs, handling missing tokens
def visualize_logprobs(json_input):
try:
# Parse the input (handles both JSON and Python dictionaries)
data = parse_input(json_input)
# Ensure data is a list or dictionary with 'content'
if isinstance(data, dict) and "content" in data:
content = data["content"]
elif isinstance(data, list):
content = data
else:
raise ValueError("Input must be a list or dictionary with 'content' key")
# Extract tokens, log probs, and top alternatives, skipping non-finite values with fixed filter of -100000
tokens = []
logprobs = []
top_alternatives = [] # List to store all top_logprobs (dynamic length)
for entry in content:
logprob = ensure_float(entry.get("logprob", None))
if math.isfinite(logprob) and logprob >= -100000:
token = get_token(entry) # Safely get token, defaulting to "Unknown" if missing
tokens.append(token)
logprobs.append(logprob)
# Get top_logprobs, default to empty dict if None
top_probs = entry.get("top_logprobs", {})
if top_probs is None:
logger.debug("top_logprobs is None for token: %s, using empty dict", token)
top_probs = {} # Default to empty dict for None
# Ensure all values in top_logprobs are floats and create a list of tuples
finite_top_probs = []
for key, value in top_probs.items():
float_value = ensure_float(value)
if float_value is not None and math.isfinite(float_value):
finite_top_probs.append((key, float_value))
# Sort by log probability (descending) to get all alternatives
sorted_probs = sorted(finite_top_probs, key=lambda x: x[1], reverse=True)
top_alternatives.append(sorted_probs) # Store all alternatives, dynamic length
else:
logger.debug("Skipping entry with logprob: %s (type: %s)", entry.get("logprob"), type(entry.get("logprob", None)))
# Check if there's valid data after filtering
if not logprobs or not tokens:
return (create_empty_figure("Log Probabilities of Generated Tokens"), None, "No finite log probabilities to display.", create_empty_figure("Top Token Log Probabilities"), create_empty_figure("Significant Probability Drops"))
# 1. Main Log Probability Plot (Interactive Plotly)
main_fig = go.Figure()
main_fig.add_trace(go.Scatter(x=list(range(len(logprobs))), y=logprobs, mode='markers+lines', name='Log Prob', marker=dict(color='blue')))
main_fig.update_layout(
title="Log Probabilities of Generated Tokens",
xaxis_title="Token Position",
yaxis_title="Log Probability",
hovermode="closest",
clickmode='event+select'
)
main_fig.update_traces(
customdata=[f"Token: {tok}, Log Prob: {prob:.4f}, Position: {i}" for i, (tok, prob) in enumerate(zip(tokens, logprobs))],
hovertemplate='%{customdata}
{colored_text}
" else: colored_text_html = "No finite log probabilities to display." # Top Token Log Probabilities (Interactive Plotly, dynamic length) alt_viz_fig = create_empty_figure("Top Token Log Probabilities") if not logprobs or not top_alternatives else go.Figure() if logprobs and top_alternatives: for i, (token, probs) in enumerate(zip(tokens, top_alternatives)): for j, (alt_tok, prob) in enumerate(probs): alt_viz_fig.add_trace(go.Bar(x=[f"{token} (Pos {i})"], y=[prob], name=f"{alt_tok}", marker_color=['blue', 'green', 'red', 'purple', 'orange'][:len(probs)])) alt_viz_fig.update_layout( title="Top Token Log Probabilities", xaxis_title="Token (Position)", yaxis_title="Log Probability", barmode='stack', hovermode="closest", clickmode='event+select' ) alt_viz_fig.update_traces( customdata=[f"Token: {tok}, Alt: {alt}, Log Prob: {prob:.4f}, Position: {i}" for i, (tok, alts) in enumerate(zip(tokens, top_alternatives)) for alt, prob in alts], hovertemplate='%{customdata}