patpizio committed on
Commit 5cf0970 · 1 Parent(s): 31df035

Add plotting functionality

Files changed (1)
  1. app.py +36 -1
app.py CHANGED
@@ -1,7 +1,41 @@
 import torch
 import streamlit as st
+import numpy as np
+import plotly.express as px, plotly.graph_objects as go
+from plotly.subplots import make_subplots
 from transformers import AutoTokenizer, T5Tokenizer, T5ForConditionalGeneration, GenerationConfig, AutoModelForCausalLM
 
+def top_token_ids(outputs, threshold=-np.inf):
+    "Returns the index of the tokens whose score exceeds a threshold, for each output step"
+    indexes = []
+    for tensor in outputs['scores']:
+        candidates = np.argwhere(tensor.flatten().cpu() > threshold).numpy()[0]
+        ordering_mask = np.argsort(tensor[0][candidates].cpu())
+        candidates = candidates[ordering_mask]
+        if not isinstance(candidates, np.ndarray):
+            indexes.append(np.array([candidates]))
+        else:
+            indexes.append(candidates)
+    return indexes
+
+def plot_word_scores(top_token_ids, outputs, tokenizer, boolq=False, width=600):
+    fig = make_subplots(rows=len(top_token_ids), cols=1)
+    for step, candidates in enumerate(top_token_ids):
+        fig.append_trace(
+            go.Bar(
+                y=tokenizer.convert_ids_to_tokens(candidates),
+                x=outputs['scores'][step][0][candidates].cpu(),
+                orientation='h'
+            ),
+            row=step+1, col=1
+        )
+    fig.update_layout(
+        width=500,
+        height=300*len(top_token_ids),
+        showlegend=False
+    )
+    return fig
+
 st.title('How do LLM choose their words?')
 
 col1, col2 = st.columns(2)
@@ -65,4 +99,5 @@ if instruction:
 
     st.write(output_text)
 
-    model.compute_transition_scores(outputs.sequences, outputs.scores, normalize_logits=False)
+    fig = plot_word_scores(top_token_ids(outputs, threshold=-1), outputs, tokenizer)
+    st.plotly_chart(fig, use_container_width=False)
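
Note: the new helpers read outputs['scores'], which model.generate only produces when it is asked to return a dict with per-step scores. A minimal sketch of such a generation call, using the T5 classes already imported in app.py; the model name, prompt, and max_new_tokens below are placeholders for illustration, not taken from this commit:

from transformers import T5Tokenizer, T5ForConditionalGeneration

# Hypothetical model/prompt; the app's actual choices are configured elsewhere in app.py.
tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-small")
model = T5ForConditionalGeneration.from_pretrained("google/flan-t5-small")

inputs = tokenizer("Is the sky blue?", return_tensors="pt")
outputs = model.generate(
    **inputs,
    max_new_tokens=10,
    return_dict_in_generate=True,  # makes the result dict-like, so outputs['scores'] works
    output_scores=True,            # one (batch, vocab_size) score tensor per generated step
)

With greedy decoding and a single prompt, each element of outputs['scores'] has shape (1, vocab_size), which matches the tensor[0][candidates] indexing in top_token_ids and plot_word_scores.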