import streamlit as st
import streamlit.components.v1 as components
from transformers import AutoTokenizer, AutoModelForCausalLM
# Page-level setup: the page title and the on-page heading use the same text,
# so it is defined once and passed to both calls.
PAGE_TITLE = "First Conscious Quadrant with Detokenizer"
st.set_page_config(layout="wide", page_title=PAGE_TITLE)
st.title(PAGE_TITLE)

# Markup handed to the embedded component below. At present it holds only
# these plain text lines.
html_content = """
Interactive Base 50256 Grid
Click on the grid to select a coordinate
Hover Coordinate: (X: 0, Y: 0)
"""

# Embed the markup with a height of 700 and scrolling enabled.
components.html(html_content, scrolling=True, height=700)
# Load the tokenizer
@st.cache_resource
def _load_tokenizer():
    """Return the GPT-2 tokenizer, loading it at most once per server process.

    Streamlit re-runs the whole script on every widget interaction; caching
    the tokenizer as a shared resource avoids reloading it on each rerun.
    """
    return AutoTokenizer.from_pretrained('gpt2')


tokenizer = _load_tokenizer()

# Detokenization section
st.header("Detokenization")
# Surrounding whitespace (e.g. from a copy/paste) is removed because the input
# is later cut into fixed-width groups, and a stray leading blank would shift
# every group boundary.
token_ids = st.text_input("Enter token IDs (concatenated without spaces):", "").strip()
def split_token_ids(concatenated_ids: str, length: int = 5) -> list[str]:
    """Cut a concatenated ID string into consecutive fixed-width groups.

    Args:
        concatenated_ids: The token IDs written back to back.
        length: Number of characters per group. Must be positive.

    Returns:
        The groups in their original order. The final group is shorter than
        ``length`` when the input length is not a multiple of ``length``; an
        empty input yields an empty list.

    Raises:
        ValueError: If ``length`` is zero or negative.
    """
    # A negative step would make range() empty and silently return [], and a
    # zero step fails with an unrelated-looking range() error; reject both
    # with a clear message instead.
    if length <= 0:
        raise ValueError(f"length must be a positive integer, got {length}")
    return [
        concatenated_ids[start:start + length]
        for start in range(0, len(concatenated_ids), length)
    ]
def remove_leading_zeros(grouped_ids: list[str]) -> list[str]:
    """Strip the zero padding from the front of each ID group.

    A group consisting only of zeros is reduced to ``"0"`` rather than to an
    empty string, so that the value zero is still represented after cleaning.

    Args:
        grouped_ids: The ID groups as strings, possibly zero-padded.

    Returns:
        A new list with one cleaned string per input group, in the same order.
        An empty input string stays empty.
    """
    # lstrip() turns an all-zero group into ""; falling back to the group's
    # first character restores a single "0" (and keeps "" for an empty group).
    return [group.lstrip('0') or group[:1] for group in grouped_ids]
if st.button("Detokenize"):
    # Cut the concatenated input into fixed-width groups, then strip the zero
    # padding from each group.
    split_ids = split_token_ids(token_ids)
    cleaned_ids = remove_leading_zeros(split_ids)
    cleaned_token_ids_str = ' '.join(cleaned_ids)

    # Only groups made purely of decimal digits can be converted to integers.
    # isdecimal() is used instead of isdigit() because isdigit() also accepts
    # characters such as superscript digits, which int() rejects with a
    # ValueError.
    token_id_list = []
    skipped_groups = []
    for raw_group, cleaned_group in zip(split_ids, cleaned_ids):
        if cleaned_group.isdecimal():
            token_id_list.append(int(cleaned_group))
        else:
            skipped_groups.append(raw_group)

    # Report ignored groups rather than dropping them without a trace.
    if skipped_groups:
        st.warning(
            "Ignored groups that are not valid token IDs: "
            + ', '.join(repr(group) for group in skipped_groups)
        )

    detokenized_sentence = tokenizer.decode(token_id_list)
    st.write("Grouped and cleaned token IDs:")
    st.write(cleaned_token_ids_str)
    st.write("Detokenized sentence:")
    st.write(detokenized_sentence)
# Load the model
@st.cache_resource
def _load_model():
    """Return the GPT-2 causal language model, loaded at most once per process.

    Streamlit re-runs the whole script on every widget interaction; caching
    the model as a shared resource avoids reloading it on each rerun.
    """
    return AutoModelForCausalLM.from_pretrained('gpt2')


gpt2 = _load_model()

# Display help for the GPT-2 model
if st.checkbox("Show GPT-2 Model Help"):
    st.write("Help GPT2")
    st.help(gpt2)
# Second embedded component, intended as the place for JavaScript that handles
# messages from the iframe. The markup passed here is a single newline, so it
# contains no script at present.
message_handler_markup = "\n"
components.html(message_handler_markup)