# Streamlit app: shows an interactive grid component and decodes a string of
# concatenated, zero-padded GPT-2 token IDs back into text.
import streamlit as st
import streamlit.components.v1 as components
from transformers import AutoTokenizer, AutoModelForCausalLM

# Set page configuration
st.set_page_config(page_title="First Conscious Quadrant with Detokenizer", layout="wide")

# Title
st.title("First Conscious Quadrant with Detokenizer")

# HTML content (your original HTML/JS code)
# NOTE(review): this literal contains only plain text, no HTML tags or script;
# the markup appears to have been stripped from the file -- confirm against
# the original source before relying on the grid being interactive.
html_content = """ Interactive Base 50256 Grid
Click on the grid to select a coordinate
Hover Coordinate: (X: 0, Y: 0)
"""

# Embed the HTML content
components.html(html_content, height=700, scrolling=True)


# Streamlit re-runs this whole script on every widget interaction, so the
# heavyweight Hugging Face objects are cached instead of being re-created on
# each rerun.
@st.cache_resource
def _load_tokenizer():
    """Return the GPT-2 tokenizer, loaded once per server process."""
    return AutoTokenizer.from_pretrained('gpt2')


@st.cache_resource
def _load_model():
    """Return the GPT-2 causal language model, loaded once per server process."""
    return AutoModelForCausalLM.from_pretrained('gpt2')


# Load the tokenizer
tokenizer = _load_tokenizer()

# Detokenization section
st.header("Detokenization")
token_ids = st.text_input("Enter token IDs (concatenated without spaces):", "")


def split_token_ids(concatenated_ids: str, length: int = 5) -> list:
    """Split a string into consecutive chunks of ``length`` characters.

    Args:
        concatenated_ids: Token IDs written back to back, each padded to
            ``length`` characters.
        length: Width of one chunk; must be at least 1.

    Returns:
        The chunks in order, as strings. The final chunk is shorter than
        ``length`` when the input length is not a multiple of it.

    Raises:
        ValueError: If ``length`` is smaller than 1.
    """
    if length < 1:
        raise ValueError("length must be a positive integer")
    return [
        concatenated_ids[start:start + length]
        for start in range(0, len(concatenated_ids), length)
    ]


def remove_leading_zeros(grouped_ids) -> list:
    """Strip zero padding from each ID string, keeping the value ``0`` intact.

    Args:
        grouped_ids: Iterable of ID strings such as ``"00042"``.

    Returns:
        A list of the same length with leading ``'0'`` characters removed.
        A group made only of zeros becomes ``"0"`` rather than an empty
        string, so token ID 0 is not lost.
    """
    # ``group[:1]`` is "0" for an all-zero group and "" for an empty group.
    return [group.lstrip('0') or group[:1] for group in grouped_ids]


if st.button("Detokenize"):
    # Surrounding whitespace (e.g. from a paste) would shift the grouping.
    split_ids = split_token_ids(token_ids.strip())
    cleaned_ids = remove_leading_zeros(split_ids)
    cleaned_token_ids_str = ' '.join(cleaned_ids)

    # ``isdecimal`` accepts exactly the characters ``int`` can parse, whereas
    # ``isdigit`` also accepts characters such as superscripts that make
    # ``int`` raise ValueError.
    ignored_groups = [group for group in cleaned_ids if not group.isdecimal()]
    if ignored_groups:
        st.warning(f"Ignored non-numeric groups: {' '.join(ignored_groups)}")

    token_id_list = [int(group) for group in cleaned_ids if group.isdecimal()]
    detokenized_sentence = tokenizer.decode(token_id_list)

    st.write("Grouped and cleaned token IDs:")
    st.write(cleaned_token_ids_str)
    st.write("Detokenized sentence:")
    st.write(detokenized_sentence)

# Load the model
gpt2 = _load_model()

# Display help for the GPT-2 model
if st.checkbox("Show GPT-2 Model Help"):
    st.write("Help GPT2")
    st.help(gpt2)

# JavaScript to handle messages from the iframe
# NOTE(review): the body passed here is a single space -- the script that
# should handle the iframe messages appears to be missing; confirm.
components.html(
    """ """
)