import streamlit as st
import tiktoken

from .content import TOKEN_ESTIMATOR_TEXT

# Name of the tiktoken encoding used by the estimator widget below.
_ESTIMATOR_ENCODING_NAME = "cl100k_base"


def num_tokens_from_string(string: str, encoding_name: str) -> int:
    """Return the number of tokens in a text string.

    Args:
        string: Text to tokenize.
        encoding_name: Name of the tiktoken encoding to load through
            ``tiktoken.get_encoding``.

    Returns:
        The length of the token sequence produced by encoding ``string``.
    """
    encoding = tiktoken.get_encoding(encoding_name)
    return len(encoding.encode(string))


def token_estimator():
    """Render the "Tokens estimator" section of the Streamlit page.

    The section shows a heading and an introduction, a collapsed expander
    filled with ``TOKEN_ESTIMATOR_TEXT``, a text area pre-filled with a
    sample sentence, and a metric holding the token count of the text
    currently in that text area.
    """
    st.markdown("### 🪙 Tokens estimator")

    st.markdown(
        "As our methodology deeply relies on the number of tokens processed by "
        "the model *(and as no-one is token-fluent)*, we provide you with a "
        "tool to estimate the number of tokens in a given text."
    )

    st.expander("ℹ️ What is a token anyway ?", expanded=False).markdown(
        TOKEN_ESTIMATOR_TEXT
    )

    user_text_input = st.text_area(
        "Type or paste some text to estimate the amount of tokens.",
        "EcoLogits is a great project!",
    )

    # Only the middle column of the 2:1:2 layout is used, which keeps the
    # metric horizontally centered; the outer columns stay empty.
    _, col2, _ = st.columns([2, 1, 2])

    with col2:
        st.metric(
            label='tokens estimated amount',
            # label_visibility='hidden',
            value=num_tokens_from_string(
                user_text_input, _ESTIMATOR_ENCODING_NAME
            ),
            border=True,
        )