import streamlit as st | |
import tiktoken | |
from .content import TOKEN_ESTIMATOR_TEXT | |
def num_tokens_from_string(string: str, encoding_name: str) -> int: | |
"""Returns the number of tokens in a text string.""" | |
encoding = tiktoken.get_encoding(encoding_name) | |
num_tokens = len(encoding.encode(string)) | |
return num_tokens | |
def token_estimator(): | |
st.markdown("### 🪙 Tokens estimator") | |
st.markdown("As our methodology deeply relies on the number of tokens processed by the model *(and as no-one is token-fluent)*, we provide you with a tool to estimate the number of tokens in a given text.") | |
st.expander("ℹ️ What is a token anyway ?", expanded = False).markdown(TOKEN_ESTIMATOR_TEXT) | |
user_text_input = st.text_area("Type or paste some text to estimate the amount of tokens.", "EcoLogits is a great project!") | |
_, col2, _ = st.columns([2, 1, 2]) | |
with col2: | |
st.metric( | |
label = 'tokens estimated amount', | |
#label_visibility = 'hidden', | |
value = num_tokens_from_string(user_text_input, "cl100k_base"), | |
border = True | |
) |