File size: 460 Bytes
09f1499
 
 
 
 
 
 
e46a0b5
 
09f1499
e46a0b5
 
1
2
3
4
5
6
7
8
9
10
11
12
13
from sentencepiece import SentencePieceProcessor
import gradio as gr

# Load the SentencePiece tokenizer model once at startup.
# NOTE(review): assumes "tokenizer.model" is in the current working
# directory when the script is launched — confirm deployment layout.
sp = SentencePieceProcessor(model_file="tokenizer.model")

def tokenize(input_text):
    """Tokenize *input_text* with the loaded SentencePiece model.

    Args:
        input_text: Raw text entered by the user.

    Returns:
        A ``(token_count, token_ids)`` tuple — the number of tokens and
        the list of integer token ids.
    """
    tokens = sp.EncodeAsIds(input_text)
    # Bug fix: the original also called sp.DecodeIds(tokens) and bound the
    # result to a variable that was never used — a dead round-trip removed here.
    return len(tokens), tokens

# Build and launch the Gradio UI: one multi-line text input, two text outputs
# (token count and the raw token-id list).
# Bug fix: gr.inputs.Textbox / gr.outputs.Textbox were deprecated in Gradio 3
# and removed in Gradio 4; the component classes live directly on the gr
# namespace now.
iface = gr.Interface(
    fn=tokenize,
    inputs=gr.Textbox(lines=7, label="Input Text"),
    outputs=[
        gr.Textbox(label="Token Count"),
        gr.Textbox(label="Tokens"),
    ],
)
iface.launch()