Spaces:
Runtime error
Runtime error
import solara | |
import random | |
import torch | |
import torch.nn.functional as F | |
import pandas as pd | |
from transformers import AutoTokenizer, AutoModelForCausalLM | |
# Cargar el tokenizer y el modelo preentrenado en español | |
tokenizer = AutoTokenizer.from_pretrained("datificate/gpt2-small-spanish") | |
model = AutoModelForCausalLM.from_pretrained("datificate/gpt2-small-spanish") | |
# tokenizer = AutoTokenizer.from_pretrained('gpt2') | |
# model = AutoModelForCausalLM.from_pretrained('gpt2') | |
text1 = solara.reactive("Hola, ¿cómo estás") | |
def Page(): | |
with solara.Column(margin=10): | |
solara.Markdown("#Siguiente token prediction ") | |
solara.Markdown("Hemos creado esta herramienta para entender el funcionamiento de los modelos de lenguage autoregresivos... ") | |
def on_action_cell(column, row_index): | |
text1.value += tokenizer.decode(top_10.indices[0][row_index]) | |
cell_actions = [solara.CellAction(icon="mdi-thumb-up", name="Select", on_click=on_action_cell)] | |
solara.InputText("Enter text:", value=text1, continuous_update=True) | |
if text1.value != "": | |
tokens = tokenizer.encode(text1.value, return_tensors="pt") | |
spans1 = "" | |
spans2 = "" | |
for i, token in enumerate(tokens[0]): | |
random.seed(i) | |
random_color = ''.join([random.choice('0123456789ABCDEF') for k in range(6)]) | |
spans1 += " " + f"<span style='font-family: helvetica; color: #{random_color}'>{token}</span>" | |
spans2 += " " + f"""<span style=" | |
padding: 6px; | |
border-right: 3px solid white; | |
line-height: 3em; | |
font-family: courier; | |
background-color: #{random_color}; | |
color: white; | |
position: relative; | |
"><span style=" | |
position: absolute; | |
top: 5.5ch; | |
line-height: 1em; | |
left: -0.5px; | |
font-size: 0.45em"> {token}</span>{tokenizer.decode([token])}</span>""" | |
solara.Markdown(f'{spans2}') | |
solara.Markdown(f'{spans1}') | |
outputs = model.generate(tokens, max_new_tokens=1, output_scores=True, return_dict_in_generate=True, pad_token_id=tokenizer.eos_token_id) | |
scores = F.softmax(outputs.scores[0], dim=-1) | |
top_10 = torch.topk(scores, 10) | |
df = pd.DataFrame() | |
df["probs"] = top_10.values[0] | |
df["probs"] = [f"{value:.2%}" for value in df["probs"].values] | |
df["next token ID"] = [top_10.indices[0][i].numpy() for i in range(10)] | |
df["predicted next token"] = [tokenizer.decode(top_10.indices[0][i]) for i in range(10)] | |
solara.Markdown("###Prediction") | |
solara.DataFrame(df, items_per_page=10, cell_actions=cell_actions) | |
Page() | |