"""Gradio demo that streams text from a causal language model.

The prompt typed by the user and the generated continuation share a single
textbox: the function yields the prompt first and then the prompt plus every
chunk of text decoded so far, so the box is updated in place while the model
is still generating.
"""

from collections.abc import Iterator
from threading import Thread

import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

model_id = "AuriLab/gpt-bi"

# Upper bound on the number of tokens generated per request.
MAX_NEW_TOKENS = 50

# Loaded once at import time so every request reuses the same weights.
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)


def _run_generation(streamer: TextIteratorStreamer, generation_kwargs: dict) -> None:
    """Run ``model.generate`` and always release the consumer of *streamer*.

    ``generate`` only signals end-of-stream when it finishes normally. If it
    raises, the iterator on the other side would block forever, so the stream
    is closed explicitly before the error is re-raised in the worker thread.
    """
    try:
        model.generate(streamer=streamer, **generation_kwargs)
    except Exception:
        streamer.end()
        raise


def generar_texto(prompt: str) -> Iterator[str]:
    """Yield the prompt followed by its progressively generated continuation.

    Args:
        prompt: Text entered by the user.

    Yields:
        The accumulated text (prompt plus everything generated so far) after
        each chunk of decoded text is received from the model.
    """
    inputs = tokenizer(prompt, return_tensors="pt")

    # Nothing to condition on: echo the prompt back instead of calling
    # ``generate`` with an empty sequence.
    if inputs["input_ids"].numel() == 0:
        yield prompt
        return

    # skip_special_tokens keeps markers such as the end-of-sequence token from
    # being shown to the user as literal text.
    streamer = TextIteratorStreamer(
        tokenizer,
        skip_prompt=True,
        skip_special_tokens=True,
    )
    generation_kwargs = {
        "input_ids": inputs["input_ids"],
        # Forward the mask produced by the tokenizer rather than letting
        # ``generate`` guess it from the pad token.
        "attention_mask": inputs.get("attention_mask"),
        "max_new_tokens": MAX_NEW_TOKENS,
        "do_sample": True,
    }

    # ``generate`` blocks until completion, so it runs in a worker thread while
    # this generator consumes the streamer and forwards updates to the UI.
    thread = Thread(target=_run_generation, args=(streamer, generation_kwargs))
    thread.start()

    output = prompt
    yield output
    for new_text in streamer:
        output += new_text
        yield output

    # The stream is exhausted, so the worker is finishing; wait for it so no
    # thread outlives the request.
    thread.join()


# The same component instance is used as input and output on purpose: the
# generated text is appended inside the box where the prompt was typed.
textbox = gr.Textbox(
    lines=5,
    placeholder="Escribe tu prompt aquí...",
    label="Generación en tiempo real",
)

interfaz = gr.Interface(
    fn=generar_texto,
    inputs=textbox,
    outputs=textbox,
    title="Generador de Texto con Streamer",
    description="Escribe y los tokens aparecerán en tiempo real en la misma caja",
)

if __name__ == "__main__":
    interfaz.launch()