import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load the fine-tuned joke model and its tokenizer from the Hugging Face Hub.
tokenizer = AutoTokenizer.from_pretrained("fubuki119/JokesGPT")
model = AutoModelForCausalLM.from_pretrained("fubuki119/JokesGPT")

device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)
model.eval()


def generate(max_length):
    """Sample a joke token by token, stopping at the end-of-text token."""
    starting_text = "JOKE:"
    max_length = int(max_length)  # gr.Number passes a float
    eos_token_id = tokenizer.eos_token_id  # id of "<|endoftext|>"

    with torch.no_grad():
        cur_ids = torch.tensor(tokenizer.encode(starting_text)).unsqueeze(0).to(device)
        prompt_len = cur_ids.shape[1]  # prompt tokens to strip from the output

        for _ in range(max_length):
            # Forward pass; keep only the logits for the last position.
            logits = model(cur_ids).logits
            probs = torch.softmax(logits[0, -1], dim=0)
            # Multinomial sampling over the full vocabulary.
            next_token_id = torch.multinomial(probs, 1).item()
            cur_ids = torch.cat(
                [cur_ids, torch.tensor([[next_token_id]], device=device)], dim=1
            )
            if next_token_id == eos_token_id:
                break

    ids = cur_ids[0].tolist()[prompt_len:]  # drop the "JOKE:" prompt tokens
    if ids and ids[-1] == eos_token_id:
        ids = ids[:-1]  # drop the trailing <|endoftext|>
    return tokenizer.decode(ids)


iface = gr.Interface(
    fn=generate,
    inputs=gr.Number(value=200, label="Max length"),
    outputs="text",
)
iface.launch(share=True, debug=True)
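
# Note: the manual sampling loop in generate() can also be expressed with
# transformers' built-in generation API. A minimal sketch, assuming the same
# model/tokenizer as above (not wired into the interface):
#
#   def generate_with_hf(max_length):
#       input_ids = tokenizer.encode("JOKE:", return_tensors="pt").to(device)
#       output_ids = model.generate(
#           input_ids,
#           max_length=int(max_length),
#           do_sample=True,                       # multinomial sampling, as above
#           eos_token_id=tokenizer.eos_token_id,
#       )
#       return tokenizer.decode(
#           output_ids[0][input_ids.shape[1]:], skip_special_tokens=True
#       )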