"""Gradio demo that serves a Qwen2 causal language model through PaddleNLP.

Importing this module loads the tokenizer and the model weights; running it
as a script starts the Gradio web UI.
"""

import logging

import gradio as gr
from paddlenlp.transformers import AutoModelForCausalLM, AutoTokenizer

logger = logging.getLogger(__name__)

# Checkpoint used for both the tokenizer and the model so they stay in sync.
MODEL_NAME = "Qwen/Qwen2-0.5B"
# Upper bound on the number of tokens generated per request.
MAX_NEW_TOKENS = 128

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, dtype="float32")


def inference(input_text: str) -> str:
    """Generate a continuation for *input_text* with the loaded model.

    Args:
        input_text: Prompt typed into the Gradio text box.

    Returns:
        The decoded text produced by the model, with special tokens removed.
        An empty string is returned for an empty or whitespace-only prompt.
    """
    if not input_text or not input_text.strip():
        # Nothing to generate from; skip the model call entirely.
        return ""

    logger.debug("Prompt received: %r", input_text)

    # "pd" asks the tokenizer for Paddle tensors.
    input_features = tokenizer(input_text, return_tensors="pd")
    outputs = model.generate(**input_features, max_new_tokens=MAX_NEW_TOKENS)

    # NOTE(review): the first element of the generate() result is treated as
    # the batch of generated token ids -- confirm against the installed
    # PaddleNLP version.
    generated_ids = outputs[0]
    decoded = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
    # Only one prompt is submitted per call, so the first entry is the answer.
    return decoded[0]


title = 'PaddlePaddle Meets LLM'
description = 'What is special: underlying execution is using PaddlePaddle and PaddleNLP!'
# NOTE(review): this text looks like it once carried HTML markup (probably a
# link to the repository) that has been stripped -- confirm and restore it.
article = "\n\nPaddleNLP Github Repo\n\n"
examples = ['请自我介绍一下。', '今天吃什么好呢?']

demo = gr.Interface(
    inference,
    inputs="text",
    outputs="text",
    title=title,
    description=description,
    article=article,
    examples=examples,
)

if __name__ == "__main__":
    demo.launch()