import torch
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer

# Uncomment to place new tensors on the GPU by default.
# torch.set_default_device("cuda")

# Load phi-2 in its checkpoint dtype; trust_remote_code is needed for the
# custom model code shipped with this repository.
model = AutoModelForCausalLM.from_pretrained(
    "microsoft/phi-2", torch_dtype="auto", trust_remote_code=True
)
tokenizer = AutoTokenizer.from_pretrained("microsoft/phi-2", trust_remote_code=True)

def greet(text):
    # Tokenize the prompt; the attention mask is skipped for this single-sequence call.
    inputs = tokenizer(text, return_tensors="pt", return_attention_mask=False)
    # Generate up to 200 tokens (prompt included) and decode the first sequence.
    outputs = model.generate(**inputs, max_length=200)
    return tokenizer.batch_decode(outputs)[0]

# Expose the generation function as a simple text-in/text-out Gradio app.
demo = gr.Interface(fn=greet, inputs="text", outputs="text")
demo.launch()