"""Minimal Gradio text UI backed by a FLAN-T5 checkpoint via LangChain.

Loads the ``MD1998/FLAN-T5-V1`` model into a Hugging Face pipeline, wraps it
in a LangChain ``HuggingFacePipeline`` LLM, and exposes it through a simple
text-in / text-out Gradio interface.
"""
import warnings

import gradio as gr
import torch
import transformers
from langchain.llms import HuggingFacePipeline
from transformers import AutoModel, AutoTokenizer  # AutoModel kept for compatibility; not used below

warnings.filterwarnings("ignore")

# Hugging Face Hub checkpoint id (a FLAN-T5 fine-tune).
MODEL_ID = "MD1998/FLAN-T5-V1"

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

# BUG FIX: FLAN-T5 is an encoder-decoder (seq2seq) model. The correct pipeline
# task is "text2text-generation"; the original "text-generation" task is for
# causal LMs and cannot load a T5-family checkpoint.
pipeline = transformers.pipeline(
    "text2text-generation",
    model=MODEL_ID,
    tokenizer=tokenizer,
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
    device_map="auto",
    max_length=64,
    do_sample=True,
    top_k=10,
    num_return_sequences=1,
    eos_token_id=tokenizer.eos_token_id,
)

# NOTE(review): ``model_kwargs`` here is forwarded to model *loading*, not to
# generation, so ``temperature`` is effectively ignored (and temperature 0
# would contradict ``do_sample=True`` above). Kept for interface
# compatibility; move sampling params into the pipeline call if intended.
llm = HuggingFacePipeline(pipeline=pipeline, model_kwargs={"temperature": 0})


def greet(prompt: str) -> str:
    """Generate a completion for ``prompt`` with the FLAN-T5 LLM.

    Args:
        prompt: Free-form user text from the Gradio textbox.

    Returns:
        The model's generated text.
    """
    return llm(prompt)


iface = gr.Interface(fn=greet, inputs="text", outputs="text")

# Guard the blocking web-server launch so the module can be imported
# (e.g. by tests or ``gradio`` hot-reload) without starting a server.
if __name__ == "__main__":
    iface.launch()