# Load model directly
from transformers import pipeline

import gradio as gr
import torch

# Check if CUDA is available, otherwise fall back to CPU
device = "cuda" if torch.cuda.is_available() else "cpu"

pipe = pipeline("text-generation", model="microsoft/BioGPT-Large", device=device)


def question(message, history):
    # Generate the response; the pipeline returns a list of dicts,
    # so take the generated text of the first candidate
    response = pipe(message, max_length=200)[0]["generated_text"]
    return response


# Description shown in the interface (rendered as Markdown)
description = "Chat with Microsoft's BioGPT-Large biomedical language model."

program = gr.ChatInterface(
    question,
    description=description,
    title="Microsoft BioGPT Large Chat",
)

if __name__ == "__main__":
    program.launch()