import os

import torch
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# Model ID for Phi-3.5-mini-instruct
MODEL_ID = "microsoft/Phi-3.5-mini-instruct"

# Forward the Hugging Face token (if set) before any downloads so they
# authenticate. Guard against assigning None, which would raise a TypeError.
hf_token = os.environ.get("HF_TOKEN")
if hf_token:
    os.environ["HUGGINGFACEHUB_API_TOKEN"] = hf_token

# Load the tokenizer and the model in bfloat16 (reduced precision, not
# quantization), letting device_map="auto" place layers on available devices.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)

# Build the text-generation pipeline once, not on every request.
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=512,  # the default (20) is far too short for chat replies
)

# Define the function for the Gradio interface
def chat_with_phi(message):
    conversation = [{"role": "user", "content": message}]
    response = pipe(conversation)
    # With chat-format input, generated_text holds the full message list;
    # the assistant's reply is the last entry.
    return response[0]["generated_text"][-1]["content"]

# Set up the Gradio interface (the legacy theme="huggingface" string is no
# longer accepted by current Gradio releases, so it is dropped here).
app = gr.Interface(
    fn=chat_with_phi,
    inputs=gr.Textbox(label="Type your message:"),
    outputs=gr.Textbox(label="Phi 3.5 Responds:"),
    title="Phi 3.5 Text Chat",
    description="Chat with the Phi 3.5 model. Ask anything!",
)

# Launch the app
app.launch(debug=True)
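
# Usage sketch: once the app is running, it can also be queried
# programmatically with gradio_client. The URL below assumes the default
# local port 7860; adjust it to whatever app.launch() prints.
#
#     from gradio_client import Client
#
#     client = Client("http://127.0.0.1:7860")
#     reply = client.predict("Hello, Phi!", api_name="/predict")
#     print(reply)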