"""Gradio text-to-image demo backed by the RealVisXL V4.0 checkpoint.

Loads the diffusion pipeline once at import time, exposes a single text box
in a Gradio interface, and returns one generated image per prompt.
"""

import gradio as gr
import torch
from diffusers import StableDiffusionPipeline  # noqa: F401  (pre-existing import, kept)
from diffusers import StableDiffusionXLPipeline

model_id = "SG161222/RealVisXL_V4.0"

# Half precision is only practical on a GPU; on CPU many fp16 kernels are
# missing or extremely slow, so fall back to float32 there.
device = "cuda" if torch.cuda.is_available() else "cpu"
torch_dtype = torch.float16 if device == "cuda" else torch.float32

# RealVisXL is an SDXL checkpoint, so it must be loaded with the SDXL pipeline:
# its UNet expects the extra SDXL conditioning (pooled text embeddings and
# time ids) that the SD 1.x pipeline never supplies.
pipe = StableDiffusionXLPipeline.from_pretrained(model_id, torch_dtype=torch_dtype)
pipe.to(device)
unet = pipe.unet


def generate_image(prompt, unet, pipe):
    """Generate one image for *prompt* by running the full diffusion pipeline.

    Args:
        prompt: Text description of the desired image.
        unet: Retained only for backward compatibility with existing callers.
            It is not used: the pipeline drives its own ``pipe.unet`` inside
            the denoising loop, and a bare UNet call cannot produce an image.
        pipe: Loaded diffusers text-to-image pipeline.

    Returns:
        The first image of the pipeline output.
    """
    # The pipeline handles tokenisation, text encoding, the denoising loop and
    # VAE decoding; calling the text encoder or UNet directly skips all of it.
    return pipe(prompt=prompt).images[0]


def chatbot(prompt):
    """Gradio callback: turn the user's text prompt into an image.

    Args:
        prompt: Text entered in the interface's input box.

    Returns:
        The image produced by ``generate_image`` for the prompt.

    Raises:
        gr.Error: If the prompt is empty or whitespace only, so the user gets
            a readable message in the UI instead of a wasted generation.
    """
    if not prompt or not prompt.strip():
        raise gr.Error("Please enter a text prompt.")
    return generate_image(prompt, unet, pipe)


def get_aug_embed(self, text_embeds, image):
    """Return *text_embeds*, computing them via ``self.text_encoder`` if None.

    This function is not called anywhere in this script and is kept only so
    that the module's public names stay unchanged.

    NOTE(review): the fallback passes ``text_embeds`` (which is None on that
    path), ``image``, ``height`` and ``width`` as keywords to
    ``self.text_encoder``; confirm the intended ``self`` object accepts them
    before relying on this helper.

    Args:
        self: Object expected to expose ``text_encoder`` and ``unet.config``.
        text_embeds: Precomputed embeddings, or None to compute them.
        image: Forwarded to ``self.text_encoder`` on the fallback path.

    Returns:
        ``text_embeds`` unchanged when it is not None, otherwise the result of
        the ``self.text_encoder`` call.
    """
    if text_embeds is not None:
        return text_embeds
    sample_size = self.unet.config.sample_size
    return self.text_encoder(
        text_embeds=text_embeds,
        image=image,
        height=sample_size,
        width=sample_size,
    )


# Create the Gradio interface.
interface = gr.Interface(
    fn=chatbot,
    inputs="text",
    outputs="image",
    title="RealVisXL V4.0 Text-to-Image Chatbot",
    description="Enter a text prompt and get an AI-generated image.",
)

# Launch the interface (kept at module level, as in the original script).
interface.launch()