import gradio as gr
import torch
from diffusers import StableDiffusionPipeline, StableDiffusionXLPipeline

# RealVisXL V4.0 is a Stable Diffusion XL checkpoint, so it must be loaded with
# the SDXL pipeline class; the SD 1.x/2.x pipeline class does not supply the
# extra conditioning inputs the SDXL UNet requires.
model_id = "SG161222/RealVisXL_V4.0"

# Half precision is only dependable on a GPU. Use CUDA + float16 when a GPU is
# present and fall back to CPU + float32 otherwise.
device = "cuda" if torch.cuda.is_available() else "cpu"
dtype = torch.float16 if device == "cuda" else torch.float32

pipe = StableDiffusionXLPipeline.from_pretrained(model_id, torch_dtype=dtype)
pipe.to(device)

# Kept as a module-level name because the rest of the file passes it around.
unet = pipe.unet

def generate_image(prompt, unet, pipe):
    """Generate one image for ``prompt`` by running the diffusion pipeline.

    Args:
        prompt: Text description of the image to generate.
        unet: Unused. Accepted only so existing callers that pass the
            pipeline's UNet keep working.
        pipe: Callable pipeline. It is invoked as ``pipe(prompt=prompt)`` and
            must return an object exposing an ``images`` sequence.

    Returns:
        The first element of the pipeline output's ``images``.
    """
    # Call the pipeline itself rather than its sub-models: the text encoder
    # cannot take a raw string, and a bare UNet forward pass does not return
    # an object with ``.images``.
    output = pipe(prompt=prompt)
    return output.images[0]

def chatbot(prompt):
    """Return the image produced for ``prompt``.

    Forwards ``prompt`` together with the module-level ``unet`` and ``pipe``
    to ``generate_image`` and returns that call's result unchanged.
    """
    return generate_image(prompt, unet, pipe)

def get_aug_embed(self, text_embeds, image):
    """Return ``text_embeds``, computing a replacement when it is ``None``.

    A non-``None`` ``text_embeds`` is returned untouched. Otherwise the result
    of ``self.text_encoder(...)`` is returned; that call receives
    ``text_embeds`` (which is ``None`` on this path), ``image``, and
    ``self.unet.config.sample_size`` as both ``height`` and ``width``.

    NOTE(review): this is defined at module level yet takes ``self``, and
    nothing in the visible code calls it -- confirm whether it is still needed.
    """
    if text_embeds is not None:
        return text_embeds

    encoder = self.text_encoder
    return encoder(
        text_embeds=text_embeds,
        image=image,
        height=self.unet.config.sample_size,
        width=self.unet.config.sample_size,
    )
    
# Web UI: a single text input whose value is handed to `chatbot`; whatever
# `chatbot` returns is shown through an image output component.
interface = gr.Interface(
    chatbot,
    "text",
    "image",
    description="Enter a text prompt and get an AI-generated image.",
    title="RealVisXL V4.0 Text-to-Image Chatbot",
)

# Start serving the UI. This is a module-level statement, so it runs as soon
# as the file is executed.
interface.launch()