import gradio as gr
from optimum.intel.openvino import OVStableDiffusionPipeline
from diffusers.training_utils import set_seed

import time

# Load three variants of the same Pokemon-finetuned Stable Diffusion model for
# side-by-side comparison.  Each pipeline is loaded with compile=False so the
# static shapes can be fixed via reshape() first; compiling a statically-shaped
# model lets OpenVINO generate a faster executable than the dynamic default.

# Baseline: full-precision (FP32) pipeline.
pipe_fp32 = OVStableDiffusionPipeline.from_pretrained("OpenVINO/stable-diffusion-pokemons-fp32", compile=False)
pipe_fp32.reshape(batch_size=1, height=512, width=512, num_images_per_prompt=1)
pipe_fp32.compile()

# 8-bit quantized variant (NNCF aggressive quantization).
pipe_int8 = OVStableDiffusionPipeline.from_pretrained("OpenVINO/stable-diffusion-pokemons-quantized-aggressive", compile=False)
pipe_int8.reshape(batch_size=1, height=512, width=512, num_images_per_prompt=1)
pipe_int8.compile()

# Token-merging (ToMe) + 8-bit quantized variant — the fastest of the three.
pipe_tome_int8 = OVStableDiffusionPipeline.from_pretrained("OpenVINO/stable-diffusion-pokemons-tome-quantized-aggressive", compile=False)
pipe_tome_int8.reshape(batch_size=1, height=512, width=512, num_images_per_prompt=1)
pipe_tome_int8.compile()

# Default prompt used when the user does not supply one.
prompt = "cartoon bird"

# Maps the UI-facing model-version label to its compiled pipeline.
# The keys double as the choices of the "Model version" dropdown below.
pipes = {
    "FP32": pipe_fp32,
    "8-bit quantized": pipe_int8,
    "Merged and quantized": pipe_tome_int8
}

def generate(text, option):
    """Generate an image with the selected pipeline variant and time it.

    Args:
        text: Prompt string from the Textbox input.  Bug fix: the original
            ignored this argument (misnamed ``image``) and always generated
            the module-level default prompt.
        option: Label from the "Model version" dropdown; key into ``pipes``.

    Returns:
        Tuple of (generated PIL image, human-readable inference-time string).
    """
    pipe = pipes[option]
    # perf_counter is monotonic and the recommended clock for interval timing.
    start_time = time.perf_counter()
    # Fall back to the default prompt when the textbox is empty.
    output = pipe(text or prompt, num_inference_steps=50, output_type="pil")
    elapsed_time = time.perf_counter() - start_time
    return (output.images[0], f"Inference time: {elapsed_time:.2f} s")

# Example prompts surfaced in the demo UI.
examples = [
    "cartoon bird",
    "a drawing of a green pokemon with red eyes",
    "plant pokemon in jungle",
]

# Build and launch the demo UI.  Bug fix: the `examples` list defined above was
# never passed to the Interface, so it was dead code; it is now wired in.  The
# legacy gr.inputs/gr.outputs API is kept to stay compatible with the Gradio
# version this demo was written against.
gr.Interface(
    fn=generate,
    inputs=[gr.inputs.Textbox(placeholder="cartoon bird", label="Prompt", lines=1),
            gr.inputs.Dropdown(choices=list(pipes), default="Merged and quantized", label="Model version"),
           ],
    outputs=[gr.outputs.Image(type="pil", label="Generated Image"), "text"],
    # Two inputs => each example row supplies a value per input; pair every
    # example prompt with the fastest (default) model version.
    examples=[[example, "Merged and quantized"] for example in examples],
    title="OpenVINO-optimized Stable Diffusion",
    description="This is the Optimum-based demo for NNCF-optimized Stable Diffusion pipeline trained on 'lambdalabs/pokemon-blip-captions' dataset and running with OpenVINO.\n"
                 "The pipeline is run using 8 vCPUs (4 cores) only.",
    theme="huggingface",
).launch()