File size: 2,961 Bytes
31772c8
4a2914e
 
31772c8
1bb8a30
6f4ebfe
31772c8
 
4a2914e
 
 
 
 
31772c8
4a2914e
31772c8
4a2914e
31772c8
09d39fb
31772c8
 
 
 
4a2914e
 
31772c8
 
 
 
4a2914e
 
31772c8
4a2914e
31772c8
4a2914e
 
 
 
31772c8
 
 
547d326
 
 
 
31772c8
 
112da9b
547d326
c32a002
112da9b
 
 
 
 
 
 
5c058da
 
31772c8
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import gradio as gr
import torch
from diffusers import DiffusionPipeline, AutoencoderKL
from PIL import Image
import spaces  

# Build the VAE and the SDXL pipeline once, at import time, so the GPU-enabled
# function can reuse them on every call instead of reloading the weights.
_HALF_PRECISION = torch.float16

vae = AutoencoderKL.from_pretrained(
    "madebyollin/sdxl-vae-fp16-fix",
    torch_dtype=_HALF_PRECISION,
)

_pipeline_options = dict(
    vae=vae,
    torch_dtype=_HALF_PRECISION,
    variant="fp16",
    use_safetensors=True,
)
pipe = DiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    **_pipeline_options,
)

# Load the sketch LoRA weights into the base pipeline before any device move.
pipe.load_lora_weights('ritwikraha/khabib_sketch_LoRA')

# Only move to the GPU when CUDA is reported as available; the value returned
# by `.to()` is intentionally discarded.
if torch.cuda.is_available():
    _ = pipe.to("cuda")

# Define the image generation function
@spaces.GPU(enable_queue=True)
def generate_sketch(prompt, negative_prompt="ugly face, multiple bodies, bad anatomy, disfigured, extra fingers", guidance_scale=3, num_inference_steps=50):
    """Generate a sketch image from a text prompt with the module-level pipeline.

    Args:
        prompt (str): Description of the image to generate.
        negative_prompt (str, optional): Features the generation should steer
            away from. Defaults to a list of common undesirable artefacts.
        guidance_scale (float, optional): Guidance strength forwarded to the
            pipeline unchanged. Defaults to 3.
        num_inference_steps (int | float, optional): Number of diffusion steps.
            UI sliders may deliver a float, so the value is truncated to an
            integer before it is handed to the pipeline. Defaults to 50.

    Returns:
        PIL.Image: The first generated image, converted to RGB.
    """
    # The step count has to be a whole number; a slider value such as 37.5
    # would otherwise be forwarded as-is and break the pipeline call.
    step_count = int(num_inference_steps)

    result = pipe(
        prompt=prompt,
        negative_prompt=negative_prompt,
        guidance_scale=guidance_scale,
        num_inference_steps=step_count,
    )
    # Only the first image of the batch is returned, normalised to RGB.
    return result.images[0].convert("RGB")

# Gradio interface: descriptive text shown under the page title.
description = """
This demo utilizes the SDXL model LoRA adaption weights for stabilityai/stable-diffusion-xl-base-1.0. The weights were trained on sketches of Khabib by ritwikraha using DreamBooth.
"""
# Lay out the page: inputs in the left column, generated image on the right.
with gr.Blocks() as demo:
    gr.HTML("<h1><center>Khabib Sketch Maker 🥋</center></h1>")
    gr.Markdown(description)
    with gr.Group():
        with gr.Row():
            with gr.Column():
                prompt_input = gr.Textbox(label="Enter your image prompt", value="a sketch of TOK khabib dancing, monchrome, pen sketch", scale=8)
                negative_prompt_input = gr.Textbox(label="Enter negative prompt", value="ugly face, multiple bodies, bad anatomy, disfigured, extra fingers", lines=2)
                guidance_scale_slider = gr.Slider(label="Guidance Scale", minimum=1, maximum=5, value=3)
                # step=1 keeps the slider on whole numbers: the inference step
                # count must be an integer, and without an explicit step the
                # slider is free to produce fractional values.
                steps_slider = gr.Slider(label="Number of Inference Steps", minimum=20, maximum=100, value=50, step=1)
                submit_button = gr.Button("Submit")
            with gr.Column():
                output_image = gr.Image(label="Generated Sketch")

    # The input order must match the positional parameters of generate_sketch:
    # prompt, negative_prompt, guidance_scale, num_inference_steps.
    submit_button.click(
        fn=generate_sketch,
        inputs=[prompt_input, negative_prompt_input, guidance_scale_slider, steps_slider],
        outputs=output_image
    )

demo.launch()