from __future__ import annotations

import gradio as gr
import torch
from PIL import Image

from src.eunms import Model_Type, Scheduler_Type, Gradient_Averaging_Type, Epsilon_Update_Type
from src.enums_utils import model_type_to_size, get_pipes
from src.config import RunConfig
from main import run as run_model
DESCRIPTION = '''# ReNoise: Real Image Inversion Through Iterative Noising
This is a demo for our "ReNoise: Real Image Inversion Through Iterative Noising" [paper](https://garibida.github.io/ReNoise-Inversion/). Code is available [here](https://github.com/garibida/ReNoise-Inversion).
Our ReNoise inversion technique can be applied to various diffusion models, including recent few-step ones such as SDXL-Turbo.
This demo performs real image editing using our ReNoise inversion. The input image is resized to 512x512, the optimal input size for SDXL Turbo.
'''
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model_type = Model_Type.SDXL_Turbo
scheduler_type = Scheduler_Type.EULER
image_size = model_type_to_size(model_type)
pipe_inversion, pipe_inference = get_pipes(model_type, scheduler_type, device=device)
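
# A small FIFO cache of recent inversion results: editing the same image with
# an identical config reuses the cached inverted latent and per-step noise, so
# only the (fast) denoising pass has to be re-run.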
cache_size = 10
prev_configs = [None] * cache_size
prev_inv_latents = [None] * cache_size
prev_images = [None] * cache_size
prev_noises = [None] * cache_size
def main_pipeline(
        input_image: str,
        src_prompt: str,
        tgt_prompt: str,
        edit_cfg: float,
        number_of_renoising_iterations: int,
        inversion_strength: float,
        avg_gradients: bool,
        first_step_range_start: int,
        first_step_range_end: int,
        rest_step_range_start: int,
        rest_step_range_end: int,
        lambda_ac: float,
        lambda_kl: float,
        noise_correction: bool):
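    """Invert the input image with ReNoise and re-denoise it under the target
    prompt, returning the edited image. Recent inversions are cached and
    reused when the same image and configuration are submitted again."""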
    global prev_configs, prev_inv_latents, prev_images, prev_noises

    # Map the UI toggles to the corresponding enum options.
    update_epsilon_type = Epsilon_Update_Type.OPTIMIZE if noise_correction else Epsilon_Update_Type.NONE
    avg_gradients_type = Gradient_Averaging_Type.ON_END if avg_gradients else Gradient_Averaging_Type.NONE
    first_step_range = (first_step_range_start, first_step_range_end)
    rest_step_range = (rest_step_range_start, rest_step_range_end)
    config = RunConfig(model_type=model_type,
                       num_inference_steps=4,
                       num_inversion_steps=4,
                       guidance_scale=0.0,
                       max_num_aprox_steps_first_step=first_step_range_end + 1,
                       num_aprox_steps=number_of_renoising_iterations,
                       inversion_max_step=inversion_strength,
                       gradient_averaging_type=avg_gradients_type,
                       gradient_averaging_first_step_range=first_step_range,
                       gradient_averaging_step_range=rest_step_range,
                       scheduler_type=scheduler_type,
                       num_reg_steps=4,
                       num_ac_rolls=5,
                       lambda_ac=lambda_ac,
                       lambda_kl=lambda_kl,
                       update_epsilon_type=update_epsilon_type,
                       do_reconstruction=True)
    config.prompt = src_prompt

    inv_latent = None
    noise_list = None
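    # Look for a cached inversion of this exact (image, config) pair. On a hit,
    # reuse the inverted latent and noise, and remove the entry so it can be
    # re-appended below as the most recent one. Note this relies on RunConfig
    # instances comparing equal field-by-field (e.g. a dataclass).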
    for i in range(cache_size):
        if prev_configs[i] is not None and prev_configs[i] == config and prev_images[i] == input_image:
            print(f"Using cache for config #{i}")
            inv_latent = prev_inv_latents[i]
            noise_list = prev_noises[i]
            prev_configs.pop(i)
            prev_inv_latents.pop(i)
            prev_images.pop(i)
            prev_noises.pop(i)
            break
    original_image = Image.open(input_image).convert("RGB").resize(image_size)

    res_image, inv_latent, noise, all_latents = run_model(original_image,
                                                          config,
                                                          latents=inv_latent,
                                                          pipe_inversion=pipe_inversion,
                                                          pipe_inference=pipe_inference,
                                                          edit_prompt=tgt_prompt,
                                                          noise=noise_list,
                                                          edit_cfg=edit_cfg)
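    # Store the fresh (or re-validated) inversion at the back of the FIFO and
    # evict the oldest entry once the cache exceeds its capacity.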
    prev_configs.append(config)
    prev_inv_latents.append(inv_latent)
    prev_images.append(input_image)
    prev_noises.append(noise)

    if len(prev_configs) > cache_size:
        print("Popping cache")
        prev_configs.pop(0)
        prev_inv_latents.pop(0)
        prev_images.pop(0)
        prev_noises.pop(0)

    return res_image
with gr.Blocks(css='style.css') as demo:
    gr.Markdown(DESCRIPTION)

    gr.HTML(
        '''<a href="https://huggingface.co/spaces/orpatashnik/local-prompt-mixing?duplicate=true">
        <img src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>Duplicate the Space to run privately without waiting in queue''')
    with gr.Row():
        with gr.Column():
            input_image = gr.Image(
                label="Input image",
                type="filepath",
                height=image_size[0],
                width=image_size[1]
            )
            src_prompt = gr.Text(
                label='Source Prompt',
                max_lines=1,
                placeholder='A kitten is sitting in a basket on a branch',
            )
            tgt_prompt = gr.Text(
                label='Target Prompt',
                max_lines=1,
                placeholder='A plush toy kitten is sitting in a basket on a branch',
            )
            with gr.Accordion("Advanced Options", open=False):
                edit_cfg = gr.Slider(
                    label='Denoise Classifier-Free Guidance Scale',
                    minimum=1.0,
                    maximum=3.5,
                    value=1.0,
                    step=0.1
                )
                number_of_renoising_iterations = gr.Slider(
                    label='Number of ReNoise Iterations',
                    minimum=0,
                    maximum=20,
                    value=9,
                    step=1
                )
                inversion_strength = gr.Slider(
                    label='Inversion Strength',
                    minimum=0.0,
                    maximum=1.0,
                    value=1.0,
                    step=0.25
                )
                avg_gradients = gr.Checkbox(
                    label="Perform Estimation Averaging"
                )
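                # The four sliders below select which ReNoise estimations are
                # included in the average, with separate ranges for timesteps
                # t < 250 and t > 250.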
                first_step_range_start = gr.Slider(
                    label='First Estimation in Average (t < 250)',
                    minimum=0,
                    maximum=21,
                    value=0,
                    step=1
                )
                first_step_range_end = gr.Slider(
                    label='Last Estimation in Average (t < 250)',
                    minimum=0,
                    maximum=21,
                    value=5,
                    step=1
                )
                rest_step_range_start = gr.Slider(
                    label='First Estimation in Average (t > 250)',
                    minimum=0,
                    maximum=21,
                    value=8,
                    step=1
                )
                rest_step_range_end = gr.Slider(
                    label='Last Estimation in Average (t > 250)',
                    minimum=0,
                    maximum=21,
                    value=10,
                    step=1
                )
                # Fixed regularization parameters; hard-coded in RunConfig
                # above and not exposed in the UI.
                num_reg_steps = 4
                num_ac_rolls = 5
                lambda_ac = gr.Slider(
                    label='Lambda AC',
                    minimum=0.0,
                    maximum=50.0,
                    value=20.0,
                    step=1.0
                )
                lambda_kl = gr.Slider(
                    label='Lambda Patch KL',
                    minimum=0.0,
                    maximum=0.4,
                    value=0.065,
                    step=0.005
                )
                noise_correction = gr.Checkbox(
                    label="Perform Noise Correction"
                )
            run_button = gr.Button('Edit')

        with gr.Column():
            result = gr.Image(
                label="Result",
                type="pil",
                height=image_size[0],
                width=image_size[1]
            )
    examples = [
        [
            "example_images/kitten.jpg",  # input_image
            "A kitten is sitting in a basket on a branch",  # src_prompt
            "a lego kitten is sitting in a basket on a branch",  # tgt_prompt
            1.0,  # edit_cfg
            9,  # number_of_renoising_iterations
            1.0,  # inversion_strength
            True,  # avg_gradients
            0,  # first_step_range_start
            5,  # first_step_range_end
            8,  # rest_step_range_start
            10,  # rest_step_range_end
            20.0,  # lambda_ac
            0.055,  # lambda_kl
            False  # noise_correction
        ],
        [
            "example_images/kitten.jpg",  # input_image
            "A kitten is sitting in a basket on a branch",  # src_prompt
            "a broccoli is sitting in a basket on a branch",  # tgt_prompt
            1.0,  # edit_cfg
            9,  # number_of_renoising_iterations
            1.0,  # inversion_strength
            True,  # avg_gradients
            0,  # first_step_range_start
            5,  # first_step_range_end
            8,  # rest_step_range_start
            10,  # rest_step_range_end
            20.0,  # lambda_ac
            0.055,  # lambda_kl
            False  # noise_correction
        ],
        [
            "example_images/kitten.jpg",  # input_image
            "A kitten is sitting in a basket on a branch",  # src_prompt
            "a dog is sitting in a basket on a branch",  # tgt_prompt
            1.0,  # edit_cfg
            9,  # number_of_renoising_iterations
            1.0,  # inversion_strength
            True,  # avg_gradients
            0,  # first_step_range_start
            5,  # first_step_range_end
            8,  # rest_step_range_start
            10,  # rest_step_range_end
            20.0,  # lambda_ac
            0.055,  # lambda_kl
            False  # noise_correction
        ],
        [
            "example_images/monkey.jpeg",  # input_image
            "a monkey sitting on a tree branch in the forest",  # src_prompt
            "a beaver sitting on a tree branch in the forest",  # tgt_prompt
            1.0,  # edit_cfg
            9,  # number_of_renoising_iterations
            1.0,  # inversion_strength
            True,  # avg_gradients
            0,  # first_step_range_start
            5,  # first_step_range_end
            8,  # rest_step_range_start
            10,  # rest_step_range_end
            20.0,  # lambda_ac
            0.055,  # lambda_kl
            True  # noise_correction
        ],
        [
            "example_images/monkey.jpeg",  # input_image
            "a monkey sitting on a tree branch in the forest",  # src_prompt
            "a raccoon sitting on a tree branch in the forest",  # tgt_prompt
            1.0,  # edit_cfg
            9,  # number_of_renoising_iterations
            1.0,  # inversion_strength
            True,  # avg_gradients
            0,  # first_step_range_start
            5,  # first_step_range_end
            8,  # rest_step_range_start
            10,  # rest_step_range_end
            20.0,  # lambda_ac
            0.055,  # lambda_kl
            True  # noise_correction
        ],
        [
            "example_images/lion.jpeg",  # input_image
            "a lion is sitting in the grass at sunset",  # src_prompt
            "a tiger is sitting in the grass at sunset",  # tgt_prompt
            1.0,  # edit_cfg
            9,  # number_of_renoising_iterations
            1.0,  # inversion_strength
            True,  # avg_gradients
            0,  # first_step_range_start
            5,  # first_step_range_end
            8,  # rest_step_range_start
            10,  # rest_step_range_end
            20.0,  # lambda_ac
            0.055,  # lambda_kl
            True  # noise_correction
        ]
    ]
    gr.Examples(examples=examples,
                inputs=[
                    input_image,
                    src_prompt,
                    tgt_prompt,
                    edit_cfg,
                    number_of_renoising_iterations,
                    inversion_strength,
                    avg_gradients,
                    first_step_range_start,
                    first_step_range_end,
                    rest_step_range_start,
                    rest_step_range_end,
                    lambda_ac,
                    lambda_kl,
                    noise_correction
                ],
                outputs=[
                    result
                ],
                fn=main_pipeline,
                cache_examples=True)
    inputs = [
        input_image,
        src_prompt,
        tgt_prompt,
        edit_cfg,
        number_of_renoising_iterations,
        inversion_strength,
        avg_gradients,
        first_step_range_start,
        first_step_range_end,
        rest_step_range_start,
        rest_step_range_end,
        lambda_ac,
        lambda_kl,
        noise_correction
    ]
    outputs = [
        result
    ]

    run_button.click(fn=main_pipeline, inputs=inputs, outputs=outputs)
demo.queue(max_size=50).launch(share=True)
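
# A minimal sketch of running this demo locally, assuming the
# garibida/ReNoise-Inversion repo layout (src/, main.py, style.css,
# example_images/) and its requirements are installed:
#   python app.py
# Gradio then serves the UI at http://127.0.0.1:7860 by default.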