Spaces commit dbaf842 (parent: a25de9b): Test1

This view is limited to 50 files because the commit contains too many changes; see the raw diff for the complete change set.

Changed files:
- .gitattributes +10 -0
- .gitignore +6 -0
- LICENSE +21 -0
- app.py +19 -0
- assets/.DS_Store +0 -0
- assets/comparison.jpg +3 -0
- assets/embeddings_sd_1.4/cat.pt +3 -0
- assets/embeddings_sd_1.4/dog.pt +3 -0
- assets/embeddings_sd_1.4/horse.pt +3 -0
- assets/embeddings_sd_1.4/zebra.pt +3 -0
- assets/grid_cat2dog.jpg +3 -0
- assets/grid_dog2cat.jpg +3 -0
- assets/grid_horse2zebra.jpg +3 -0
- assets/grid_tree2fall.jpg +3 -0
- assets/grid_zebra2horse.jpg +3 -0
- assets/main.gif +3 -0
- assets/method.jpeg +3 -0
- assets/results_real.jpg +3 -0
- assets/results_syn.jpg +3 -0
- assets/results_teaser.jpg +0 -0
- assets/test_images/cats/cat_1.png +0 -0
- assets/test_images/cats/cat_2.png +0 -0
- assets/test_images/cats/cat_3.png +0 -0
- assets/test_images/cats/cat_4.png +0 -0
- assets/test_images/cats/cat_5.png +0 -0
- assets/test_images/cats/cat_6.png +0 -0
- assets/test_images/cats/cat_7.png +0 -0
- assets/test_images/cats/cat_8.png +0 -0
- assets/test_images/cats/cat_9.png +0 -0
- assets/test_images/dogs/dog_1.png +0 -0
- assets/test_images/dogs/dog_2.png +0 -0
- assets/test_images/dogs/dog_3.png +0 -0
- assets/test_images/dogs/dog_4.png +0 -0
- assets/test_images/dogs/dog_5.png +0 -0
- assets/test_images/dogs/dog_6.png +0 -0
- assets/test_images/dogs/dog_7.png +0 -0
- assets/test_images/dogs/dog_8.png +0 -0
- assets/test_images/dogs/dog_9.png +0 -0
- environment.yml +23 -0
- output/test_cat/edit/cat_1.png +0 -0
- output/test_cat/inversion/cat_1.pt +3 -0
- output/test_cat/prompt/cat_1.txt +1 -0
- output/test_cat/reconstruction/cat_1.png +0 -0
- output/test_cat2/edit/cat_1.png +0 -0
- output/test_cat2/reconstruction/cat_1.png +0 -0
- requirements.txt +11 -0
- src/edit_real.py +65 -0
- src/edit_synthetic.py +52 -0
- src/inversion.py +64 -0
- src/make_edit_direction.py +61 -0
.gitattributes CHANGED

```diff
@@ -32,3 +32,13 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+assets/comparison.jpg filter=lfs diff=lfs merge=lfs -text
+assets/grid_cat2dog.jpg filter=lfs diff=lfs merge=lfs -text
+assets/grid_dog2cat.jpg filter=lfs diff=lfs merge=lfs -text
+assets/grid_horse2zebra.jpg filter=lfs diff=lfs merge=lfs -text
+assets/grid_tree2fall.jpg filter=lfs diff=lfs merge=lfs -text
+assets/grid_zebra2horse.jpg filter=lfs diff=lfs merge=lfs -text
+assets/main.gif filter=lfs diff=lfs merge=lfs -text
+assets/method.jpeg filter=lfs diff=lfs merge=lfs -text
+assets/results_real.jpg filter=lfs diff=lfs merge=lfs -text
+assets/results_syn.jpg filter=lfs diff=lfs merge=lfs -text
```
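Entries like these are what `git lfs track` writes into `.gitattributes`; tracking another large binary the same way would look like:

```sh
git lfs track "assets/main.gif"   # appends a matching filter=lfs line to .gitattributes
```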
.gitignore ADDED

```text
output
scripts
src/folder_*.py
src/ig_*.py
assets/edit_sentences
src/utils/edit_pipeline_spatial.py
```
LICENSE ADDED

```text
MIT License

Copyright (c) 2023 pix2pixzero

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
```
app.py ADDED

```python
import os
import gradio as gr

def update(name):
    os.system('''python src/inversion.py \
            --input_image "assets/test_images/cats/cat_1.png" \
            --results_folder "output/test_cat"
    ''')
    return f"Inverted!"

with gr.Blocks() as demo:
    gr.Markdown("Start typing below and then click **Run** to see the output.")
    with gr.Row():
        inp = gr.Textbox(placeholder="Do you want to invert?")
        out = gr.Textbox()
    btn = gr.Button("Run")
    btn.click(fn=update, inputs=inp, outputs=out)

demo.launch()
```
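As committed, the demo ignores the textbox input and always inverts the same bundled cat image. A minimal way to exercise it, assuming the dependencies below are installed and a CUDA GPU is available:

```sh
python app.py   # launches the Gradio UI, typically on http://127.0.0.1:7860
```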
assets/.DS_Store ADDED (binary file, 6.15 kB)
assets/comparison.jpg ADDED (binary image, stored with Git LFS)
assets/embeddings_sd_1.4/cat.pt ADDED

```text
version https://git-lfs.github.com/spec/v1
oid sha256:aa9441dc014d5e86567c5ef165e10b50d2a7b3a68d90686d0cd1006792adf334
size 237300
```

assets/embeddings_sd_1.4/dog.pt ADDED

```text
version https://git-lfs.github.com/spec/v1
oid sha256:becf079d61d7f35727bcc0d8506ddcdcddb61e62d611840ff3d18eca7fb6338c
size 237300
```

assets/embeddings_sd_1.4/horse.pt ADDED

```text
version https://git-lfs.github.com/spec/v1
oid sha256:c5d499299544d11371f84674761292b0512055ef45776c700c0b0da164cbf6c7
size 118949
```

assets/embeddings_sd_1.4/zebra.pt ADDED

```text
version https://git-lfs.github.com/spec/v1
oid sha256:a29f6a11d91f3a276e27326b7623fae9d61a3d253ad430bb868bd40fb7e02fec
size 118949
```
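These LFS pointers stand in for precomputed text-embedding tensors, presumably the per-class mean embeddings that `construct_direction` in the editing scripts below combines into an edit direction. After `git lfs pull`, a quick sanity check, assuming each file holds a plain tensor:

```sh
python -c 'import torch; e = torch.load("assets/embeddings_sd_1.4/cat.pt", map_location="cpu"); print(e.shape)'
```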
assets/grid_cat2dog.jpg ADDED (binary image, stored with Git LFS)
assets/grid_dog2cat.jpg ADDED (binary image, stored with Git LFS)
assets/grid_horse2zebra.jpg ADDED (binary image, stored with Git LFS)
assets/grid_tree2fall.jpg ADDED (binary image, stored with Git LFS)
assets/grid_zebra2horse.jpg ADDED (binary image, stored with Git LFS)
assets/main.gif ADDED (binary image, stored with Git LFS)
assets/method.jpeg ADDED (binary image, stored with Git LFS)
assets/results_real.jpg ADDED (binary image, stored with Git LFS)
assets/results_syn.jpg ADDED (binary image, stored with Git LFS)
assets/results_teaser.jpg ADDED (binary image)
assets/test_images/cats/cat_1.png ADDED (binary image)
assets/test_images/cats/cat_2.png ADDED (binary image)
assets/test_images/cats/cat_3.png ADDED (binary image)
assets/test_images/cats/cat_4.png ADDED (binary image)
assets/test_images/cats/cat_5.png ADDED (binary image)
assets/test_images/cats/cat_6.png ADDED (binary image)
assets/test_images/cats/cat_7.png ADDED (binary image)
assets/test_images/cats/cat_8.png ADDED (binary image)
assets/test_images/cats/cat_9.png ADDED (binary image)
assets/test_images/dogs/dog_1.png ADDED (binary image)
assets/test_images/dogs/dog_2.png ADDED (binary image)
assets/test_images/dogs/dog_3.png ADDED (binary image)
assets/test_images/dogs/dog_4.png ADDED (binary image)
assets/test_images/dogs/dog_5.png ADDED (binary image)
assets/test_images/dogs/dog_6.png ADDED (binary image)
assets/test_images/dogs/dog_7.png ADDED (binary image)
assets/test_images/dogs/dog_8.png ADDED (binary image)
assets/test_images/dogs/dog_9.png ADDED (binary image)
environment.yml ADDED

```yaml
name: pix2pix-zero
channels:
  - pytorch
  - nvidia
  - defaults
dependencies:
  - pip
  - pytorch-cuda=11.6
  - torchvision
  - pytorch
  - pip:
      - accelerate
      - diffusers
      - einops
      - gradio
      - ipython
      - numpy
      - opencv-python-headless
      - pillow
      - psutil
      - tqdm
      - transformers
      - salesforce-lavis
```
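A sketch of creating the environment from this file (assuming conda is installed; the environment name comes from the `name:` field):

```sh
conda env create -f environment.yml
conda activate pix2pix-zero
```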
output/test_cat/edit/cat_1.png ADDED (binary image)

output/test_cat/inversion/cat_1.pt ADDED

```text
version https://git-lfs.github.com/spec/v1
oid sha256:1fd7cd2554d2d695841ede6038f7906b50085841706a2f62429ee32c08a0dc85
size 66283
```

output/test_cat/prompt/cat_1.txt ADDED

```text
a dog with his paws on top of a ball, painting
```

output/test_cat/reconstruction/cat_1.png ADDED (binary image)

output/test_cat2/edit/cat_1.png ADDED (binary image)

output/test_cat2/reconstruction/cat_1.png ADDED (binary image)
requirements.txt ADDED

```text
accelerate
diffusers
einops
numpy
opencv-python-headless
pillow
psutil
transformers
tqdm
pytorch
salesforce-lavis
```
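Note that `pytorch` is not PyTorch's name on PyPI (that is `torch`), so `pip install -r requirements.txt` will fail on that line as committed. One workaround:

```sh
pip install torch                                   # PyTorch's actual PyPI package name
grep -v '^pytorch$' requirements.txt | xargs pip install
```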
src/edit_real.py ADDED

```python
import os, pdb

import argparse
from glob import glob   # missing from the committed file, but glob() is used below
import numpy as np
import torch
import requests
from PIL import Image

from diffusers import DDIMScheduler
from utils.ddim_inv import DDIMInversion
from utils.edit_directions import construct_direction
from utils.edit_pipeline import EditingPipeline


if __name__=="__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('--inversion', required=True)
    parser.add_argument('--prompt', type=str, required=True)
    parser.add_argument('--task_name', type=str, default='cat2dog')
    parser.add_argument('--results_folder', type=str, default='output/test_cat')
    parser.add_argument('--num_ddim_steps', type=int, default=50)
    parser.add_argument('--model_path', type=str, default='CompVis/stable-diffusion-v1-4')
    parser.add_argument('--xa_guidance', default=0.1, type=float)
    parser.add_argument('--negative_guidance_scale', default=5.0, type=float)
    parser.add_argument('--use_float_16', action='store_true')

    args = parser.parse_args()

    os.makedirs(os.path.join(args.results_folder, "edit"), exist_ok=True)
    os.makedirs(os.path.join(args.results_folder, "reconstruction"), exist_ok=True)

    if args.use_float_16:
        torch_dtype = torch.float16
    else:
        torch_dtype = torch.float32

    # if the inversion is a folder, the prompt should also be a folder
    assert (os.path.isdir(args.inversion)==os.path.isdir(args.prompt)), "If the inversion is a folder, the prompt should also be a folder"
    if os.path.isdir(args.inversion):
        l_inv_paths = sorted(glob(os.path.join(args.inversion, "*.pt")))
        l_bnames = [os.path.basename(x) for x in l_inv_paths]
        l_prompt_paths = [os.path.join(args.prompt, x.replace(".pt",".txt")) for x in l_bnames]
    else:
        l_inv_paths = [args.inversion]
        l_prompt_paths = [args.prompt]

    # Make the editing pipeline
    pipe = EditingPipeline.from_pretrained(args.model_path, torch_dtype=torch_dtype).to("cuda")
    pipe.scheduler = DDIMScheduler.from_config(pipe.scheduler.config)

    for inv_path, prompt_path in zip(l_inv_paths, l_prompt_paths):
        prompt_str = open(prompt_path).read().strip()
        rec_pil, edit_pil = pipe(prompt_str,
                num_inference_steps=args.num_ddim_steps,
                x_in=torch.load(inv_path).unsqueeze(0),
                edit_dir=construct_direction(args.task_name),
                guidance_amount=args.xa_guidance,
                guidance_scale=args.negative_guidance_scale,
                negative_prompt=prompt_str  # use the unedited prompt for the negative prompt
        )

        # derive the output name from inv_path (the committed code used
        # args.inversion here, which breaks when --inversion is a folder)
        bname = os.path.basename(inv_path).split(".")[0]
        edit_pil[0].save(os.path.join(args.results_folder, f"edit/{bname}.png"))
        rec_pil[0].save(os.path.join(args.results_folder, f"reconstruction/{bname}.png"))
```
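Given the inversion and caption this commit also checks in under `output/test_cat/`, a plausible invocation (the committed `output/test_cat/edit/cat_1.png` looks like the result of exactly such a run) would be:

```sh
python src/edit_real.py \
    --inversion "output/test_cat/inversion/cat_1.pt" \
    --prompt "output/test_cat/prompt/cat_1.txt" \
    --task_name "cat2dog"
```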
src/edit_synthetic.py ADDED

```python
import os, pdb

import argparse
import numpy as np
import torch
import requests
from PIL import Image

from diffusers import DDIMScheduler
from utils.edit_directions import construct_direction
from utils.edit_pipeline import EditingPipeline


if __name__=="__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('--prompt_str', type=str, required=True)
    parser.add_argument('--random_seed', default=0, type=int)  # type=int added; the committed code would pass a CLI-supplied seed as a string
    parser.add_argument('--task_name', type=str, default='cat2dog')
    parser.add_argument('--results_folder', type=str, default='output/test_cat')
    parser.add_argument('--num_ddim_steps', type=int, default=50)
    parser.add_argument('--model_path', type=str, default='CompVis/stable-diffusion-v1-4')
    parser.add_argument('--xa_guidance', default=0.15, type=float)
    parser.add_argument('--negative_guidance_scale', default=5.0, type=float)
    parser.add_argument('--use_float_16', action='store_true')
    args = parser.parse_args()

    os.makedirs(args.results_folder, exist_ok=True)

    if args.use_float_16:
        torch_dtype = torch.float16
    else:
        torch_dtype = torch.float32

    # make the input noise map
    torch.cuda.manual_seed(args.random_seed)
    x = torch.randn((1,4,64,64), device="cuda")

    # Make the editing pipeline
    pipe = EditingPipeline.from_pretrained(args.model_path, torch_dtype=torch_dtype).to("cuda")
    pipe.scheduler = DDIMScheduler.from_config(pipe.scheduler.config)

    rec_pil, edit_pil = pipe(args.prompt_str,
            num_inference_steps=args.num_ddim_steps,
            x_in=x,
            edit_dir=construct_direction(args.task_name),
            guidance_amount=args.xa_guidance,
            guidance_scale=args.negative_guidance_scale,
            negative_prompt=""  # use the empty string for the negative prompt
    )

    edit_pil[0].save(os.path.join(args.results_folder, "edit.png"))
    rec_pil[0].save(os.path.join(args.results_folder, "reconstruction.png"))
```
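A sketch invocation; the prompt and output folder here are illustrative, not taken from the diff:

```sh
python src/edit_synthetic.py \
    --prompt_str "a painting of a cat sitting on a ball" \
    --task_name "cat2dog" \
    --results_folder "output/synth_cat2dog"
```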
src/inversion.py ADDED

```python
import os, pdb

import argparse
from glob import glob   # missing from the committed file, but glob() is used below
import numpy as np
import torch
import requests
from PIL import Image

from lavis.models import load_model_and_preprocess

from utils.ddim_inv import DDIMInversion
from utils.scheduler import DDIMInverseScheduler

if __name__=="__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('--input_image', type=str, default='assets/test_images/cat_a.png')
    parser.add_argument('--results_folder', type=str, default='output/test_cat')
    parser.add_argument('--num_ddim_steps', type=int, default=50)
    parser.add_argument('--model_path', type=str, default='CompVis/stable-diffusion-v1-4')
    parser.add_argument('--use_float_16', action='store_true')
    args = parser.parse_args()

    # make the output folders
    os.makedirs(os.path.join(args.results_folder, "inversion"), exist_ok=True)
    os.makedirs(os.path.join(args.results_folder, "prompt"), exist_ok=True)

    if args.use_float_16:
        torch_dtype = torch.float16
    else:
        torch_dtype = torch.float32

    # load the BLIP model
    model_blip, vis_processors, _ = load_model_and_preprocess(name="blip_caption", model_type="base_coco", is_eval=True, device=torch.device("cuda"))
    # make the DDIM inversion pipeline
    pipe = DDIMInversion.from_pretrained(args.model_path, torch_dtype=torch_dtype).to("cuda")
    pipe.scheduler = DDIMInverseScheduler.from_config(pipe.scheduler.config)

    # if the input is a folder, collect all the images as a list
    if os.path.isdir(args.input_image):
        l_img_paths = sorted(glob(os.path.join(args.input_image, "*.png")))
    else:
        l_img_paths = [args.input_image]

    for img_path in l_img_paths:
        # use img_path, not args.input_image as committed, so folder inputs work
        bname = os.path.basename(img_path).split(".")[0]
        img = Image.open(img_path).resize((512,512), Image.Resampling.LANCZOS)
        # generate the caption
        _image = vis_processors["eval"](img).unsqueeze(0).cuda()
        prompt_str = model_blip.generate({"image": _image})[0]
        x_inv, x_inv_image, x_dec_img = pipe(
            prompt_str,
            guidance_scale=1,
            num_inversion_steps=args.num_ddim_steps,
            img=img,
            torch_dtype=torch_dtype
        )
        # save the inversion
        torch.save(x_inv[0], os.path.join(args.results_folder, f"inversion/{bname}.pt"))
        # save the prompt string
        with open(os.path.join(args.results_folder, f"prompt/{bname}.txt"), "w") as f:
            f.write(prompt_str)
```
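This is the same command that `app.py` above hard-codes:

```sh
python src/inversion.py \
    --input_image "assets/test_images/cats/cat_1.png" \
    --results_folder "output/test_cat"
```

Each run writes `inversion/<name>.pt` (the inverted noise map) and `prompt/<name>.txt` (the BLIP caption) under the results folder, which is exactly what `src/edit_real.py` consumes.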
src/make_edit_direction.py ADDED

```python
import os, pdb

import argparse
import numpy as np
import torch
import requests
from PIL import Image

from diffusers import DDIMScheduler
from utils.edit_pipeline import EditingPipeline


## convert sentences to sentence embeddings
def load_sentence_embeddings(l_sentences, tokenizer, text_encoder, device="cuda"):
    with torch.no_grad():
        l_embeddings = []
        for sent in l_sentences:
            text_inputs = tokenizer(
                sent,
                padding="max_length",
                max_length=tokenizer.model_max_length,
                truncation=True,
                return_tensors="pt",
            )
            text_input_ids = text_inputs.input_ids
            prompt_embeds = text_encoder(text_input_ids.to(device), attention_mask=None)[0]
            l_embeddings.append(prompt_embeds)
    # average the per-sentence embeddings into a single mean embedding
    return torch.concatenate(l_embeddings, dim=0).mean(dim=0).unsqueeze(0)


if __name__=="__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('--file_source_sentences', required=True)
    parser.add_argument('--file_target_sentences', required=True)
    parser.add_argument('--output_folder', required=True)
    parser.add_argument('--model_path', type=str, default='CompVis/stable-diffusion-v1-4')
    args = parser.parse_args()

    # load the model
    pipe = EditingPipeline.from_pretrained(args.model_path, torch_dtype=torch.float16).to("cuda")

    # splitext drops the extension; the committed strip(".txt") removes
    # characters from both ends and would mangle a name like "cat.txt"
    bname_src = os.path.splitext(os.path.basename(args.file_source_sentences))[0]
    outf_src = os.path.join(args.output_folder, bname_src+".pt")
    if os.path.exists(outf_src):
        print(f"Skipping source file {outf_src} as it already exists")
    else:
        with open(args.file_source_sentences, "r") as f:
            l_sents = [x.strip() for x in f.readlines()]
        mean_emb = load_sentence_embeddings(l_sents, pipe.tokenizer, pipe.text_encoder, device="cuda")
        print(mean_emb.shape)
        torch.save(mean_emb, outf_src)

    bname_tgt = os.path.splitext(os.path.basename(args.file_target_sentences))[0]
    outf_tgt = os.path.join(args.output_folder, bname_tgt+".pt")
    if os.path.exists(outf_tgt):
        print(f"Skipping target file {outf_tgt} as it already exists")
    else:
        with open(args.file_target_sentences, "r") as f:
            l_sents = [x.strip() for x in f.readlines()]
        mean_emb = load_sentence_embeddings(l_sents, pipe.tokenizer, pipe.text_encoder, device="cuda")
        print(mean_emb.shape)
        torch.save(mean_emb, outf_tgt)
```
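The mean source and target embeddings saved here are the ingredients for an edit direction; `construct_direction` (in `utils/edit_directions.py`, not part of this diff) presumably subtracts one from the other, and the committed `assets/embeddings_sd_1.4/*.pt` files look like outputs of this script for cat/dog/horse/zebra. A sketch invocation, with hypothetical sentence files under the `assets/edit_sentences` folder that `.gitignore` references:

```sh
python src/make_edit_direction.py \
    --file_source_sentences "assets/edit_sentences/cat.txt" \
    --file_target_sentences "assets/edit_sentences/dog.txt" \
    --output_folder "assets/embeddings_sd_1.4"
```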