Update README.md
Browse files
README.md
CHANGED
@@ -25,41 +25,23 @@ pip install diffusers transformers
|
|
25 |
### Text to image
|
26 |
|
27 |
```python
|
28 |
-
from diffusers import KandinskyPipeline, KandinskyPriorPipeline
|
29 |
import torch
|
30 |
|
31 |
-
|
32 |
-
pipe_prior = KandinskyPriorPipeline.from_pretrained("kandinsky-community/kandinsky-2-1-prior", torch_dtype=torch.float16)
|
33 |
pipe_prior.to("cuda")
|
34 |
|
|
|
|
|
|
|
35 |
prompt = "A alien cheeseburger creature eating itself, claymation, cinematic, moody lighting"
|
36 |
negative_prompt = "low quality, bad quality"
|
37 |
|
38 |
-
|
39 |
-
|
40 |
-
).images
|
41 |
-
|
42 |
-
zero_image_emb = pipe_prior(
|
43 |
-
negative_prompt, guidance_scale=1.0, num_inference_steps=25, generator=generator, negative_prompt=negative_prompt
|
44 |
-
).images
|
45 |
-
|
46 |
-
pipe = KandinskyPipeline.from_pretrained("kandinsky-community/kandinsky-2-1", torch_dtype=torch.float16)
|
47 |
-
pipe.to("cuda")
|
48 |
-
|
49 |
-
|
50 |
-
images = pipe(
|
51 |
-
prompt,
|
52 |
-
image_embeds=image_emb,
|
53 |
-
negative_image_embeds=zero_image_emb,
|
54 |
-
num_images_per_prompt=2,
|
55 |
-
height=768,
|
56 |
-
width=768,
|
57 |
-
num_inference_steps=100,
|
58 |
-
guidance_scale=4.0,
|
59 |
-
generator=generator,
|
60 |
-
).images[0]
|
61 |
|
62 |
-
image
|
|
|
63 |
```
|
64 |
|
65 |
![img](https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/kandinsky-docs/cheeseburger.png)
|
@@ -81,7 +63,9 @@ original_image = Image.open(BytesIO(response.content)).convert("RGB")
|
|
81 |
original_image = original_image.resize((768, 512))
|
82 |
|
83 |
# create prior
|
84 |
-
pipe_prior = KandinskyPriorPipeline.from_pretrained("kandinsky-community/kandinsky-2-1-prior", torch_dtype=torch.float16)
|
|
|
|
|
85 |
pipe_prior.to("cuda")
|
86 |
|
87 |
# create img2img pipeline
|
@@ -91,22 +75,16 @@ pipe.to("cuda")
|
|
91 |
prompt = "A fantasy landscape, Cinematic lighting"
|
92 |
negative_prompt = "low quality, bad quality"
|
93 |
|
94 |
-
|
95 |
-
|
96 |
-
).images
|
97 |
-
|
98 |
-
zero_image_emb = pipe_prior(
|
99 |
-
negative_prompt, guidance_scale=4.0, num_inference_steps=25, generator=generator, negative_prompt=negative_prompt
|
100 |
-
).images
|
101 |
|
102 |
out = pipe(
|
103 |
prompt,
|
104 |
image=original_image,
|
105 |
-
image_embeds=image_emb,
|
106 |
-
negative_image_embeds=zero_image_emb,
|
107 |
height=768,
|
108 |
width=768,
|
109 |
-
num_inference_steps=500,
|
110 |
strength=0.3,
|
111 |
)
|
112 |
|
@@ -124,9 +102,10 @@ from diffusers.utils import load_image
|
|
124 |
import PIL
|
125 |
|
126 |
import torch
|
127 |
-
from torchvision import transforms
|
128 |
|
129 |
-
pipe_prior = KandinskyPriorPipeline.from_pretrained("kandinsky-community/kandinsky-2-1-prior", torch_dtype=torch.float16)
|
|
|
|
|
130 |
pipe_prior.to("cuda")
|
131 |
|
132 |
img1 = load_image(
|
@@ -137,16 +116,20 @@ img2 = load_image(
|
|
137 |
"https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main" "/kandinsky/starry_night.jpeg"
|
138 |
)
|
139 |
|
|
|
140 |
images_texts = ["a cat", img1, img2]
|
|
|
|
|
141 |
weights = [0.3, 0.3, 0.4]
|
142 |
-
|
|
|
|
|
|
|
143 |
|
144 |
pipe = KandinskyPipeline.from_pretrained("kandinsky-community/kandinsky-2-1", torch_dtype=torch.float16)
|
145 |
pipe.to("cuda")
|
146 |
|
147 |
-
image = pipe(
|
148 |
-
"", image_embeds=image_emb, negative_image_embeds=zero_image_emb, height=768, width=768, num_inference_steps=150
|
149 |
-
).images[0]
|
150 |
|
151 |
image.save("starry_cat.png")
|
152 |
```
|
|
|
25 |
### Text to image
|
26 |
|
27 |
```python
|
28 |
+
from diffusers import DiffusionPipeline
|
29 |
import torch
|
30 |
|
31 |
+
pipe_prior = DiffusionPipeline.from_pretrained("kandinsky-community/kandinsky-2-1-prior", torch_dtype=torch.float16)
|
|
|
32 |
pipe_prior.to("cuda")
|
33 |
|
34 |
+
t2i_pipe = DiffusionPipeline.from_pretrained("kandinsky-community/kandinsky-2-1", torch_dtype=torch.float16)
|
35 |
+
t2i_pipe.to("cuda")
|
36 |
+
|
37 |
prompt = "A alien cheeseburger creature eating itself, claymation, cinematic, moody lighting"
|
38 |
negative_prompt = "low quality, bad quality"
|
39 |
|
40 |
+
generator = torch.Generator(device="cuda").manual_seed(12)
|
41 |
+
image_embeds, negative_image_embeds = pipe_prior(prompt, negative_prompt, generator=generator).to_tuple()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
42 |
|
43 |
+
image = t2i_pipe(prompt, image_embeds=image_embeds, negative_image_embeds=negative_image_embeds).images[0]
|
44 |
+
image.save("cheeseburger_monster.png")
|
45 |
```
|
46 |
|
47 |
![img](https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/kandinsky-docs/cheeseburger.png)
|
|
|
63 |
original_image = original_image.resize((768, 512))
|
64 |
|
65 |
# create prior
|
66 |
+
pipe_prior = KandinskyPriorPipeline.from_pretrained(
|
67 |
+
"kandinsky-community/kandinsky-2-1-prior", torch_dtype=torch.float16
|
68 |
+
)
|
69 |
pipe_prior.to("cuda")
|
70 |
|
71 |
# create img2img pipeline
|
|
|
75 |
prompt = "A fantasy landscape, Cinematic lighting"
|
76 |
negative_prompt = "low quality, bad quality"
|
77 |
|
78 |
+
generator = torch.Generator(device="cuda").manual_seed(30)
|
79 |
+
image_embeds, negative_image_embeds = pipe_prior(prompt, negative_prompt, generator=generator).to_tuple()
|
|
|
|
|
|
|
|
|
|
|
80 |
|
81 |
out = pipe(
|
82 |
prompt,
|
83 |
image=original_image,
|
84 |
+
image_embeds=image_embeds,
|
85 |
+
negative_image_embeds=negative_image_embeds,
|
86 |
height=768,
|
87 |
width=768,
|
|
|
88 |
strength=0.3,
|
89 |
)
|
90 |
|
|
|
102 |
import PIL
|
103 |
|
104 |
import torch
|
|
|
105 |
|
106 |
+
pipe_prior = KandinskyPriorPipeline.from_pretrained(
|
107 |
+
"kandinsky-community/kandinsky-2-1-prior", torch_dtype=torch.float16
|
108 |
+
)
|
109 |
pipe_prior.to("cuda")
|
110 |
|
111 |
img1 = load_image(
|
|
|
116 |
"https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main" "/kandinsky/starry_night.jpeg"
|
117 |
)
|
118 |
|
119 |
+
# add all the conditions we want to interpolate; each one can be either a text prompt or an image
|
120 |
images_texts = ["a cat", img1, img2]
|
121 |
+
|
122 |
+
# specify the weights for each condition in images_texts
|
123 |
weights = [0.3, 0.3, 0.4]
|
124 |
+
|
125 |
+
# We can leave the prompt empty
|
126 |
+
prompt = ""
|
127 |
+
prior_out = pipe_prior.interpolate(images_texts, weights)
|
128 |
|
129 |
pipe = KandinskyPipeline.from_pretrained("kandinsky-community/kandinsky-2-1", torch_dtype=torch.float16)
|
130 |
pipe.to("cuda")
|
131 |
|
132 |
+
image = pipe(prompt, **prior_out, height=768, width=768).images[0]
|
|
|
|
|
133 |
|
134 |
image.save("starry_cat.png")
|
135 |
```
|