# Using a reference image to add an object to a generated image
import torch
from diffusers import StableDiffusionGLIGENTextImagePipeline
from diffusers.utils import load_image


def normalize_bbox(bboxes, img_width, img_height):
    # Convert pixel-space bounding boxes to the [0, 1] range expected by GLIGEN.
    normalized_bboxes = []
    for box in bboxes:
        x_min, y_min, x_max, y_max = box
        x_min = x_min / img_width
        y_min = y_min / img_height
        x_max = x_max / img_width
        y_max = y_max / img_height
        normalized_bboxes.append([x_min, y_min, x_max, y_max])
    return normalized_bboxes
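
# Worked example (illustrative input, not from the original script): a box drawn
# in pixels on a 600x600 canvas maps to the normalized coordinates used in
# __main__ below.
# normalize_bbox([[0, 54, 318, 456]], 600, 600) -> [[0.0, 0.09, 0.53, 0.76]]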


def inference_image(pipe, prompt, grounding_instruction, state):
    # Generate an image whose grounded objects are placed according to the
    # boxes stored in `state` (pixel coordinates on a 600x600 canvas).
    print(prompt)
    print(grounding_instruction)
    bbox = state["boxes"]
    print(bbox)
    bbox = normalize_bbox(bbox, 600, 600)
    print(bbox)
    # Split the grounding instruction ("cat;dog;...") into individual phrases,
    # one per bounding box.
    objects = [obj.strip() for obj in grounding_instruction.split(";") if obj.strip()]
    print(objects)
    image = pipe(
        prompt=prompt,
        gligen_phrases=objects,  # pass the parsed phrase list, not the raw string
        gligen_images=[],
        gligen_boxes=bbox,
        gligen_scheduled_sampling_beta=1,
        output_type="pil",
        num_inference_steps=50,
    ).images[0]
    return image
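
# Minimal usage sketch. Assumption: `state` is a dict holding pixel-space boxes
# drawn on a 600x600 canvas (e.g. from a sketching UI); the values below are
# illustrative and not part of the original script.
#
# state = {"boxes": [[0, 54, 318, 456]]}
# result = inference_image(
#     pipe,
#     prompt="a flower sitting on the beach",
#     grounding_instruction="flower",
#     state=state,
# )
# result.save("./gligen-inference-image.jpg")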

if __name__ == "__main__":
    pipe = StableDiffusionGLIGENTextImagePipeline.from_pretrained(
        "anhnct/Gligen_Text_Image", torch_dtype=torch.float16
    )
    pipe = pipe.to("cuda")

    prompt = "a flower sitting on the beach"
    boxes = [[0.0, 0.09, 0.53, 0.76]]
    phrases = ["flower"]
    # gligen_image = load_image(
    #     "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/gligen/pexels-pixabay-60597.jpg"
    # )

    images = pipe(
        prompt=prompt,
        gligen_phrases=phrases,
        gligen_images=[],
        gligen_boxes=boxes,
        gligen_scheduled_sampling_beta=1,
        output_type="pil",
        num_inference_steps=50,
    ).images
    images[0].save("./gligen-generation-text-image-box.jpg")