"""Place objects at specified locations in a generated image with GLIGEN.

Uses ``StableDiffusionGLIGENTextImagePipeline`` to ground text phrases
(and optionally reference images) to bounding boxes in the output image.
"""
import torch
from diffusers import StableDiffusionGLIGENTextImagePipeline
from diffusers.utils import load_image


def normalize_bbox(bboxes, img_width, img_height):
    """Scale pixel-space boxes to the pipeline's expected 0-1 range.

    Args:
        bboxes: iterable of ``[x_min, y_min, x_max, y_max]`` in pixels.
        img_width: canvas width in pixels (must be non-zero).
        img_height: canvas height in pixels (must be non-zero).

    Returns:
        A new list of boxes with x coordinates divided by ``img_width``
        and y coordinates divided by ``img_height``.
    """
    return [
        [x_min / img_width, y_min / img_height,
         x_max / img_width, y_max / img_height]
        for x_min, y_min, x_max, y_max in bboxes
    ]


def inference_image(pipe, prompt, grounding_instruction, state):
    """Run grounded generation: place each phrase inside its bounding box.

    Args:
        pipe: a ``StableDiffusionGLIGENTextImagePipeline`` instance.
        prompt: overall text prompt for the image.
        grounding_instruction: semicolon-separated phrases, one per box.
        state: dict whose ``'boxes'`` key holds pixel-space bounding boxes
            (assumed to be on a 600x600 canvas — TODO confirm with caller).

    Returns:
        The first generated PIL image.
    """
    boxes = normalize_bbox(state['boxes'], 600, 600)
    # One phrase per box; strip whitespace and drop empty entries left
    # by trailing semicolons.
    phrases = [p.strip() for p in grounding_instruction.split(';') if p.strip()]
    image = pipe(
        prompt=prompt,
        # BUG FIX: the pipeline expects a list of phrases aligned with
        # gligen_boxes; previously the raw semicolon-joined string was
        # passed here while the parsed list was computed but never used.
        gligen_phrases=phrases,
        gligen_images=[],
        gligen_boxes=boxes,
        gligen_scheduled_sampling_beta=1,
        output_type="pil",
        num_inference_steps=50,
    ).images[0]
    return image


if __name__ == "__main__":
    pipe = StableDiffusionGLIGENTextImagePipeline.from_pretrained(
        "anhnct/Gligen_Text_Image", torch_dtype=torch.float16
    )
    pipe = pipe.to("cuda")

    prompt = "a flower sitting on the beach"
    boxes = [[0.0, 0.09, 0.53, 0.76]]  # already normalized to the 0-1 range
    phrases = ["flower"]

    images = pipe(
        prompt=prompt,
        gligen_phrases=phrases,
        gligen_images=[],
        gligen_boxes=boxes,
        gligen_scheduled_sampling_beta=1,
        output_type="pil",
        num_inference_steps=50,
    ).images
    images[0].save("./gligen-generation-text-image-box.jpg")