# image-variation-experiments / inference_flux_model.py
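"""Inference script for the Flux image-variation experiments.

Loads the OpenFLUX.1 pipeline with a fine-tuned transformer
("flux-image-variations-model") and generates variations guided by a text
prompt and an optional image prompt, saving a concatenated grid of the
results to the image-outputs/ directory.
"""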
import argparse
import time
import torch
from diffusers import FluxTransformer2DModel
from transformers import CLIPModel
from pathlib import Path
from PIL import Image
from open_flux_pipeline import FluxWithCFGPipeline
# Module-level pipeline handle; initialized in main() before generate() runs.
pipe = None


def generate(prompt, image_prompt=None, guidance_scale=2, num_images=4, resolution=512):
    """Generate `num_images` variations from a text prompt and an optional image prompt."""
    # Default to blank (black) image prompts so the pipeline always receives
    # image conditioning; a real image prompt overrides the positive slot.
    image_prompt_kwargs = {
        "image_prompt": Image.new("RGB", (resolution, resolution)),
        "negative_image_prompt": Image.new("RGB", (resolution, resolution)),
    }
    if image_prompt is not None:
        image_prompt_kwargs["image_prompt"] = image_prompt

    # Run the pipeline without gradient tracking.
    with torch.no_grad():
        images = pipe(
            prompt=prompt,
            negative_prompt="",
            height=resolution,
            width=resolution,
            max_sequence_length=256,
            guidance_scale=guidance_scale,
            num_images_per_prompt=num_images,
            **image_prompt_kwargs,
        ).images
    # Concatenate the generated images horizontally, with a 1-pixel separator
    # between neighbours.
    widths, heights = zip(*[img.size for img in images])
    total_width = sum(widths) + len(images) - 1
    max_height = max(heights)
    out = Image.new("RGB", (total_width, max_height))
    x_offset = 0
    for img in images:
        out.paste(img, (x_offset, 0))
        x_offset += img.width + 1
    # If an image prompt was provided, stack it above the generated images.
    if image_prompt is not None:
        out_with_image_prompt = Image.new("RGB", (out.width, out.height + 1 + resolution))
        resized_prompt = image_prompt.resize((resolution, resolution), Image.Resampling.BILINEAR)
        out_with_image_prompt.paste(resized_prompt, (0, 0))
        out_with_image_prompt.paste(out, (0, resolution + 1))
        out = out_with_image_prompt
    # Ensure the output directory exists and save the final image.
    Path("image-outputs").mkdir(parents=True, exist_ok=True)
    output_filename = f"image-outputs/{prompt[:40].replace(' ', '_')}.{int(time.time())}.png"
    out.save(output_filename)
    print(f"Saved output to {output_filename}")

def main():
    parser = argparse.ArgumentParser(
        description="Generate images using an image and a text prompt (Flux image variations)."
    )
    parser.add_argument("--prompt", type=str, default="",
                        help='Text prompt for image generation (default: "")')
    parser.add_argument("--image_prompt", type=str, default=None,
                        help="Path to an optional image to use as a prompt")
    parser.add_argument("--guidance_scale", type=float, default=2,
                        help="Guidance scale for image generation (default: 2)")
    parser.add_argument("--num_images", type=int, default=4,
                        help="Number of images to generate (default: 4)")
    parser.add_argument("--resolution", type=int, default=512,
                        help="Resolution of generated images (default: 512)")
    args = parser.parse_args()
    # Load the models: the full CLIP model serves as the pipeline's text encoder,
    # the OpenFLUX.1 pipeline is loaded without a transformer, and the
    # fine-tuned image-variation transformer is attached afterwards.
    global pipe
    clip = CLIPModel.from_pretrained("openai/clip-vit-large-patch14", torch_dtype=torch.bfloat16)
    pipe = FluxWithCFGPipeline.from_pretrained(
        "ostris/OpenFLUX.1", text_encoder=clip, transformer=None, torch_dtype=torch.bfloat16
    )
    pipe.transformer = FluxTransformer2DModel.from_pretrained(
        "flux-image-variations-model", torch_dtype=torch.bfloat16
    )
    pipe.to("cuda")
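    # Note: if GPU memory is tight, diffusers pipelines generally support
    # pipe.enable_model_cpu_offload() as an alternative to pipe.to("cuda"),
    # assuming FluxWithCFGPipeline inherits it from the diffusers base class.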
    # Open the optional image prompt and run generation.
    img_prompt = Image.open(args.image_prompt) if args.image_prompt else None
    generate(args.prompt, image_prompt=img_prompt, guidance_scale=args.guidance_scale,
             num_images=args.num_images, resolution=args.resolution)

if __name__ == "__main__":
    main()
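
# Example invocations (prompts and file names are illustrative):
#
#     python inference_flux_model.py --prompt "a red bicycle in the rain"
#     python inference_flux_model.py --image_prompt reference.jpg --num_images 4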