"""Generate one Stable Diffusion 3 image per record of a conversation JSON file.

The input JSON is a list of records.  Each record is read for two keys:

* ``"conversations"`` -- a list of turns, each with ``"from"`` and ``"value"``;
  the ``"value"`` of every turn whose ``"from"`` is ``"gpt"`` is concatenated
  to form the text prompt.
* ``"image"`` -- a path relative to ``IMAGE_DIR`` where the generated image
  is written.

Usage::

    python <this_script> --json_filename data.json --cuda 0
"""
import argparse
import json
import os

import torch
from diffusers import StableDiffusion3Pipeline
from PIL import Image  # noqa: F401  (import kept from the original script; unused here)

# Root directory under which every record's "image" path is resolved.
IMAGE_DIR = "/mnt/petrelfs/zhuchenglin/LLaVA/playground/data"

# Checkpoint identifier handed to ``StableDiffusion3Pipeline.from_pretrained``.
MODEL_ID = "stabilityai/stable-diffusion-3-medium-diffusers"


def parse_args() -> argparse.Namespace:
    """Parse the command line.

    Returns:
        Namespace with ``json_filename`` (str) and ``cuda`` (int device index).
    """
    parser = argparse.ArgumentParser(description="Diffusion Pipeline with Arguments")
    parser.add_argument(
        "--json_filename",
        type=str,
        required=True,
        help="Path to the JSON file containing text data",
    )
    parser.add_argument(
        "--cuda",
        type=int,
        required=True,
        help="CUDA device to use for processing",
    )
    return parser.parse_args()


def build_prompt(conversations) -> str:
    """Concatenate the ``"value"`` of every turn whose ``"from"`` is ``"gpt"``.

    Args:
        conversations: Iterable of dicts carrying ``"from"`` and ``"value"``.

    Returns:
        The joined text, or an empty string when there is no ``"gpt"`` turn.
    """
    return "".join(
        turn["value"] for turn in conversations if turn["from"] == "gpt"
    )


def main() -> None:
    """Load the records, run the pipeline on each prompt and save the images."""
    args = parse_args()
    json_filename = args.json_filename
    cuda_device = f"cuda:{args.cuda}"
    print(json_filename, cuda_device)

    # Explicit UTF-8 so non-ASCII captions do not depend on the locale default.
    with open(json_filename, "r", encoding="utf-8") as f:
        json_data = json.load(f)

    pipe = StableDiffusion3Pipeline.from_pretrained(
        MODEL_ID, torch_dtype=torch.float16
    )
    pipe.to(cuda_device)

    for record in json_data:
        prompt = build_prompt(record["conversations"])
        image = pipe(
            prompt=prompt,
            prompt_3=prompt,
            negative_prompt="",
            num_inference_steps=60,
            height=1024,
            width=1024,
            guidance_scale=10.0,
            max_sequence_length=512,
        ).images[0]

        image_path = os.path.join(IMAGE_DIR, record["image"])
        # Create the image's full parent directory, not just the first path
        # component: nested paths ("a/b/c.jpg") need every level, and a bare
        # filename must not be turned into a directory.  ``exist_ok`` avoids a
        # check-then-create race when several workers share the output tree.
        os.makedirs(os.path.dirname(image_path), exist_ok=True)
        image.save(image_path)

    print("所有图像已成功生成并保存。")


if __name__ == "__main__":
    main()