Qwen-Image-Edit

Runtime error

App Files Files Community

dangthr committed 4 days ago

Commit

da3cf12

verified ·

1 Parent(s): 4eb64b8

Update inference.py

Browse files

Files changed (1) hide show

inference.py +41 -87

inference.py CHANGED Viewed

@@ -1,5 +1,6 @@
 # inference.py
 import os
 import argparse
 import random
 import json
@@ -14,7 +15,7 @@ from PIL import Image
 from diffusers import QwenImageEditPipeline
 # --- 从原脚本保留的辅助函数 ---
 SYSTEM_PROMPT = '''
 # Edit Instruction Rewriter
 You are a professional edit instruction rewriter. Your task is to generate a precise, concise, and visually achievable professional-level edit instruction based on the user-provided instruction and the image to be edited.
@@ -80,9 +81,8 @@ def polish_prompt(prompt, img):
     if not os.environ.get('DASH_API_KEY'):
         print("[警告] 环境变量 DASH_API_KEY 未设置，将跳过提示词重写。")
         return prompt
     full_prompt = f"{SYSTEM_PROMPT}\n\nUser Input: {prompt}\n\nRewritten Prompt:"
-    for attempt in range(3): # 最多重试3次
         try:
             result = api(full_prompt, [img])
             if isinstance(result, str):
@@ -90,12 +90,10 @@ def polish_prompt(prompt, img):
                 result_data = json.loads(result_json_str)
             else:
                 result_data = json.loads(result)
             polished = result_data['Rewritten']
             return polished.strip().replace("\n", " ")
         except Exception as e:
             print(f"[警告] API调用失败 (尝试 {attempt + 1}): {e}")
     print("[错误] 多次尝试后提示词重写失败，将使用原始提示词。")
     return prompt
@@ -111,23 +109,11 @@ def api(prompt, img_list, model="qwen-vl-max-latest", kwargs={}):
     api_key = os.environ.get('DASH_API_KEY')
     if not api_key:
         raise EnvironmentError("DASH_API_KEY is not set")
-    messages = [
-        {"role": "system", "content": "you are a helpful assistant, you should provide useful answers to users."},
-        {"role": "user", "content": []}
-    ]
     for img in img_list:
         messages[1]["content"].append({"image": f"data:image/png;base64,{encode_image(img)}"})
     messages[1]["content"].append({"text": f"{prompt}"})
-    response = dashscope.MultiModalConversation.call(
-        api_key=api_key,
-        model=model,
-        messages=messages,
-        result_format='message',
-        response_format=kwargs.get('response_format', None),
-    )
     if response.status_code == 200:
         return response.output.choices[0].message.content[0]['text']
     else:
@@ -148,113 +134,81 @@ def load_image(image_path):
         print(f"   详细信息: {e}")
         return None
-# --- 主推理逻辑 ---
 def main(args):
     """Run one image-edit inference and save the result under ``output/``.
 
     Args:
         args: Parsed command-line namespace. This function reads
             ``input_image``, ``prompt``, ``seed``, ``random_seed``,
             ``steps``, ``guidance_scale`` and ``no_rewrite``.
 
     Returns:
         None. Failures are printed and the function returns early
         instead of raising.
     """
     output_dir = "output"
     os.makedirs(output_dir, exist_ok=True)
     dtype = torch.bfloat16
     device = "cuda" if torch.cuda.is_available() else "cpu"
     print(f"使用设备: {device}")
     print("正在加载 Qwen-Image-Edit 模型...")
     # Load the pipeline and move it to the selected device; abort on any failure.
     try:
         pipe = QwenImageEditPipeline.from_pretrained("Qwen/Qwen-Image-Edit", torch_dtype=dtype).to(device)
         print("模型加载完成。")
     except Exception as e:
-        print(f"❌ 错误：模型加载失败。请检查网络连接和依赖项。")
         print(f"   详细信息: {e}")
         return
     print(f"正在从 '{args.input_image}' 加载输入图片...")
     input_image = load_image(args.input_image)
     # A None result means the image could not be loaded; nothing to edit.
     if input_image is None:
         return
-    # Set the random seed: random in [0, int32 max] with --random_seed, else --seed.
     seed = random.randint(0, np.iinfo(np.int32).max) if args.random_seed else args.seed
     generator = torch.Generator(device=device).manual_seed(seed)
-    # Call polish_prompt unless prompt rewriting is disabled.
     prompt_to_use = polish_prompt(args.prompt, input_image) if not args.no_rewrite else args.prompt
     if not args.no_rewrite:
         print(f"重写后的提示词: '{prompt_to_use}'")
     print("-" * 30)
     print("🚀 开始推理...")
     print(f"  - 提示词: '{prompt_to_use}'")
     print(f"  - 随机种子: {seed}")
     print(f"  - 推理步数: {args.steps}")
-    print(f"  -引导系数 (Guidance Scale): {args.guidance_scale}")
     print("-" * 30)
     try:
-        images = pipe(
-            image=input_image,
-            prompt=prompt_to_use,
-            negative_prompt=" ", # fixed negative prompt
-            num_inference_steps=args.steps,
-            generator=generator,
-            true_cfg_scale=args.guidance_scale,
-            num_images_per_prompt=1
-        ).images
         # The output file name combines a timestamp with the seed used.
         timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
         output_path = os.path.join(output_dir, f"output_{timestamp}_{seed}.png")
         images[0].save(output_path)
         print(f"✅ 推理成功！图片已保存至: {output_path}")
     except Exception as e:
         print(f"❌ 推理过程中发生错误: {e}")
 # --- 命令行接口 ---
 # Script entry point: parse the command-line options and run inference.
 if __name__ == "__main__":
-    parser = argparse.ArgumentParser(description="Qwen 图像编辑命令行工具")
-    # Required: the edit instruction.
-    parser.add_argument(
-        "--prompt",
-        type=str,
-        required=True,
-        help="必须：用于编辑图像的指令。"
-    )
-    # Required: where to read the input image from.
-    parser.add_argument(
-        "--input_image",
-        type=str,
-        required=True,
-        help="必须：输入图片的本地路径或URL链接。"
-    )
-    # Fixed seed (default 42), used unless --random_seed is given.
-    parser.add_argument(
-        "--seed",
-        type=int,
-        default=42,
-        help="用于复现结果的随机种子，默认为 42。"
-    )
-    parser.add_argument(
-        "--random_seed",
-        action="store_true",
-        help="如果设置此项，则使用一个随机种子。"
-    )
-    # Number of inference steps (default 50).
-    parser.add_argument(
-        "--steps",
-        type=int,
-        default=50,
-        help="推理步数，默认为 50。"
-    )
-    # Guidance scale (default 4.0); main() forwards it as true_cfg_scale.
-    parser.add_argument(
-        "--guidance_scale",
-        type=float,
-        default=4.0,
-        help="引导系数 (CFG scale)，默认为 4.0。"
-    )
-    # Flag that disables the prompt-rewriting step.
-    parser.add_argument(
-        "--no_rewrite",
-        action="store_true",
-        help="如果设置此项，则禁用提示词重写功能。"
-    )
     args = parser.parse_args()
     main(args)

 # inference.py
 import os
+import sys  # 导入 sys 模块
 import argparse
 import random
 import json
 from diffusers import QwenImageEditPipeline
 # --- 从原脚本保留的辅助函数 ---
+# SYSTEM_PROMPT, polish_prompt, encode_image, api 函数保持不变...
 SYSTEM_PROMPT = '''
 # Edit Instruction Rewriter
 You are a professional edit instruction rewriter. Your task is to generate a precise, concise, and visually achievable professional-level edit instruction based on the user-provided instruction and the image to be edited.
     if not os.environ.get('DASH_API_KEY'):
         print("[警告] 环境变量 DASH_API_KEY 未设置，将跳过提示词重写。")
         return prompt
     full_prompt = f"{SYSTEM_PROMPT}\n\nUser Input: {prompt}\n\nRewritten Prompt:"
+    for attempt in range(3):
         try:
             result = api(full_prompt, [img])
             if isinstance(result, str):
                 result_data = json.loads(result_json_str)
             else:
                 result_data = json.loads(result)
             polished = result_data['Rewritten']
             return polished.strip().replace("\n", " ")
         except Exception as e:
             print(f"[警告] API调用失败 (尝试 {attempt + 1}): {e}")
     print("[错误] 多次尝试后提示词重写失败，将使用原始提示词。")
     return prompt
     api_key = os.environ.get('DASH_API_KEY')
     if not api_key:
         raise EnvironmentError("DASH_API_KEY is not set")
+    messages = [{"role": "system", "content": "you are a helpful assistant, you should provide useful answers to users."},{"role": "user", "content": []}]
     for img in img_list:
         messages[1]["content"].append({"image": f"data:image/png;base64,{encode_image(img)}"})
     messages[1]["content"].append({"text": f"{prompt}"})
+    response = dashscope.MultiModalConversation.call(api_key=api_key,model=model,messages=messages,result_format='message',response_format=kwargs.get('response_format', None),)
     if response.status_code == 200:
         return response.output.choices[0].message.content[0]['text']
     else:
         print(f"   详细信息: {e}")
         return None
+def prepare_model():
+    """仅下载并缓存模型，不执行推理"""
+    print("正在准备模型... 如果是首次运行，将开始下载模型文件（约10GB）。")
+    print("请耐心等待，下载速度取决于您的网络状况。")
+    dtype = torch.bfloat16
+    try:
+        QwenImageEditPipeline.from_pretrained(
+            "Qwen/Qwen-Image-Edit",
+            torch_dtype=dtype,
+            low_cpu_mem_usage=True # 优化内存使用
+        )
+        print("\n✅ 模型文件已成功准备（下载/加载）到本地缓存。")
+        return True
+    except Exception as e:
+        print(f"\n❌ 错误：模型下载或加载失败。请检查网络连接或磁盘空间。")
+        print(f"   详细信息: {e}")
+        return False
+# --- 主推理逻辑 ---
 def main(args):
     """Run one image-edit inference and save the result under ``output/``.
 
     Args:
         args: Parsed command-line namespace. This function reads
             ``input_image``, ``prompt``, ``seed``, ``random_seed``,
             ``steps``, ``guidance_scale`` and ``no_rewrite``.
 
     Returns:
         None. Failures are printed and the function returns early
         instead of raising.
     """
     output_dir = "output"
     os.makedirs(output_dir, exist_ok=True)
     dtype = torch.bfloat16
     device = "cuda" if torch.cuda.is_available() else "cpu"
     print(f"使用设备: {device}")
     print("正在加载 Qwen-Image-Edit 模型...")
     # Load the pipeline and move it to the selected device; abort on any failure.
     try:
         pipe = QwenImageEditPipeline.from_pretrained("Qwen/Qwen-Image-Edit", torch_dtype=dtype).to(device)
         print("模型加载完成。")
     except Exception as e:
+        print(f"❌ 错误：模型加载失败。")
         print(f"   详细信息: {e}")
         return
     print(f"正在从 '{args.input_image}' 加载输入图片...")
     input_image = load_image(args.input_image)
     # A None result means the image could not be loaded; nothing to edit.
     if input_image is None:
         return
     # Random seed in [0, int32 max] with --random_seed, otherwise the --seed value.
     seed = random.randint(0, np.iinfo(np.int32).max) if args.random_seed else args.seed
     generator = torch.Generator(device=device).manual_seed(seed)
     # Rewrite the prompt via polish_prompt unless --no_rewrite was given.
     prompt_to_use = polish_prompt(args.prompt, input_image) if not args.no_rewrite else args.prompt
     if not args.no_rewrite:
         print(f"重写后的提示词: '{prompt_to_use}'")
     print("-" * 30)
     print("🚀 开始推理...")
     print(f"  - 提示词: '{prompt_to_use}'")
     print(f"  - 随机种子: {seed}")
     print(f"  - 推理步数: {args.steps}")
+    print(f"  - 引导系数 (Guidance Scale): {args.guidance_scale}")
     print("-" * 30)
     try:
+        # The negative prompt is fixed to a single space; one image is requested per call.
+        images = pipe(image=input_image,prompt=prompt_to_use,negative_prompt=" ",num_inference_steps=args.steps,generator=generator,true_cfg_scale=args.guidance_scale,num_images_per_prompt=1).images
         # The output file name combines a timestamp with the seed used.
         timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
         output_path = os.path.join(output_dir, f"output_{timestamp}_{seed}.png")
         images[0].save(output_path)
         print(f"✅ 推理成功！图片已保存至: {output_path}")
     except Exception as e:
         print(f"❌ 推理过程中发生错误: {e}")
 # --- 命令行接口 ---
 if __name__ == "__main__":
+    # 新增逻辑：检查是否只运行脚本而不带任何参数
+    if len(sys.argv) == 1:
+        prepare_model()
+        print("任务完成，脚本退出。")
+        sys.exit(0) # 正常退出
+    # 如果带有参数，则执行原有的推理流程
+    parser = argparse.ArgumentParser(description="Qwen 图像编辑命令行工具", epilog="如果不提供任何参数，脚本将只下载模型然后退出。")
+    parser.add_argument("--prompt",type=str,required=True,help="必须：用于编辑图像的指令。")
+    parser.add_argument("--input_image",type=str,required=True,help="必须：输入图片的本地路径或URL链接。")
+    parser.add_argument("--seed",type=int,default=42,help="用于复现结果的随机种子，默认为 42。")
+    parser.add_argument("--random_seed",action="store_true",help="如果设置此项，则使用一个随机种子。")
+    parser.add_argument("--steps",type=int,default=50,help="推理步数，默认为 50。")
+    parser.add_argument("--guidance_scale",type=float,default=4.0,help="引导系数 (CFG scale)，默认为 4.0。")
+    parser.add_argument("--no_rewrite",action="store_true",help="如果设置此项，则禁用提示词重写功能。")
     args = parser.parse_args()
     main(args)