Spaces: Running on Zero
Update app.py
app.py CHANGED
@@ -15,21 +15,21 @@ num_frames, width, height = 49, 832, 480
 gpu_id = 0
 device = f'cuda:{gpu_id}' if torch.cuda.is_available() else 'cpu'
 
-from modelscope import snapshot_download
-model_dir = snapshot_download( # https://www.modelscope.cn/models/AI-ModelScope/RMBG-2.0
-    model_id = 'AI-ModelScope/RMBG-2.0',
-    local_dir = 'ckpt/RMBG-2.0',
-    ignore_file_pattern = ['onnx*'],
-)
+# from modelscope import snapshot_download
+# model_dir = snapshot_download( # https://www.modelscope.cn/models/AI-ModelScope/RMBG-2.0
+#     model_id = 'AI-ModelScope/RMBG-2.0',
+#     local_dir = 'ckpt/RMBG-2.0',
+#     ignore_file_pattern = ['onnx*'],
+# )
 
-from huggingface_hub import snapshot_download, hf_hub_download
-snapshot_download( # download the whole repo; downloading briaai/RMBG-2.0 requires a token
-    repo_id="alibaba-pai/Wan2.1-Fun-1.3B-Control",
-    local_dir="ckpt/Wan2.1-Fun-1.3B-Control",
-    local_dir_use_symlinks=False,
-    resume_download=True,
-    repo_type="model"
-)
+# from huggingface_hub import snapshot_download, hf_hub_download
+# snapshot_download( # download the whole repo; downloading briaai/RMBG-2.0 requires a token
+#     repo_id="alibaba-pai/Wan2.1-Fun-1.3B-Control",
+#     local_dir="ckpt/Wan2.1-Fun-1.3B-Control",
+#     local_dir_use_symlinks=False,
+#     resume_download=True,
+#     repo_type="model"
+# )
 
 # hf_hub_download(
 #     repo_id="Kunbyte/Lumen",
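This hunk only comments out the checkpoint downloads; they can still be run once, outside the app, to populate ckpt/. A minimal standalone sketch, assuming the same ckpt/ layout: the script name and token placeholder are hypothetical, and ignore_patterns is assumed as the huggingface_hub counterpart of modelscope's ignore_file_pattern.

# one_time_download.py -- hypothetical helper mirroring the commented-out calls
from huggingface_hub import snapshot_download

# Wan2.1-Fun-1.3B-Control is a public repo; no token needed.
snapshot_download(
    repo_id="alibaba-pai/Wan2.1-Fun-1.3B-Control",
    local_dir="ckpt/Wan2.1-Fun-1.3B-Control",
    repo_type="model",
)

# briaai/RMBG-2.0 is gated: a token with granted access is required (see comment above).
snapshot_download(
    repo_id="briaai/RMBG-2.0",
    local_dir="ckpt/RMBG-2.0",
    ignore_patterns=["onnx*"],  # skip ONNX exports, like the modelscope call above
    token="hf_xxx",             # placeholder: substitute your own access token
)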
@@ -39,38 +39,38 @@ snapshot_download( # download the whole repo; downloading briaai/RMBG-2.0 requires a token
 #     resume_download=True,
 # )
 
-rmbg_model = AutoModelForImageSegmentation.from_pretrained('ckpt/RMBG-2.0', trust_remote_code=True) # ckpt/RMBG-2.0
-torch.set_float32_matmul_precision(['high', 'highest'][0])
-rmbg_model.to(device)
-rmbg_model.eval()
+# rmbg_model = AutoModelForImageSegmentation.from_pretrained('ckpt/RMBG-2.0', trust_remote_code=True) # ckpt/RMBG-2.0
+# torch.set_float32_matmul_precision(['high', 'highest'][0])
+# rmbg_model.to(device)
+# rmbg_model.eval()
 
 
-model_manager = ModelManager(device="cpu") # 1.3b: device=cpu: uses 6G VRAM, device=device: uses 16G VRAM; about 1-2 min per video
-wan_dit_path = 'train_res/wan1.3b_zh/full_wc0.5_f1gt0.5_real1_2_zh_en_l_s/lightning_logs/version_0/checkpoints/step-step=30000.ckpt'
+# model_manager = ModelManager(device="cpu") # 1.3b: device=cpu: uses 6G VRAM, device=device: uses 16G VRAM; about 1-2 min per video
+# wan_dit_path = 'train_res/wan1.3b_zh/full_wc0.5_f1gt0.5_real1_2_zh_en_l_s/lightning_logs/version_0/checkpoints/step-step=30000.ckpt'
 
-if 'wan14b' in wan_dit_path.lower(): # 14B: uses about 36G, about 10 min per video
-    model_manager.load_models(
-        [
-            wan_dit_path if wan_dit_path else 'ckpt/Wan2.1-Fun-14B-Control/diffusion_pytorch_model.safetensors',
-            'ckpt/Wan2.1-Fun-1.3B-Control/Wan2.1_VAE.pth',
-            'ckpt/Wan2.1-Fun-1.3B-Control/models_t5_umt5-xxl-enc-bf16.pth',
-            'ckpt/Wan2.1-Fun-1.3B-Control/models_clip_open-clip-xlm-roberta-large-vit-huge-14.pth',
-        ],
-        torch_dtype=torch.bfloat16, # float8_e4m3fn for fp8 quantization; bfloat16
-    )
-else:
-    wan_dit_path = None
-    model_manager.load_models(
-        [
-            wan_dit_path if wan_dit_path else 'ckpt/Wan2.1-Fun-1.3B-Control/diffusion_pytorch_model.safetensors',
-            'ckpt/Wan2.1-Fun-1.3B-Control/Wan2.1_VAE.pth',
-            'ckpt/Wan2.1-Fun-1.3B-Control/models_t5_umt5-xxl-enc-bf16.pth',
-            'ckpt/Wan2.1-Fun-1.3B-Control/models_clip_open-clip-xlm-roberta-large-vit-huge-14.pth',
-        ],
-        torch_dtype=torch.bfloat16,
-    )
-wan_pipe = WanVideoPipeline.from_model_manager(model_manager, torch_dtype=torch.bfloat16, device=device)
-wan_pipe.enable_vram_management(num_persistent_param_in_dit=None)
+# if 'wan14b' in wan_dit_path.lower(): # 14B: uses about 36G, about 10 min per video
+#     model_manager.load_models(
+#         [
+#             wan_dit_path if wan_dit_path else 'ckpt/Wan2.1-Fun-14B-Control/diffusion_pytorch_model.safetensors',
+#             'ckpt/Wan2.1-Fun-1.3B-Control/Wan2.1_VAE.pth',
+#             'ckpt/Wan2.1-Fun-1.3B-Control/models_t5_umt5-xxl-enc-bf16.pth',
+#             'ckpt/Wan2.1-Fun-1.3B-Control/models_clip_open-clip-xlm-roberta-large-vit-huge-14.pth',
+#         ],
+#         torch_dtype=torch.bfloat16, # float8_e4m3fn for fp8 quantization; bfloat16
+#     )
+# else:
+#     wan_dit_path = None
+#     model_manager.load_models(
+#         [
+#             wan_dit_path if wan_dit_path else 'ckpt/Wan2.1-Fun-1.3B-Control/diffusion_pytorch_model.safetensors',
+#             'ckpt/Wan2.1-Fun-1.3B-Control/Wan2.1_VAE.pth',
+#             'ckpt/Wan2.1-Fun-1.3B-Control/models_t5_umt5-xxl-enc-bf16.pth',
+#             'ckpt/Wan2.1-Fun-1.3B-Control/models_clip_open-clip-xlm-roberta-large-vit-huge-14.pth',
+#         ],
+#         torch_dtype=torch.bfloat16,
+#     )
+# wan_pipe = WanVideoPipeline.from_model_manager(model_manager, torch_dtype=torch.bfloat16, device=device)
+# wan_pipe.enable_vram_management(num_persistent_param_in_dit=None)
 
 gr_info_duration = 2 # gradio popup information duration
 
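The comment on ModelManager records a real tradeoff: loading the 1.3B weights on CPU and letting the pipeline manage VRAM needs about 6 GB, versus about 16 GB with everything on the GPU. A minimal sketch of exposing that choice, assuming DiffSynth-Studio-style imports (not shown in the hunk) and a hypothetical low_vram flag; the paths and calls are the ones from the commented-out block above.

import torch
from diffsynth import ModelManager, WanVideoPipeline  # assumed import source

low_vram = True  # hypothetical switch between the ~6 GB and ~16 GB configurations

# With device="cpu", weights stay in system RAM and are moved to the GPU on demand.
model_manager = ModelManager(device="cpu" if low_vram else device)
model_manager.load_models(
    [
        'ckpt/Wan2.1-Fun-1.3B-Control/diffusion_pytorch_model.safetensors',
        'ckpt/Wan2.1-Fun-1.3B-Control/Wan2.1_VAE.pth',
        'ckpt/Wan2.1-Fun-1.3B-Control/models_t5_umt5-xxl-enc-bf16.pth',
        'ckpt/Wan2.1-Fun-1.3B-Control/models_clip_open-clip-xlm-roberta-large-vit-huge-14.pth',
    ],
    torch_dtype=torch.bfloat16,
)
wan_pipe = WanVideoPipeline.from_model_manager(model_manager, torch_dtype=torch.bfloat16, device=device)
# As in app.py; a numeric cap on persistent DiT parameters would trade speed for VRAM.
wan_pipe.enable_vram_management(num_persistent_param_in_dit=None)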
@@ -197,6 +197,7 @@ relight_dir = ''
 with gr.Blocks(title="Lumen: Video Relighting Model").queue() as demo:
     gr.Markdown("# 💡Lumen: Consistent Video Relighting and Harmonious Background Replacement\n # <center>with Video Generative Models ([Project Page](https://lumen-relight.github.io/))</center>")
     gr.Markdown('💡 **Lumen** is a video relighting model that can relight the foreground and replace the background of a video based on the input text. The **usage steps** are as follows:')
+
     gr.Markdown('1. **Upload Video** (only the first 49 frames are used, resized to 832*480). \n' \
                 '2. **Extract Foreground**. We use [RMBG2.0](https://github.com/ai-anchorite/BRIA-RMBG-2.0) to extract the foreground, but it may produce unstable results. If so, we recommend using [MatAnyone](https://huggingface.co/spaces/PeiqingYang/MatAnyone) to get the **black-and-white mask video** (Alpha Output), uploading it, and then clicking the **S2** button. \n' \
                 '3. **Input Caption**. Select or input the caption you want the video to match. We recommend using an LLM (e.g. [Deepseek](https://chat.deepseek.com/), [Qwen](https://www.tongyi.com/)) to expand the caption with a simple prompt (use your imagination to expand the video description below, e.g. the background and how ambient light affects the foreground), since longer prompts may yield better results. ' \
@@ -308,7 +309,9 @@ with gr.Blocks(title="Lumen: Video Relighting Model").queue() as demo:
 
 # Launch application
 if __name__ == "__main__":
-    demo.launch(
-        debug=True,
-    )
+    demo.launch(
+        server_name='0.0.0.0',
+        debug=True,
+        ssr_mode=False,
+    )
 
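For reference, a sketch of the new launch configuration with the intent of each argument spelled out; server_port is an assumption (Gradio's default, made explicit), the other arguments are exactly those added by this commit.

if __name__ == "__main__":
    demo.launch(
        server_name='0.0.0.0',  # bind to all interfaces so the container/Space is reachable
        server_port=7860,       # assumed default Gradio port, stated explicitly
        debug=True,             # keep the process attached and print errors to the log
        ssr_mode=False,         # disable Gradio 5 server-side rendering
    )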