Spaces: Running on Zero
Update app.py
app.py CHANGED
@@ -15,21 +15,21 @@ num_frames, width, height = 49, 832, 480
 gpu_id = 0
 device = f'cuda:{gpu_id}' if torch.cuda.is_available() else 'cpu'
 
-from modelscope import snapshot_download
-model_dir = snapshot_download( # https://www.modelscope.cn/models/AI-ModelScope/RMBG-2.0
-    model_id = 'AI-ModelScope/RMBG-2.0',
-    local_dir = 'ckpt/RMBG-2.0',
-    ignore_file_pattern = ['onnx*'],
-)
+# from modelscope import snapshot_download
+# model_dir = snapshot_download( # https://www.modelscope.cn/models/AI-ModelScope/RMBG-2.0
+#     model_id = 'AI-ModelScope/RMBG-2.0',
+#     local_dir = 'ckpt/RMBG-2.0',
+#     ignore_file_pattern = ['onnx*'],
+# )
 
-from huggingface_hub import snapshot_download, hf_hub_download
-snapshot_download( # download the whole repo; downloading briaai/RMBG-2.0 requires a token
-    repo_id="alibaba-pai/Wan2.1-Fun-1.3B-Control",
-    local_dir="ckpt/Wan2.1-Fun-1.3B-Control",
-    local_dir_use_symlinks=False,
-    resume_download=True,
-    repo_type="model"
-)
+# from huggingface_hub import snapshot_download, hf_hub_download
+# snapshot_download( # download the whole repo; downloading briaai/RMBG-2.0 requires a token
+#     repo_id="alibaba-pai/Wan2.1-Fun-1.3B-Control",
+#     local_dir="ckpt/Wan2.1-Fun-1.3B-Control",
+#     local_dir_use_symlinks=False,
+#     resume_download=True,
+#     repo_type="model"
+# )
 
 # hf_hub_download(
 #     repo_id="Kunbyte/Lumen",
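This hunk only comments out the checkpoint downloads; they can still be run once, outside the app, to populate ckpt/. A minimal standalone sketch, assuming the same ckpt/ layout: the script name and token placeholder are hypothetical, and ignore_patterns is assumed as the huggingface_hub counterpart of modelscope's ignore_file_pattern.

# one_time_download.py -- hypothetical helper mirroring the commented-out calls
from huggingface_hub import snapshot_download

# Wan2.1-Fun-1.3B-Control is a public repo; no token needed.
snapshot_download(
    repo_id="alibaba-pai/Wan2.1-Fun-1.3B-Control",
    local_dir="ckpt/Wan2.1-Fun-1.3B-Control",
    repo_type="model",
)

# briaai/RMBG-2.0 is gated: a token with granted access is required (see comment above).
snapshot_download(
    repo_id="briaai/RMBG-2.0",
    local_dir="ckpt/RMBG-2.0",
    ignore_patterns=["onnx*"],  # skip ONNX exports, like the modelscope call above
    token="hf_xxx",             # placeholder: substitute your own access token
)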
@@ -39,38 +39,38 @@ snapshot_download( # download the whole repo; downloading briaai/RMBG-2.0 requires a token
 #     resume_download=True,
 # )
 
-rmbg_model = AutoModelForImageSegmentation.from_pretrained('ckpt/RMBG-2.0', trust_remote_code=True) # ckpt/RMBG-2.0
-torch.set_float32_matmul_precision(['high', 'highest'][0])
-rmbg_model.to(device)
-rmbg_model.eval()
+# rmbg_model = AutoModelForImageSegmentation.from_pretrained('ckpt/RMBG-2.0', trust_remote_code=True) # ckpt/RMBG-2.0
+# torch.set_float32_matmul_precision(['high', 'highest'][0])
+# rmbg_model.to(device)
+# rmbg_model.eval()
 
 
-model_manager = ModelManager(device="cpu") # 1.3b: device=cpu: uses 6G VRAM, device=device: uses 16G VRAM; about 1-2 min per video
-wan_dit_path = 'train_res/wan1.3b_zh/full_wc0.5_f1gt0.5_real1_2_zh_en_l_s/lightning_logs/version_0/checkpoints/step-step=30000.ckpt'
+# model_manager = ModelManager(device="cpu") # 1.3b: device=cpu: uses 6G VRAM, device=device: uses 16G VRAM; about 1-2 min per video
+# wan_dit_path = 'train_res/wan1.3b_zh/full_wc0.5_f1gt0.5_real1_2_zh_en_l_s/lightning_logs/version_0/checkpoints/step-step=30000.ckpt'
 
-if 'wan14b' in wan_dit_path.lower(): # 14B: uses about 36G, about 10 min per video
-    model_manager.load_models(
-        [
-            wan_dit_path if wan_dit_path else 'ckpt/Wan2.1-Fun-14B-Control/diffusion_pytorch_model.safetensors',
-            'ckpt/Wan2.1-Fun-1.3B-Control/Wan2.1_VAE.pth',
-            'ckpt/Wan2.1-Fun-1.3B-Control/models_t5_umt5-xxl-enc-bf16.pth',
-            'ckpt/Wan2.1-Fun-1.3B-Control/models_clip_open-clip-xlm-roberta-large-vit-huge-14.pth',
-        ],
-        torch_dtype=torch.bfloat16, # float8_e4m3fn for fp8 quantization; bfloat16
-    )
-else:
-    wan_dit_path = None
-    model_manager.load_models(
-        [
-            wan_dit_path if wan_dit_path else 'ckpt/Wan2.1-Fun-1.3B-Control/diffusion_pytorch_model.safetensors',
-            'ckpt/Wan2.1-Fun-1.3B-Control/Wan2.1_VAE.pth',
-            'ckpt/Wan2.1-Fun-1.3B-Control/models_t5_umt5-xxl-enc-bf16.pth',
-            'ckpt/Wan2.1-Fun-1.3B-Control/models_clip_open-clip-xlm-roberta-large-vit-huge-14.pth',
-        ],
-        torch_dtype=torch.bfloat16,
-    )
-wan_pipe = WanVideoPipeline.from_model_manager(model_manager, torch_dtype=torch.bfloat16, device=device)
-wan_pipe.enable_vram_management(num_persistent_param_in_dit=None)
+# if 'wan14b' in wan_dit_path.lower(): # 14B: uses about 36G, about 10 min per video
+#     model_manager.load_models(
+#         [
+#             wan_dit_path if wan_dit_path else 'ckpt/Wan2.1-Fun-14B-Control/diffusion_pytorch_model.safetensors',
+#             'ckpt/Wan2.1-Fun-1.3B-Control/Wan2.1_VAE.pth',
+#             'ckpt/Wan2.1-Fun-1.3B-Control/models_t5_umt5-xxl-enc-bf16.pth',
+#             'ckpt/Wan2.1-Fun-1.3B-Control/models_clip_open-clip-xlm-roberta-large-vit-huge-14.pth',
+#         ],
+#         torch_dtype=torch.bfloat16, # float8_e4m3fn for fp8 quantization; bfloat16
+#     )
+# else:
+#     wan_dit_path = None
+#     model_manager.load_models(
+#         [
+#             wan_dit_path if wan_dit_path else 'ckpt/Wan2.1-Fun-1.3B-Control/diffusion_pytorch_model.safetensors',
+#             'ckpt/Wan2.1-Fun-1.3B-Control/Wan2.1_VAE.pth',
+#             'ckpt/Wan2.1-Fun-1.3B-Control/models_t5_umt5-xxl-enc-bf16.pth',
+#             'ckpt/Wan2.1-Fun-1.3B-Control/models_clip_open-clip-xlm-roberta-large-vit-huge-14.pth',
+#         ],
+#         torch_dtype=torch.bfloat16,
+#     )
+# wan_pipe = WanVideoPipeline.from_model_manager(model_manager, torch_dtype=torch.bfloat16, device=device)
+# wan_pipe.enable_vram_management(num_persistent_param_in_dit=None)
 
 gr_info_duration = 2 # gradio popup information duration
 
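The comment on ModelManager records a real tradeoff: loading the 1.3B weights on CPU and letting the pipeline manage VRAM needs about 6 GB, versus about 16 GB with everything on the GPU. A minimal sketch of exposing that choice, assuming DiffSynth-Studio-style imports (not shown in the hunk) and a hypothetical low_vram flag; the paths and calls are the ones from the commented-out block above.

import torch
from diffsynth import ModelManager, WanVideoPipeline  # assumed import source

low_vram = True  # hypothetical switch between the ~6 GB and ~16 GB configurations

# With device="cpu", weights stay in system RAM and are moved to the GPU on demand.
model_manager = ModelManager(device="cpu" if low_vram else device)
model_manager.load_models(
    [
        'ckpt/Wan2.1-Fun-1.3B-Control/diffusion_pytorch_model.safetensors',
        'ckpt/Wan2.1-Fun-1.3B-Control/Wan2.1_VAE.pth',
        'ckpt/Wan2.1-Fun-1.3B-Control/models_t5_umt5-xxl-enc-bf16.pth',
        'ckpt/Wan2.1-Fun-1.3B-Control/models_clip_open-clip-xlm-roberta-large-vit-huge-14.pth',
    ],
    torch_dtype=torch.bfloat16,
)
wan_pipe = WanVideoPipeline.from_model_manager(model_manager, torch_dtype=torch.bfloat16, device=device)
# As in app.py; a numeric cap on persistent DiT parameters would trade speed for VRAM.
wan_pipe.enable_vram_management(num_persistent_param_in_dit=None)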
@@ -197,6 +197,7 @@ relight_dir = ''
 with gr.Blocks(title="Lumen: Video Relighting Model").queue() as demo:
     gr.Markdown("# 💡Lumen: Consistent Video Relighting and Harmonious Background Replacement\n # <center>with Video Generative Models ([Project Page](https://lumen-relight.github.io/))</center>")
     gr.Markdown('💡 **Lumen** is a video relighting model that can relight the foreground and replace the background of a video based on the input text. The **usage steps** are as follows:')
+
     gr.Markdown('1. **Upload Video** (only the first 49 frames are used, resized to 832*480). \n' \
                 '2. **Extract Foreground**. We use [RMBG2.0](https://github.com/ai-anchorite/BRIA-RMBG-2.0) to extract the foreground, but it may produce unstable results. If so, we recommend using [MatAnyone](https://huggingface.co/spaces/PeiqingYang/MatAnyone) to get the **black-and-white mask video** (Alpha Output), uploading it, and then clicking the **S2** button. \n' \
                 '3. **Input Caption**. Select or input the caption you want the video to match. We recommend using an LLM (e.g. [Deepseek](https://chat.deepseek.com/), [Qwen](https://www.tongyi.com/)) to expand the caption with a simple prompt (use your imagination to expand the video description below, e.g. the background and how ambient light affects the foreground), since longer prompts may yield better results. ' \
@@ -308,7 +309,9 @@ with gr.Blocks(title="Lumen: Video Relighting Model").queue() as demo:
 
 # Launch application
 if __name__ == "__main__":
-    demo.launch(
-        debug=True,
-    )
+    demo.launch(
+        server_name='0.0.0.0',
+        debug=True,
+        ssr_mode=False,
+    )
 
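For reference, a sketch of the new launch configuration with the intent of each argument spelled out; server_port is an assumption (Gradio's default, made explicit), the other arguments are exactly those added by this commit.

if __name__ == "__main__":
    demo.launch(
        server_name='0.0.0.0',  # bind to all interfaces so the container/Space is reachable
        server_port=7860,       # assumed default Gradio port, stated explicitly
        debug=True,             # keep the process attached and print errors to the log
        ssr_mode=False,         # disable Gradio 5 server-side rendering
    )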