Spaces: Running on L40S
Update app.py
app.py CHANGED
@@ -7,6 +7,9 @@ import json
 import tempfile
 from huggingface_hub import snapshot_download

+num_gpus = torch.cuda.device_count()
+print(f"GPU AVAILABLE: {num_gpus}")
+
 # Download All Required Models using `snapshot_download`

 # Download Wan2.1-I2V-14B-480P model
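Note: `torch.cuda.device_count()` only counts devices visible to the process (it honors `CUDA_VISIBLE_DEVICES`), and this hunk assumes `import torch` already appears among app.py's imports. A minimal sketch of the detection, plus a hypothetical zero-GPU guard that is not part of this commit:

```python
import torch

num_gpus = torch.cuda.device_count()
print(f"GPU AVAILABLE: {num_gpus}")

# Hypothetical guard (not in the commit): generate_multitalk.py needs CUDA,
# so failing fast here gives a clearer error than a crash mid-inference.
if num_gpus == 0:
    raise RuntimeError("No CUDA device visible; run this Space on a GPU runtime.")
```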
@@ -79,7 +82,7 @@ GPU_TO_VRAM_PARAMS = {
     "NVIDIA A100-SXM4-40GB": 11000000000,
     "NVIDIA A100-SXM4-80GB": 22000000000,
     "NVIDIA L4": 5000000000,
-    "NVIDIA L40S":
+    "NVIDIA L40S": 11000000000
 }
 USED_VRAM_PARAMS = GPU_TO_VRAM_PARAMS[gpu_name]
 print("Using", USED_VRAM_PARAMS, "for num_persistent_param_in_dit")
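Note: `GPU_TO_VRAM_PARAMS[gpu_name]` raises `KeyError` for any GPU not in the table. A defensive variant, shown only as a sketch (it assumes `gpu_name` comes from `torch.cuda.get_device_name(0)` earlier in app.py, and reuses the L4 value as the conservative default):

```python
import torch

gpu_name = torch.cuda.get_device_name(0)  # assumed source of gpu_name

GPU_TO_VRAM_PARAMS = {
    "NVIDIA A100-SXM4-40GB": 11000000000,
    "NVIDIA A100-SXM4-80GB": 22000000000,
    "NVIDIA L4": 5000000000,
    "NVIDIA L40S": 11000000000,
}

# Fall back to the most conservative budget instead of crashing at startup
# when the Space lands on an unlisted GPU.
USED_VRAM_PARAMS = GPU_TO_VRAM_PARAMS.get(gpu_name, 5000000000)
print("Using", USED_VRAM_PARAMS, "for num_persistent_param_in_dit")
```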
@@ -110,46 +113,51 @@ def create_temp_input_json(prompt: str, cond_image_path: str, cond_audio_path: s
     return temp_json_path


-def infer(prompt, cond_image_path, cond_audio_path):
-
+def infer(prompt, cond_image_path, cond_audio_path):
+    # Prepare input JSON
     input_json_path = create_temp_input_json(prompt, cond_image_path, cond_audio_path)

-
-
+    # Base args
+    common_args = [
         "--ckpt_dir", "weights/Wan2.1-I2V-14B-480P",
         "--wav2vec_dir", "weights/chinese-wav2vec2-base",
         "--input_json", input_json_path,
-        "--sample_steps", "
-        #"--motion_frame", "2",
-        "--num_persistent_param_in_dit", str(USED_VRAM_PARAMS),
+        "--sample_steps", "6",
         "--mode", "streaming",
         "--use_teacache",
-        "--save_file", "
+        "--save_file", "multi_long_multigpu_exp"
     ]

-
-
-
-
-
+    if num_gpus > 1:
+        cmd = [
+            "torchrun",
+            f"--nproc_per_node={num_gpus}",
+            "--standalone",
+            "generate_multitalk.py",
+            "--dit_fsdp", "--t5_fsdp",
+            "--ulysses_size", str(num_gpus),
+        ] + common_args
+    else:
+        cmd = ["python3", "generate_multitalk.py"] + common_args
+
+    # Log to file and stream
+    with open("inference.log", "w") as log_file:
         process = subprocess.Popen(
             cmd,
             stdout=subprocess.PIPE,
             stderr=subprocess.STDOUT,
             text=True,
-            bufsize=1
+            bufsize=1
         )
-
         for line in process.stdout:
-            print(line, end="")
-            log_file.write(line)
-
+            print(line, end="")
+            log_file.write(line)
         process.wait()

         if process.returncode != 0:
             raise RuntimeError("Inference failed. Check inference.log for details.")

-    return "
+    return "multi_long_multigpu_exp.mp4"


 with gr.Blocks(title="MultiTalk Inference") as demo:
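Assembled from the added and context lines above, the patched `infer` reads roughly as follows (indentation is restored by hand here, so treat this as a sketch of the new code path rather than a verbatim copy of app.py):

```python
def infer(prompt, cond_image_path, cond_audio_path):
    # Prepare input JSON
    input_json_path = create_temp_input_json(prompt, cond_image_path, cond_audio_path)

    # Base args shared by the single- and multi-GPU invocations
    common_args = [
        "--ckpt_dir", "weights/Wan2.1-I2V-14B-480P",
        "--wav2vec_dir", "weights/chinese-wav2vec2-base",
        "--input_json", input_json_path,
        "--sample_steps", "6",
        "--mode", "streaming",
        "--use_teacache",
        "--save_file", "multi_long_multigpu_exp",
    ]

    if num_gpus > 1:
        # torchrun spawns one process per GPU; FSDP shards the DiT and T5
        # weights, and ulysses_size splits the sequence across the ranks.
        cmd = [
            "torchrun",
            f"--nproc_per_node={num_gpus}",
            "--standalone",
            "generate_multitalk.py",
            "--dit_fsdp", "--t5_fsdp",
            "--ulysses_size", str(num_gpus),
        ] + common_args
    else:
        cmd = ["python3", "generate_multitalk.py"] + common_args

    # Log to file while streaming to stdout; bufsize=1 selects line
    # buffering so progress appears as it is printed.
    with open("inference.log", "w") as log_file:
        process = subprocess.Popen(
            cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            text=True,
            bufsize=1,
        )
        for line in process.stdout:
            print(line, end="")
            log_file.write(line)
        process.wait()

    if process.returncode != 0:
        raise RuntimeError("Inference failed. Check inference.log for details.")

    return "multi_long_multigpu_exp.mp4"
```

On a 2-GPU runtime this builds `torchrun --nproc_per_node=2 --standalone generate_multitalk.py --dit_fsdp --t5_fsdp --ulysses_size 2` followed by the shared args; on the single L40S this Space runs on, it falls back to plain `python3`.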
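The hunk ends where the UI begins, and the commit does not touch it. For context, a minimal Gradio wiring consistent with `infer`'s signature and its returned .mp4 path might look like this (component names and layout are assumptions, not the Space's actual UI):

```python
import gradio as gr

with gr.Blocks(title="MultiTalk Inference") as demo:
    # Hypothetical layout; the real app.py UI is not shown in this diff.
    prompt = gr.Textbox(label="Prompt")
    cond_image = gr.Image(type="filepath", label="Conditioning image")
    cond_audio = gr.Audio(type="filepath", label="Conditioning audio")
    output_video = gr.Video(label="Result")
    run_btn = gr.Button("Generate")
    run_btn.click(infer, inputs=[prompt, cond_image, cond_audio], outputs=output_video)

demo.launch()
```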