Spaces: Running on L40S
Update app.py
app.py CHANGED
@@ -7,6 +7,31 @@ import json
 import tempfile
 from huggingface_hub import snapshot_download
 
+import soundfile as sf
+import tempfile
+from datetime import datetime
+
+is_shared_ui = True if "fffiloni/Meigen-MultiTalk" in os.environ['SPACE_ID'] else False
+
+def trim_audio_to_5s_temp(audio_path, sample_rate=16000):
+    max_duration_sec = 5
+    audio, sr = sf.read(audio_path)
+
+    if sr != sample_rate:
+        raise ValueError(f"Expected sample rate {sample_rate}, but got {sr}")
+
+    max_samples = max_duration_sec * sample_rate
+    if len(audio) > max_samples:
+        audio = audio[:max_samples]
+
+    timestamp = datetime.now().strftime("%Y%m%d%H%M%S%f")
+    base_name = os.path.splitext(os.path.basename(audio_path))[0]
+    temp_filename = f"{base_name}_trimmed_{timestamp}.wav"
+    temp_path = os.path.join(tempfile.gettempdir(), temp_filename)
+
+    sf.write(temp_path, audio, samplerate=sample_rate)
+    return temp_path
+
 num_gpus = torch.cuda.device_count()
 print(f"GPU AVAILABLE: {num_gpus}")
 
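Note on this hunk: import tempfile is added again on new line 11 even though it is already imported on line 7; the duplicate is harmless but redundant. Also, trim_audio_to_5s_temp() rejects any input that is not already 16 kHz. A hedged sketch of a more forgiving variant that resamples instead of raising (it assumes librosa is available; it is not part of the commit):

import os
import tempfile

import librosa
import soundfile as sf

def trim_audio_to_5s_resampled(audio_path, sample_rate=16000, max_duration_sec=5):
    # librosa resamples to the target rate on load instead of raising on a mismatch.
    audio, _ = librosa.load(audio_path, sr=sample_rate)
    # Keep at most max_duration_sec seconds of samples.
    audio = audio[: max_duration_sec * sample_rate]
    base = os.path.splitext(os.path.basename(audio_path))[0]
    out_path = os.path.join(tempfile.gettempdir(), f"{base}_trimmed.wav")
    sf.write(out_path, audio, samplerate=sample_rate)
    return out_path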
@@ -114,6 +139,11 @@ def create_temp_input_json(prompt: str, cond_image_path: str, cond_audio_path: s
 
 
 def infer(prompt, cond_image_path, cond_audio_path):
+
+    if is_shared_ui:
+        trimmed_audio_path = trim_audio_to_5s_temp(cond_audio_path)
+        cond_audio_path = trimmed_audio_path
+
     # Prepare input JSON
     input_json_path = create_temp_input_json(prompt, cond_image_path, cond_audio_path)
 
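The is_shared_ui gate added at new line 14 indexes os.environ['SPACE_ID'] directly, which raises KeyError whenever SPACE_ID is unset, for example when the app runs locally rather than on a Space. A safer lookup, offered as a suggestion rather than as part of the commit:

import os

# .get() falls back to "" instead of raising when SPACE_ID is unset (local runs).
is_shared_ui = "fffiloni/Meigen-MultiTalk" in os.environ.get("SPACE_ID", "")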
@@ -140,24 +170,29 @@ def infer(prompt, cond_image_path, cond_audio_path):
     else:
         cmd = ["python3", "generate_multitalk.py"] + common_args
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    try:
+        # Log to file and stream
+        with open("inference.log", "w") as log_file:
+            process = subprocess.Popen(
+                cmd,
+                stdout=subprocess.PIPE,
+                stderr=subprocess.STDOUT,
+                text=True,
+                bufsize=1
+            )
+            for line in process.stdout:
+                print(line, end="")
+                log_file.write(line)
+            process.wait()
+
+        if process.returncode != 0:
+            raise RuntimeError("Inference failed. Check inference.log for details.")
+
+        return "multi_long_multigpu_exp.mp4"
+
+    finally:
+        if os.path.exists(trimmed_audio_path):
+            os.remove(trimmed_audio_path)
 
 
 with gr.Blocks(title="MultiTalk Inference") as demo:
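Two notes on this hunk. The Popen call with text=True and bufsize=1 gives line-buffered streaming, so each output line is printed and appended to inference.log as it arrives, and raising after process.wait() on a nonzero return code surfaces failures without losing the log. However, trimmed_audio_path is only assigned inside the is_shared_ui branch (new lines 143-145), so when that branch is skipped the finally block at new line 194 raises NameError. A minimal guard, assuming the subprocess logic stays as committed:

def infer(prompt, cond_image_path, cond_audio_path):
    trimmed_audio_path = None  # defined up front so the finally block is always safe
    if is_shared_ui:
        trimmed_audio_path = trim_audio_to_5s_temp(cond_audio_path)
        cond_audio_path = trimmed_audio_path
    try:
        ...  # unchanged inference/subprocess logic from the commit
        return "multi_long_multigpu_exp.mp4"
    finally:
        # Remove the temp file only when the shared-UI branch created one.
        if trimmed_audio_path and os.path.exists(trimmed_audio_path):
            os.remove(trimmed_audio_path)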