Spaces: Running on L40S
Update app.py
app.py CHANGED
@@ -8,6 +8,7 @@ from transformers import T5EncoderModel, T5Tokenizer
 
 from datetime import datetime
 import random
+from moviepy.editor import VideoFileClip
 
 from huggingface_hub import hf_hub_download
 
@@ -38,26 +39,27 @@ pipe = CogVideoXImageToVideoPipeline.from_pretrained(model_id, tokenizer=tokeniz
 # Add this near the top after imports
 os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'
 
-def
-
-
-
-
-
-
-
-
-    pass
-
-def clear_gpu():
-    torch.cuda.empty_cache()
-    torch.cuda.synchronize()
-    gc.collect()
+def calculate_resize_dimensions(width, height, max_width=1024):
+    """Calculate new dimensions maintaining aspect ratio"""
+    if width <= max_width:
+        return width, height
+
+    aspect_ratio = height / width
+    new_width = max_width
+    new_height = int(max_width * aspect_ratio)
+    return new_width, new_height
 
 def infer(image_path, prompt, orbit_type, progress=gr.Progress(track_tqdm=True)):
     # Move everything to CPU initially
     pipe.to("cpu")
     torch.cuda.empty_cache()
+
+    # Load and get original image dimensions
+    image = load_image(image_path)
+    original_width, original_height = image.size
+
+    # Calculate target dimensions maintaining aspect ratio
+    target_width, target_height = calculate_resize_dimensions(original_width, original_height)
 
     lora_path = "checkpoints/"
     weight_name = "orbit_left_lora_weights.safetensors" if orbit_type == "Left" else "orbit_up_lora_weights.safetensors"
@@ -74,7 +76,6 @@ def infer(image_path, prompt, orbit_type, progress=gr.Progress(track_tqdm=True))
     torch.cuda.empty_cache()
 
     prompt = f"{prompt}. High quality, ultrarealistic detail and breath-taking movie-like camera shot."
-    image = load_image(image_path)
     seed = random.randint(0, 2**8 - 1)
 
     with torch.inference_mode():
@@ -94,11 +95,31 @@ def infer(image_path, prompt, orbit_type, progress=gr.Progress(track_tqdm=True))
     torch.cuda.empty_cache()
     gc.collect()
 
-
+    # Generate initial output video
     timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
-
+    temp_path = f"output_{timestamp}_temp.mp4"
+    output_path = f"output_{timestamp}.mp4"
+
+    # Export initial video
+    export_to_video(video.frames[0], temp_path, fps=8)
+
+    # Resize using moviepy with h264 codec
+    video_clip = VideoFileClip(temp_path)
+    resized_clip = video_clip.resize(width=target_width, height=target_height)
+    resized_clip.write_videofile(
+        output_path,
+        codec='libx264',
+        fps=8,
+        preset='medium',
+        ffmpeg_params=['-crf', '23']
+    )
+
+    # Cleanup
+    video_clip.close()
+    resized_clip.close()
+    os.remove(temp_path)
 
-    return
+    return output_path
 
 # Set up Gradio UI
 with gr.Blocks(analytics_enabled=False) as demo:
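
For reference, a quick sanity check of the aspect-ratio math in the new calculate_resize_dimensions helper (a minimal sketch; the example dimensions are illustrative, not taken from the Space):

# Illustrative check of the resize helper introduced in this commit.
def calculate_resize_dimensions(width, height, max_width=1024):
    """Calculate new dimensions maintaining aspect ratio"""
    if width <= max_width:
        return width, height

    aspect_ratio = height / width
    new_width = max_width
    new_height = int(max_width * aspect_ratio)
    return new_width, new_height

print(calculate_resize_dimensions(1920, 1080))  # (1024, 576): 16:9 input scaled down to max_width
print(calculate_resize_dimensions(640, 480))    # (640, 480): already narrower than 1024, returned unchanged

Note that in the updated infer() the resize is applied after generation, to the exported MP4, so it only changes the resolution of the delivered file.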
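
For anyone adapting this change elsewhere, a minimal standalone sketch of the moviepy resize/re-encode step, assuming moviepy 1.x (the moviepy.editor API imported above); the reencode_resized helper name and the example paths are hypothetical:

import os
from moviepy.editor import VideoFileClip

def reencode_resized(temp_path, output_path, width, height, fps=8):
    # Hypothetical helper mirroring the resize/re-encode block added to infer().
    clip = VideoFileClip(temp_path)
    resized = clip.resize(width=width, height=height)  # moviepy's resize fx
    resized.write_videofile(
        output_path,
        codec='libx264',               # H.264 output, as in the commit
        fps=fps,
        preset='medium',
        ffmpeg_params=['-crf', '23'],  # constant-rate-factor quality setting
    )
    clip.close()
    resized.close()
    os.remove(temp_path)  # drop the unresized temporary render

# Example (hypothetical paths and dimensions):
# reencode_resized("output_temp.mp4", "output.mp4", 1024, 576)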