Spaces:
Paused
Paused
File size: 9,581 Bytes
653ce35 7ca5351 c93a0cb 120ac54 c93a0cb 454eedf 39489bd 0cbec0b acf84db 3a66e37 1c8c6b0 12c01c3 c05f3f8 1c8c6b0 3a66e37 833e264 65a9a4b 28ef21d 833e264 28ef21d 833e264 1c8c6b0 69ec2ea 9f98966 1c8c6b0 9f98966 1c8c6b0 de0aaee 9f98966 1c8c6b0 9f98966 90e6ba4 9f98966 69ec2ea 1c8c6b0 618e51c 69ec2ea 0b7a097 69ec2ea 1c8c6b0 6a87ed0 1c8c6b0 6a87ed0 1c8c6b0 c5773bb ba65f56 1c8c6b0 ba65f56 1c8c6b0 ba65f56 1c8c6b0 ba65f56 b015e22 ba65f56 1c8c6b0 ba65f56 1c8c6b0 ba65f56 1c8c6b0 ba65f56 56841f6 b8f702d 3f40d94 afad51c 9a63896 afad51c c93a0cb 56841f6 c93a0cb 56841f6 454eedf b015e22 69ec2ea cdd5bef b015e22 69ec2ea b015e22 69ec2ea b015e22 69ec2ea b015e22 69ec2ea b015e22 1c8c6b0 65a9a4b e23155a 65a9a4b 6a046f7 65a9a4b b5a5c95 a2a8df5 1c8c6b0 bce3142 1c8c6b0 98aad5a 39489bd 7ca5351 0269ee9 39489bd 0269ee9 eaf8a3c 106f93a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 |
import gradio as gr
import os
import subprocess
import cv2
import numpy as np
from moviepy.editor import VideoFileClip, concatenate_videoclips
import math
from huggingface_hub import snapshot_download
# Hugging Face Hub repositories fetched before the app starts. Each one is
# stored under checkpoints/<last path component of the repo id>.
model_ids = [
    'runwayml/stable-diffusion-v1-5',
    'lllyasviel/sd-controlnet-depth',
    'lllyasviel/sd-controlnet-canny',
    'lllyasviel/sd-controlnet-openpose',
]

for model_id in model_ids:
    # Keep only the text after the final "/" as the local directory name.
    model_name = model_id.rpartition('/')[2]
    snapshot_download(model_id, local_dir='checkpoints/' + model_name)
def get_frame_count(filepath):
    """Return a Gradio update that sets ``maximum`` to the video's frame count.

    Args:
        filepath: Path of the video file to probe. May be ``None`` or empty,
            in which case a no-op update is returned.

    Returns:
        ``gr.update(maximum=<frame count>)`` for a given path, otherwise an
        empty ``gr.update()`` that leaves the target component unchanged.
    """
    # The change event can deliver None (e.g. when the upload is cleared);
    # there is no file to probe then, so leave the slider as it is.
    if not filepath:
        return gr.update()

    video = cv2.VideoCapture(filepath)
    try:
        frame_count = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
    finally:
        # Always free the capture handle, even if the probe fails.
        video.release()
    return gr.update(maximum=frame_count)
def get_video_dimension(filepath):
    """Probe a video file for its basic properties.

    Args:
        filepath: Path of the video file to open with OpenCV.

    Returns:
        A ``(width, height, fps, frame_count)`` tuple. Every value is passed
        through ``int()``, so a fractional frame rate is truncated.
    """
    capture = cv2.VideoCapture(filepath)
    # Queried in the same order as the returned tuple.
    wanted = (
        cv2.CAP_PROP_FRAME_WIDTH,
        cv2.CAP_PROP_FRAME_HEIGHT,
        cv2.CAP_PROP_FPS,
        cv2.CAP_PROP_FRAME_COUNT,
    )
    width, height, fps, frame_count = [int(capture.get(prop)) for prop in wanted]
    capture.release()
    return width, height, fps, frame_count
def resize_video(input_vid, output_vid, width, height, fps):
    """Re-encode a video with every frame resized to ``width`` x ``height``.

    Args:
        input_vid: Path of the video to read.
        output_vid: Path of the video file to write (``mp4v`` codec).
        width: Target frame width in pixels.
        height: Target frame height in pixels.
        fps: Frame rate given to the output writer.

    Returns:
        ``output_vid``, unchanged, so the call can be chained.
    """
    print("RESIZING ...")

    video = cv2.VideoCapture(input_vid)
    output_video = None
    try:
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # Codec for the output video
        output_video = cv2.VideoWriter(output_vid, fourcc, fps, (width, height))

        while True:
            ret, frame = video.read()
            if not ret:
                # No more frames (or the input could not be read).
                break
            output_video.write(cv2.resize(frame, (width, height)))
    finally:
        # Release both handles even if a frame operation raises, so the
        # output file is finalised and the input is not left open.
        video.release()
        if output_video is not None:
            output_video.release()

    print("RESIZE VIDEO DONE!")
    return output_vid
def normalize_and_save_video(input_video_path, output_video_path):
    """Copy a video frame by frame through a float32 [0, 1] round trip.

    Each frame is converted to float32, divided by 255, multiplied by 255
    again and cast back to uint8 before being written with the ``mp4v``
    codec at the input's size and frame rate.

    NOTE(review): scaling down and straight back up leaves pixel values
    essentially unchanged (float rounding plus the truncating uint8 cast may
    shift some values) -- confirm whether a different normalization was
    intended.

    Args:
        input_video_path: Path of the video to read.
        output_video_path: Path of the video file to write.

    Returns:
        ``output_video_path``, unchanged.
    """
    print("NORMALIZING ...")

    reader = cv2.VideoCapture(input_video_path)

    # Properties of the input, reused for the output writer.
    frames_left = int(reader.get(cv2.CAP_PROP_FRAME_COUNT))
    frame_width = int(reader.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(reader.get(cv2.CAP_PROP_FRAME_HEIGHT))
    frame_rate = reader.get(cv2.CAP_PROP_FPS)

    codec = cv2.VideoWriter_fourcc(*'mp4v')
    writer = cv2.VideoWriter(
        output_video_path, codec, frame_rate, (frame_width, frame_height)
    )

    # Read at most the reported number of frames; stop early on a failed read.
    while frames_left > 0:
        ok, frame = reader.read()
        if not ok:
            break

        unit_range = frame.astype(np.float32) / 255.0
        writer.write((unit_range * 255.0).astype(np.uint8))
        frames_left -= 1

    reader.release()
    writer.release()

    print("NORMALIZE DONE!")
    return output_video_path
def chunkify(video_path, fps, nb_frames):
    """Split a video into consecutive files of up to 12 frames each.

    Chunk files are written to the current working directory as
    ``chunk_<start>-<end>.mp4`` (``mp4v`` codec), where ``end`` is exclusive
    and capped at ``nb_frames``.

    Args:
        video_path: Path of the video to split.
        fps: Frame rate given to every chunk writer.
        nb_frames: Number of frames of the input to cover.

    Returns:
        The list of chunk file names, in order. Splitting stops at the first
        chunk whose first frame cannot be read.
    """
    frames_per_chunk = 12
    chunk_files = []
    capture = cv2.VideoCapture(video_path)

    first = 0
    while first < nb_frames:
        last = min(first + frames_per_chunk, nb_frames)

        # Read the chunk's first frame once to learn the frame size.
        capture.set(cv2.CAP_PROP_POS_FRAMES, first)
        ok, probe = capture.read()
        if not ok:
            break
        frame_height, frame_width = probe.shape[:2]

        chunk_name = f"chunk_{first}-{last}.mp4"
        writer = cv2.VideoWriter(
            chunk_name,
            cv2.VideoWriter_fourcc(*"mp4v"),
            fps,
            (frame_width, frame_height),
        )

        # Seek explicitly to every frame; the probe read above already moved
        # the position past the first one.
        for position in range(first, last):
            capture.set(cv2.CAP_PROP_POS_FRAMES, position)
            ok, frame = capture.read()
            if not ok:
                # A failed read ends this chunk only; the outer loop goes on.
                break
            writer.write(frame)

        writer.release()
        chunk_files.append(chunk_name)
        first += frames_per_chunk

    capture.release()
    print(f"CHUNKS: {chunk_files}")
    return chunk_files
def run_inference_by_chunkify(prompt, video_path, condition, video_length):
    """Run ``inference.py`` on the first 12-frame chunk of a video.

    Marked "DOESN'T WORK" by the original author. The input is resized to
    512x512 and split into chunks, but only the chunk at index 0 is sent to
    ``inference.py``; every other chunk just prints "finished".

    Args:
        prompt: Text prompt forwarded as ``--prompt``.
        video_path: Path of the input video.
        condition: Value forwarded as ``--condition``.
        video_length: Value forwarded as ``--video_length``.

    Returns:
        ``("done", <path of the first processed chunk>)``.

    Raises:
        IndexError: If no chunk was produced, so nothing was processed.
    """
    # DOESN'T WORK
    # Probe the input once for both the frame rate and the frame count.
    _, _, target_fps, total_frames = get_video_dimension(video_path)
    print(f"INPUT FPS: {target_fps}")

    # Resize the video
    resized = resize_video(video_path, 'resized.mp4', 512, 512, target_fps)

    # Chunkify the video into 12 frames chunks
    chunks = chunkify(resized, target_fps, total_frames)

    output_path = 'output/'
    os.makedirs(output_path, exist_ok=True)

    processed_chunks = []

    for index, chunk_path in enumerate(chunks):
        if index != 0:
            print("finished")
            continue

        print(f"Chunk #{index}: {chunk_path}")

        # Remove a stale result so an old file is never mistaken for new output.
        video_path_output = os.path.join(output_path, f"{index}.mp4")
        if os.path.exists(video_path_output):
            os.remove(video_path_output)

        # Pass the arguments as a list (no shell): the prompt is user text and
        # must not be interpreted by a shell, and quotes inside it must not
        # break the command line.
        command = [
            "python", "inference.py",
            "--prompt", str(prompt),
            "--condition", str(condition),
            "--video_path", str(chunk_path),
            "--output_path", output_path,
            "--temp_chunk_path", str(index),
            "--width", "512",
            "--height", "512",
            "--fps", "8",
            "--video_length", str(video_length),
            "--is_long_video",
        ]
        subprocess.run(command)

        # Append processed chunk to final array
        processed_chunks.append(video_path_output)

    print(f"PROCESSED CHUNKS: {processed_chunks}")

    return "done", processed_chunks[0]
def run_inference(prompt, video_path, condition, video_length):
    """Preprocess a video and run ``inference.py`` on it.

    The input is resized to 512x512 (``resized.mp4``), passed through
    ``normalize_and_save_video`` (``normalized.mp4``) and handed to
    ``inference.py``, which is asked to write ``output/result.mp4``.

    Args:
        prompt: Text prompt forwarded as ``--prompt``.
        video_path: Path of the input video.
        condition: Value forwarded as ``--condition``.
        video_length: Number of frames to process; values above 12 add the
            ``--is_long_video`` flag.

    Returns:
        ``("done", "output/result.mp4")``. The status is "done" regardless of
        the subprocess exit code; a non-zero code is only printed.
    """
    # Get FPS of original video input
    target_fps = get_video_dimension(video_path)[2]
    print(f"INPUT FPS: {target_fps}")

    # Resize the video
    resized = resize_video(video_path, 'resized.mp4', 512, 512, target_fps)

    # normalize pixels
    normalized = normalize_and_save_video(resized, 'normalized.mp4')

    output_path = 'output/'
    os.makedirs(output_path, exist_ok=True)

    # Remove a stale result so an old file is never mistaken for new output.
    video_path_output = os.path.join(output_path, "result.mp4")
    if os.path.exists(video_path_output):
        os.remove(video_path_output)

    print("RUNNING INFERENCE ...")

    # Pass the arguments as a list (no shell): the prompt is user text and
    # must not be interpreted by a shell, and quotes inside it must not
    # break the command line.
    command = [
        "python", "inference.py",
        "--prompt", str(prompt),
        "--condition", str(condition),
        "--video_path", str(normalized),
        "--output_path", output_path,
        "--temp_chunk_path", "result",
        "--width", "512",
        "--height", "512",
        "--fps", str(target_fps),
        "--video_length", str(video_length),
    ]
    if video_length > 12:
        command.append("--is_long_video")

    completed = subprocess.run(command)
    if completed.returncode != 0:
        # Surface the failure in the logs instead of ignoring it silently.
        print(f"inference.py exited with code {completed.returncode}")

    print("FINISHED !")

    return "done", video_path_output
# Page-level CSS: limit the width of the main column and centre it.
css="""
#col-container {max-width: 810px; margin-left: auto; margin-right: auto;}
"""

# Gradio UI: inputs (video, prompt, condition, frame count) on the left,
# the resulting video and a status box on the right.
with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        gr.Markdown("""
<h1 style="text-align: center;">ControlVideo</h1>
""")
        with gr.Row():
            with gr.Column():
                #video_in = gr.Video(source="upload", type="filepath", visible=True)
                video_path = gr.Video(source="upload", type="filepath", visible=True)
                prompt = gr.Textbox(label="prompt")
                with gr.Row():
                    condition = gr.Dropdown(label="Condition", choices=["depth", "canny", "pose"], value="depth")
                    video_length = gr.Slider(label="Video length", info="How many frames do you want to process ?", minimum=1, maximum=12, step=1, value=2)
                    #seed = gr.Number(label="seed", value=42)
                submit_btn = gr.Button("Submit")
            with gr.Column():
                video_res = gr.Video(label="result")
                status = gr.Textbox(label="result")

    # When the video changes, update the slider's maximum from its frame count.
    video_path.change(
        fn=get_frame_count,
        inputs=[video_path],
        outputs=[video_length],
    )

    # Submit runs the full pipeline and fills the status box and result video.
    submit_btn.click(
        fn=run_inference,
        inputs=[
            prompt,
            video_path,
            condition,
            video_length,
        ],
        outputs=[status, video_res],
    )

# Queue up to 12 pending requests, then start the app.
demo.queue(max_size=12).launch()