Spaces:
Runtime error
Runtime error
File size: 12,524 Bytes
aa85975 f647b33 6474363 aa85975 6474363 f647b33 3300b67 6474363 f647b33 7e8a433 6474363 3300b67 6474363 f647b33 6474363 d8c4344 6474363 f647b33 d8c4344 f647b33 d8c4344 f647b33 d8c4344 f647b33 d8c4344 6474363 d8c4344 f647b33 d8c4344 f647b33 d8c4344 6474363 7e8a433 d8c4344 6474363 d8c4344 6474363 d8c4344 6474363 16852a7 d8c4344 6474363 d8c4344 5f6ce43 d8c4344 3300b67 6474363 d8c4344 6474363 d8c4344 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 |
import gradio as gr
from PIL import Image, ImageDraw, ImageFont, ImageSequence
import numpy as np
import cv2
import os
import tempfile
global stored_frames
def load_and_store_frames(image_file, grid_x, grid_y):
global stored_frames
try:
# Make sure file exists
if image_file is None:
return "File not found", ""
print(f"Loading frames for {image_file.name}")
if image_file.name.endswith('.mp4'):
frames = extract_frames_from_video(image_file.name)
video_path = image_file.name
else: # it's a gif
try:
img = Image.open(image_file.name)
except Exception as e:
print(f"Could not open GIF file: {e}")
return "Could not open GIF file", ""
frames = []
for i in range(0, img.n_frames):
try:
img.seek(i)
frames.append(img.copy())
except Exception as e:
print(f"Could not seek to frame {i}: {e}")
# Convert GIF to MP4 for preview
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
tmp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4")
video_path = tmp_file.name
try:
duration = img.info.get('duration', 100)
# default to reasonable framerate if duration is 0
framerate = 1 / (duration / 1000.0) if duration > 0 else 10
print(f"frame count: {len(frames)} framerate: {duration} {img.info}")
convert_gif_to_video(image_file.name, tmp_file.name, framerate)
except Exception as e:
print(f"Could not convert GIF to MP4: {e}")
stored_frames = frames # Store the frames for later use
total_frames = len(frames)
selected_frames_count = grid_x * grid_y
details = f"**Total Frames:** {len(frames)}\n\n"
output_info = f"Grid size: {grid_x} x {grid_y}\n\nSelected Frames: {selected_frames_count} / {total_frames} ({selected_frames_count / total_frames * 100:.2f}%)"
return f"Frames loaded successfully\n\n{details}\n\n{output_info}", video_path
except Exception as e:
print(f"An error occurred while loading and storing frames: {e}")
return f"An error occurred: {e}", ""
def generate_grid(grid_x, grid_y, font_size, font_color, position, border_size, border_color):
global stored_frames
# print(f"Processing grid with {grid_x} x {grid_y} grid size, font size {font_size}, font color {font_color}, position {position}, border size {border_size}, border color {border_color}")
if stored_frames is None:
load_and_store_frames()
grid_img, output_info = create_grid(stored_frames, grid_x, grid_y, font_size, font_color, position, border_size, border_color)
details = f"Total Frames: {len(stored_frames)}\n\n{output_info}"
return grid_img, details
def create_grid(frames, grid_x, grid_y, font_size, font_color, position, border_size, border_color):
total_frames = len(frames)
selected_frames_count = grid_x * grid_y
# Select evenly spaced frames
selected_frames_indices = np.linspace(0, total_frames - 1, selected_frames_count).astype(int)
selected_frames = [frames[i] for i in selected_frames_indices]
# Modify frames by adding border and number
modified_frames = []
try:
font = ImageFont.truetype("Lato-Regular.ttf", font_size)
except IOError:
print("Font not found, using default font.")
font = ImageFont.load_default()
positions = {
"Top Left": (20, 20),
"Top Right": (frames[0].width - 20 - font_size, 20),
"Bottom Left": (20, frames[0].height - 20 - font_size),
"Bottom Right": (frames[0].width - 20 - font_size, frames[0].height - 20 - font_size)
}
for i, frame in enumerate(selected_frames):
# Add border
border_width = border_size
frame_with_border = Image.new('RGB', (frame.width + 2*border_width, frame.height + 2*border_width), border_color.lower())
frame_with_border.paste(frame, (border_width, border_width))
# Add number
draw = ImageDraw.Draw(frame_with_border)
text = str(i + 1)
text_position = (border_width + positions[position][0], border_width + positions[position][1])
draw.text(text_position, text, font=font, fill=font_color)
modified_frames.append(frame_with_border)
# Combine modified frames into a grid
grid_width = modified_frames[0].width * grid_x
grid_height = modified_frames[0].height * grid_y
grid_img = Image.new('RGB', (grid_width, grid_height), border_color.lower())
for i, frame in enumerate(modified_frames):
x_offset = (i % grid_x) * frame.width
y_offset = (i // grid_x) * frame.height
grid_img.paste(frame, (x_offset, y_offset))
output_info = f"Grid size: {grid_x} x {grid_y}\n\nSelected Frames: {selected_frames_count} / {total_frames} ({selected_frames_count / total_frames * 100:.2f}%)"
return grid_img, output_info
def extract_frames_from_video(video_file):
"""Extract frames from an MP4 video."""
frames = []
cap = cv2.VideoCapture(video_file)
while True:
ret, frame = cap.read()
if not ret:
break
# Convert BGR format (used by OpenCV) to RGB
frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
frames.append(Image.fromarray(frame_rgb))
cap.release()
return frames
def convert_gif_to_video(gif_path, output_video_path, frame_rate):
try:
# Load the gif
gif = Image.open(gif_path)
except Exception as e:
print(f"Could not open GIF file: {e}")
return
try:
# Define the codec and create VideoWriter object
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter(output_video_path, fourcc, frame_rate, (gif.width, gif.height))
except Exception as e:
print(f"Could not create VideoWriter object: {e}")
return
try:
# Iterate over the frames of the gif
for frame_index in range(gif.n_frames):
gif.seek(frame_index)
# Convert the PIL Image to an array
frame_arr = np.array(gif.convert("RGB"))
# Convert RGB to BGR format
frame_bgr = cv2.cvtColor(frame_arr, cv2.COLOR_RGB2BGR)
# Write the frame to the video
out.write(frame_bgr)
except Exception as e:
print(f"Could not write frame to video: {e}")
out.release()
def gif_or_video_info(image_file, grid_x, grid_y, font_size, font_color, position, border_size, border_color):
image_file.file.seek(0)
video_path = ""
if image_file.name.endswith('.mp4'):
video_path = image_file.name
cap = cv2.VideoCapture(image_file.name)
frame_rate = cap.get(cv2.CAP_PROP_FPS) # Get the actual frame rate of the video
frames = extract_frames_from_video(image_file.name)
total_frames = len(frames)
cap.release()
else: # it's a gif
img = Image.open(image_file.name)
frames = []
for i in range(0, img.n_frames):
img.seek(i)
frames.append(img.copy())
total_frames = img.n_frames
frame_rate = 1 / (img.info.get('duration', 100) / 1000.0) # Convert to seconds
# Convert GIF to MP4 and save it to a temp path
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
tmp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4")
video_path = tmp_file.name
convert_gif_to_video(image_file.name, tmp_file.name, frame_rate)
grid_img, output_info = create_grid(frames, grid_x, grid_y, font_size, font_color, position, border_size, border_color)
details = f"**Total Frames:** {total_frames}\n\n**Frame Rate:** {frame_rate} frames/sec\n\n{output_info}"
return grid_img, details, video_path
def gif_info(image_file, grid_x, grid_y, font_size, font_color, position, border_size, border_color):
return gif_or_video_info(image_file, grid_x, grid_y, font_size, font_color, position, border_size, border_color)
def mirror(x):
return x
with gr.Blocks() as app:
gr.Markdown('## vid2grid Generator')
gr.Markdown('Upload a GIF or MP4 to generate a grid from its frames. Use the sliders to adjust the grid size and text settings.\n\nThis is particularly useful for use with multi modal models such as GPT-4V to retrieve descriptions of short videos or gifs, [example here.](https://twitter.com/zachnagengast/status/1712896232170180651)\n\n **Note:** The grid will be generated only after clicking the "Generate Grid" button.')
with gr.Row():
with gr.Column():
control_image = gr.File(label="Upload a short MP4 or GIF", type="file", elem_id="file_upload", file_types=[".gif", ".mp4"])
video_preview = gr.Video(interactive=False, label="Preview", format="mp4")
gif_details = gr.Markdown("No file found.")
# gr.Examples(
# examples=[os.path.join(os.path.dirname(__file__), "demo.mp4")],
# inputs=[control_image],
# outputs=[gif_details, video_preview],
# fn=load_and_store_frames,
# cache_examples=True,
# )
process_button = gr.Button("Generate Grid") # New button to trigger the heavy computation
grid_x_slider = gr.Slider(minimum=1, maximum=10, step=1, value=3, label="Grid X Size")
grid_y_slider = gr.Slider(minimum=1, maximum=10, step=1, value=3, label="Grid Y Size")
font_color_dropdown = gr.Dropdown(choices=["Black", "White", "Red", "Green", "Blue"], value="White", label="Numbering Color")
position_radio = gr.Radio(choices=["Top Left", "Top Right", "Bottom Left", "Bottom Right"], value="Top Left", label="Numbering Position")
font_size_slider = gr.Slider(minimum=10, maximum=100, step=5, value=40, label="Font Size")
border_color_dropdown = gr.Dropdown(choices=["Black", "White", "Red", "Green", "Blue"], value="White", label="Border Color")
border_size_slider = gr.Slider(minimum=0, maximum=100, step=5, value=10, label="Border Size")
with gr.Column():
result_image = gr.Image(label="Generated Grid", value="https://i.imgur.com/fYrBwbd.png")
# Use .change() method to listen for changes in any of the controls
control_image.upload(load_and_store_frames, inputs=[control_image, grid_x_slider, grid_y_slider], outputs=[gif_details, video_preview])
# grid_x_slider.change(generate_grid, inputs=[grid_x_slider, grid_y_slider, font_size_slider, font_color_dropdown, position_radio, border_size_slider, border_color_dropdown], outputs=[result_image, gif_details, video_preview])
# grid_y_slider.change(generate_grid, inputs=[grid_x_slider, grid_y_slider, font_size_slider, font_color_dropdown, position_radio, border_size_slider, border_color_dropdown], outputs=[result_image, gif_details])
# font_size_slider.change(generate_grid, inputs=[grid_x_slider, grid_y_slider, font_size_slider, font_color_dropdown, position_radio, border_size_slider, border_color_dropdown], outputs=[result_image, gif_details])
# font_color_dropdown.change(generate_grid, inputs=[grid_x_slider, grid_y_slider, font_size_slider, font_color_dropdown, position_radio, border_size_slider, border_color_dropdown], outputs=[result_image, gif_details])
# position_radio.change(generate_grid, inputs=[grid_x_slider, grid_y_slider, font_size_slider, font_color_dropdown, position_radio, border_size_slider, border_color_dropdown], outputs=[result_image, gif_details])
# border_size_slider.change(generate_grid, inputs=[grid_x_slider, grid_y_slider, font_size_slider, font_color_dropdown, position_radio, border_size_slider, border_color_dropdown], outputs=[result_image, gif_details])
# border_color_dropdown.change(generate_grid, inputs=[grid_x_slider, grid_y_slider, font_size_slider, font_color_dropdown, position_radio, border_size_slider, border_color_dropdown], outputs=[result_image, gif_details])
process_button.click(generate_grid, inputs=[grid_x_slider, grid_y_slider, font_size_slider, font_color_dropdown, position_radio, border_size_slider, border_color_dropdown], outputs=[result_image, gif_details])
if __name__ == "__main__":
stored_frames = None
app.launch()
|