stable-virtual-camera / benchmark /export_reconfusion_example.py
ZayarnyukNick's picture
Upload folder using huggingface_hub
864ebc9 verified
import argparse
import json
import os
import numpy as np
from PIL import Image
try:
from sklearn.cluster import KMeans # type: ignore[import]
except ImportError:
print("Please install sklearn to use this script.")
exit(1)
# Folder holding the per-frame "<name>.json" camera files and their matching
# "<name>.png" images. Edit this path before running the script.
subfolder = "/path/to/your/dataset"
output_file = os.path.join(subfolder, "transforms.json")

# One entry per frame that has both a camera JSON and an image.
frames = []

# Sorted so that frame indices (and the train/test ids derived from them
# later) do not depend on the directory listing order.
for file in sorted(os.listdir(subfolder)):
    if not file.endswith(".json"):
        continue

    # Swap only the trailing extension; str.replace would also rewrite any
    # earlier ".json" occurring inside the file name.
    image_file = os.path.splitext(file)[0] + ".png"
    image_path = os.path.join(subfolder, image_file)

    # Check for the image before parsing the JSON: files without one (such as
    # the transforms.json / train_test_split_*.json written by a previous
    # run) are skipped anyway, so there is no reason to load them.
    if not os.path.exists(image_path):
        print(f"Image file not found for {file}, skipping...")
        continue

    # Camera file with the intrinsics ("K") and camera-to-world pose ("c2w").
    json_path = os.path.join(subfolder, file)
    with open(json_path, "r", encoding="utf-8") as f:
        data = json.load(f)

    # Only the image dimensions are used.
    with Image.open(image_path) as img:
        w, h = img.size

    # Scale the intrinsics by the image size. The multiplication implies K is
    # stored relative to the image width/height (NOTE(review): confirm against
    # the dataset); the values written out are then in pixels.
    K = data["K"]
    fx = K[0][0] * w
    fy = K[1][1] * h
    cx = K[0][2] * w
    cy = K[1][2] * h

    # Negate columns 1 and 2 of the pose (the camera Y and Z axes) to adjust
    # for the OpenGL camera convention.
    transform_matrix = np.array(data["c2w"])
    transform_matrix[..., [1, 2]] *= -1

    frames.append(
        {
            "fl_x": fx,
            "fl_y": fy,
            "cx": cx,
            "cy": cy,
            "w": w,
            "h": h,
            # Images sit directly in `subfolder`, so the path relative to the
            # generated transforms.json is just the file name.
            "file_path": f"./{image_file}",
            "transform_matrix": transform_matrix.tolist(),
        }
    )

# Assemble and write transforms.json next to the images.
transforms_data = {"orientation_override": "none", "frames": frames}
with open(output_file, "w", encoding="utf-8") as f:
    json.dump(transforms_data, f, indent=4)
print(f"transforms.json generated at {output_file}")
# Train-test split function using K-means clustering with stride
def create_train_test_split(frames, n, output_path, stride):
    """Write a JSON train/test split selected by K-means over camera poses.

    Each frame is described by a 6-D feature: the camera position (last
    column of its "transform_matrix") concatenated with the normalized third
    column of the rotation part. The features are clustered into ``n`` groups
    and, for every cluster centre, the closest frame not already selected
    becomes a training frame. Every ``stride``-th of the remaining frames
    (in index order) becomes a test frame.

    Args:
        frames: Sequence of dicts, each with a "transform_matrix" entry
            holding a nested list whose top-left 3x4 part is indexed here.
        n: Number of training frames to select; must satisfy
            ``1 <= n <= len(frames)``.
        output_path: Destination of the JSON file, written as
            ``{"train_ids": [...], "test_ids": [...]}``.
        stride: Positive step used to subsample the non-training frames.

    Raises:
        ValueError: If ``n`` or ``stride`` is out of range.
    """
    # Fail early with a clear message instead of after clustering (stride) or
    # from inside scikit-learn (n).
    if stride < 1:
        raise ValueError(f"stride must be a positive integer, got {stride}.")
    if not 1 <= n <= len(frames):
        raise ValueError(
            f"n must be between 1 and the number of frames ({len(frames)}), got {n}."
        )

    # Build the per-frame clustering features.
    features = []
    for frame in frames:
        transform_matrix = np.array(frame["transform_matrix"])
        position = transform_matrix[:3, 3]  # 3D camera position
        axis = transform_matrix[:3, 2]
        direction = axis / np.linalg.norm(axis)  # Normalized 3D direction
        features.append(np.concatenate([position, direction]))
    features = np.array(features)

    # Fixed seed so the split is reproducible.
    kmeans = KMeans(n_clusters=n, random_state=42)
    kmeans.fit(features)

    # Pick, for each centre, the nearest frame that is not already taken.
    # Without the exclusion two centres can resolve to the same frame, which
    # would leave fewer than n distinct training frames.
    train_ids = []
    for center in kmeans.cluster_centers_:
        distances = np.linalg.norm(features - center, axis=1)
        if train_ids:
            distances[train_ids] = np.inf
        train_ids.append(int(np.argmin(distances)))  # Convert to Python int

    # Remaining frames, in index order, subsampled by the stride.
    chosen = set(train_ids)
    remaining_indices = [idx for idx in range(len(frames)) if idx not in chosen]
    test_ids = remaining_indices[::stride]

    split_data = {"train_ids": sorted(train_ids), "test_ids": test_ids}
    with open(output_path, "w") as f:
        json.dump(split_data, f, indent=4)
    print(f"Train-test split file generated at {output_path}")
# Command-line entry point: build the K-means train/test split for the frames
# collected by the export step earlier in this file.
if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Generate train-test split JSON file using K-means clustering."
    )
    parser.add_argument(
        "--n",
        type=int,
        required=True,
        help="Number of frames to include in the training set.",
    )
    parser.add_argument(
        "--stride",
        type=int,
        default=1,
        # The stride IS applied: it subsamples the frames left over after the
        # training frames have been picked.
        help="Keep every stride-th non-training frame as a test frame.",
    )
    args = parser.parse_args()

    # Report unusable values as normal CLI errors rather than as a traceback
    # from the clustering step.
    if args.stride < 1:
        parser.error(f"--stride must be a positive integer, got {args.stride}.")
    if not 1 <= args.n <= len(frames):
        parser.error(
            f"--n must be between 1 and the number of frames ({len(frames)}), "
            f"got {args.n}."
        )

    # The split file is written next to transforms.json, named after n.
    train_test_split_path = os.path.join(subfolder, f"train_test_split_{args.n}.json")
    create_train_test_split(frames, args.n, train_test_split_path, args.stride)