'''
# author: Zhiyuan Yan
# email: [email protected]
# date: 2023-03-30
The code is specifically designed for generating nearest sample pairs for Face X-ray.
Alternatively, you can utilize the pre-generated pkl files available in our GitHub repository. Please refer to the "Releases" section on our repository for accessing these files.
'''
import os
import json
import pickle
import numpy as np
import heapq
import random
from tqdm import tqdm
from scipy.spatial import KDTree
def load_landmark(file_path):
    """
    Read 2D facial landmarks stored as a .npy file.

    Args:
        file_path: Path to the landmark .npy file, or None.

    Returns:
        np.float32 array with the loaded landmarks when the file exists;
        otherwise an all-zero (81, 2) placeholder array.
    """
    if file_path is not None and os.path.exists(file_path):
        return np.float32(np.load(file_path))
    # Missing file (or a None path) falls back to a zero placeholder.
    return np.zeros((81, 2))
def get_landmark_dict(dataset_folder):
    """
    Build (or load a cached) mapping from landmark file paths to landmark
    arrays for the FF-real subset of FaceForensics++.

    Args:
        dataset_folder: Folder containing the "FaceForensics++.json" metadata.

    Returns:
        dict mapping each landmark .npy path to its loaded landmark array
        (an all-zero placeholder when the landmark file is missing).
    """
    # BUG FIX: the original checked for 'landmark_dict_ff.pkl' but saved to
    # 'landmark_dict_ffall.pkl', so the cache was written yet never reused.
    # Use a single cache path for both the check and the save.
    cache_path = 'landmark_dict_ff.pkl'
    if os.path.exists(cache_path):
        with open(cache_path, 'rb') as f:
            return pickle.load(f)
    # Open the metadata file for the current folder
    metadata_path = os.path.join(dataset_folder, "FaceForensics++.json")
    with open(metadata_path, "r") as f:
        metadata = json.load(f)
    # Only the real (non-manipulated) videos are used for Face X-ray pairing.
    ff_real_data = metadata['FaceForensics++']['FF-real']
    # Derive each landmark path from its frame path once, then load it
    # (the original computed the replace() chain twice per frame).
    landmark_paths = (
        frame_path.replace('frames', 'landmarks').replace(".png", ".npy")
        for mode, value in ff_real_data.items()
        for video_name, video_info in tqdm(value['c23'].items())
        for frame_path in video_info['frames']
    )
    landmark_dict = {path: load_landmark(path) for path in landmark_paths}
    # Save the dictionary so subsequent runs can skip the expensive rebuild.
    with open(cache_path, 'wb') as f:
        pickle.dump(landmark_dict, f)
    return landmark_dict
def get_nearest_faces_fixed_pair(landmark_info, num_neighbors):
    '''
    For each image, pick ONE fixed nearest-face pair at random from its
    num_neighbors closest faces in landmark space (KDTree-accelerated).

    Args:
        landmark_info: dict mapping image ids/paths to landmark arrays.
        num_neighbors: size of the nearest-neighbor candidate pool.

    Returns:
        dict mapping each image id to the single image id chosen as its pair.
    '''
    random.seed(1024)  # Fix the random seed for reproducibility
    # BUG FIX: this function previously shared the cache file
    # 'nearest_face_info.pkl' with get_nearest_faces(), whose cached values
    # have a different format (a list of ids instead of one id) — whichever
    # ran second silently returned the wrong structure. Use a dedicated file.
    cache_path = 'nearest_face_info_fixed_pair.pkl'
    if os.path.exists(cache_path):
        with open(cache_path, 'rb') as f:
            return pickle.load(f)
    landmarks_array = np.array([lmk.flatten() for lmk in landmark_info.values()])
    landmark_ids = list(landmark_info.keys())
    # Build a KDTree using the flattened landmarks
    tree = KDTree(landmarks_array)
    nearest_faces = {}
    for idx, this_lmk in tqdm(enumerate(landmarks_array), total=len(landmarks_array)):
        # k = num_neighbors + 1 because the closest hit is the query itself
        dists, indices = tree.query(this_lmk, k=num_neighbors + 1)
        # Randomly pick one from the nearest N neighbors (excluding itself)
        picked_idx = random.choice(indices[1:])
        nearest_faces[landmark_ids[idx]] = landmark_ids[picked_idx]
    # Cache the result so subsequent runs can skip the query loop.
    with open(cache_path, 'wb') as f:
        pickle.dump(nearest_faces, f)
    return nearest_faces
def get_nearest_faces(landmark_info, num_neighbors):
    '''
    For each image, collect the ids of its num_neighbors closest faces in
    landmark space, using a KDTree for fast nearest-neighbor queries.
    '''
    random.seed(1024)  # fixed seed for reproducibility
    # Reuse the cached mapping when a previous run already produced it.
    if os.path.exists('nearest_face_info.pkl'):
        with open('nearest_face_info.pkl', 'rb') as f:
            return pickle.load(f)
    face_ids = list(landmark_info.keys())
    flat_landmarks = np.array([lmk.flatten() for lmk in landmark_info.values()])
    # KDTree over the flattened landmark vectors
    tree = KDTree(flat_landmarks)
    nearest_faces = {}
    for row, query_vec in tqdm(enumerate(flat_landmarks), total=len(flat_landmarks)):
        # k = num_neighbors + 1 because the closest match is the query itself,
        # which is dropped below.
        _, neighbor_idx = tree.query(query_vec, k=num_neighbors + 1)
        nearest_faces[face_ids[row]] = [face_ids[j] for j in neighbor_idx[1:]]
    # Cache the result so subsequent runs can skip the query loop.
    with open('nearest_face_info.pkl', 'wb') as f:
        pickle.dump(nearest_faces, f)
    return nearest_faces
# Script entry point: build the landmark dictionary, then precompute each
# image's nearest faces in landmark space (used for Face X-ray pairing).
# Guarded so importing this module does not trigger the ~20-minute computation.
if __name__ == "__main__":
    # TODO: hard-coded machine-specific path — make configurable (argparse/env).
    dataset_folder = "/home/zhiyuanyan/disfin/deepfake_benchmark/preprocessing/dataset_json/"
    landmark_info = get_landmark_dict(dataset_folder)
    # Get the nearest faces for each image (in landmark_dict)
    num_neighbors = 100
    nearest_faces_info = get_nearest_faces(landmark_info, num_neighbors)  # running time: about 20 mins