# Spaces: Running  (page-header residue from the web export; not part of the module)
import ast | |
import re | |
from pathlib import Path | |
import requests | |
def group_files_by_index(file_paths, data_type="audio"):
    """Group file paths by the numeric index embedded in their file name.

    Args:
        file_paths: Iterable of path/URL strings to scan.
        data_type: ``"audio"`` matches ``audio_<digits>.png|wav``, ``"video"``
            matches ``video_<digits>.png|mkv``; any other value falls back to
            the image pattern ``img_<digits>.png``.

    Returns:
        A dict mapping each integer index to the list of matching paths (in
        input order). Keys are inserted in ascending order. Paths that do not
        match the pattern are skipped.
    """
    # The dot before the extension is escaped so that it only matches a
    # literal "."; unescaped it would match any character (e.g. "audio_1xwav").
    if data_type == "audio":
        pattern = re.compile(r"audio_(\d+)\.(png|wav)")
    elif data_type == "video":
        pattern = re.compile(r"video_(\d+)\.(png|mkv)")
    else:
        pattern = re.compile(r"img_(\d+)\.png")

    grouped_files = {}
    for file_path in file_paths:
        match = pattern.search(file_path)
        if match is None:
            continue
        # int() drops zero padding, so "00003" and "3" land in the same group.
        grouped_files.setdefault(int(match.group(1)), []).append(file_path)

    # Rebuild the dict so iteration follows ascending index order.
    return dict(sorted(grouped_files.items()))
def build_description(
    i, data_none, data_attack, quality_metrics=["psnr", "ssim", "lpips"]
):
    """Build the metadata dict for one file variant of an example.

    Args:
        i: Variant position. 0 and 1 read from ``data_none``; 2 and 3 read
            from ``data_attack``.
        data_none: Result row providing ``fake_det``, ``watermark_det``,
            ``p_value``, ``bit_acc`` and the quality metric values.
        data_attack: Result row providing ``fake_det``, ``watermark_det``,
            ``p_value``, ``word_acc`` and ``bit_acc``.
        quality_metrics: Names of the metric keys copied from ``data_none``
            when ``i == 1``.

    Returns:
        * ``i == 0``: ``{"detected": data_none["fake_det"]}``
        * ``i == 1``: the rounded quality metrics plus ``detected``,
          ``p_value`` and ``bit_acc`` taken from ``data_none``
        * ``i == 2``: ``{"detected": data_attack["fake_det"]}``
        * ``i == 3``: ``word_acc``, ``detected``, ``p_value`` and ``bit_acc``
          taken from ``data_attack``
        * any other ``i``: ``None``

    Note:
        String-valued ``fake_det`` / ``watermark_det`` entries are parsed with
        ``ast.literal_eval`` and written back into the row dicts passed in.
    """
    # TODO: handle this at data generation
    # Detection flags may arrive as strings; parse them once, in place.
    for record in (data_none, data_attack):
        for flag in ("fake_det", "watermark_det"):
            if isinstance(record[flag], str):
                record[flag] = ast.literal_eval(record[flag])

    if i == 0:
        return {"detected": data_none["fake_det"]}

    if i == 1:
        # Fixed metrics are read first, then the configurable quality metrics.
        detected = data_none["watermark_det"]
        p_val = float(data_none["p_value"])
        bit_accuracy = data_none["bit_acc"]

        description = {
            name: round(float(data_none[name]), 2) for name in quality_metrics
        }
        description["detected"] = detected
        description["p_value"] = round(p_val, 2)
        description["bit_acc"] = round(bit_accuracy, 2)
        return description

    if i == 2:
        return {"detected": data_attack["fake_det"]}

    if i == 3:  # REVISIT THIS, it used to be == 3
        detected = data_attack["watermark_det"]
        p_val = float(data_attack["p_value"])
        word_accuracy = data_attack["word_acc"]
        bit_accuracy = data_attack["bit_acc"]
        return {
            "word_acc": round(word_accuracy, 2),
            "detected": detected,
            "p_value": round(p_val, 2),
            "bit_acc": round(bit_accuracy, 2),
        }

    # No description is defined for other variant positions.
    return None
def _row_for_index(rows, idx):
    """Return the first row in *rows* whose ``"idx"`` entry equals *idx*."""
    for row in rows:
        if row["idx"] == idx:
            return row
    raise IndexError(f"no evaluation row with idx={idx!r}")


def build_infos(abs_path: str, datatype: str, dataset_name: str, db_key: str):
    """Fetch the evaluation results and assemble per-model example metadata.

    Args:
        abs_path: Base URL prefix as a string. It is concatenated directly
            with the dataset directory name, so it must already end with the
            desired separator. A ``pathlib.Path`` is not accepted (string
            concatenation raises ``TypeError``).
        datatype: ``"audio"``, ``"image"`` or ``"video"``.
        dataset_name: Dataset directory name; ``"_1k"`` is appended for audio
            and image datasets, and it is used as-is for video.
        db_key: Key under ``results["eval"]`` selecting this dataset's results.

    Returns:
        ``{model_name: {attack_label: [file_info, ...]}}`` where each
        ``file_info`` has ``"name"``, ``"metadata"`` and ``"image_url"``, plus
        ``"audio_url"`` or ``"video_url"`` for audio/video. An attack with no
        rows maps to an empty list. Returns ``{}`` when the results file
        request does not answer with HTTP 200.

    Raises:
        ValueError: If ``datatype`` is not one of the supported values.
        IndexError: If a result row for an example index is missing.
        requests.RequestException: If the HTTP request itself fails or
            times out.
    """
    # Per-datatype settings: quality metric names, example file extension,
    # file-name abbreviation, dataset directory, and the attack whose
    # "default" variant supplies the reference rows.
    if datatype == "audio":
        quality_metrics = ["snr", "sisnr", "stoi", "pesq"]
        extension = "wav"
        datatype_abbr = "audio"
        dataset_dir = f"{dataset_name}_1k"
        default_attack_name = "identity"
    elif datatype == "image":
        quality_metrics = ["psnr", "ssim", "lpips"]
        extension = "png"
        datatype_abbr = "img"
        dataset_dir = f"{dataset_name}_1k"
        default_attack_name = "none"
    elif datatype == "video":
        quality_metrics = ["psnr", "ssim", "lpips", "msssim", "vmaf"]
        extension = "mkv"
        datatype_abbr = "video"
        dataset_dir = dataset_name
        default_attack_name = "Identity"
    else:
        raise ValueError(
            f"unsupported datatype {datatype!r}; "
            "expected 'audio', 'image' or 'video'"
        )

    # Plain string concatenation on purpose: abs_path is a URL prefix.
    base_url = abs_path + dataset_dir

    # A timeout keeps an unresponsive server from blocking the caller forever.
    response = requests.get(f"{base_url}/examples_eval_results.json", timeout=30)
    if response.status_code != 200:
        return {}
    dataset = response.json()["eval"][db_key]

    # File names of every example variant that is looked up per attack.
    indices = [0, 1, 3, 4, 5]
    prefixes = [
        f"attacked_{datatype_abbr}",
        f"attacked_wmd_{datatype_abbr}",
        f"{datatype_abbr}",
        f"wmd_{datatype_abbr}",
    ]
    file_names = [
        f"{prefix}_{index:05d}.{extension}"
        for prefix in prefixes
        for index in indices
    ]

    infos = {}
    for model_name, model_results in dataset.items():
        identity_attack_rows = model_results[default_attack_name]["default"]
        model_infos = {}

        for attack_name, attack_variants_data in model_results.items():
            for attack_variant, attack_rows in attack_variants_data.items():
                if attack_variant == "default":
                    attack = attack_name
                else:
                    attack = f"{attack_name}_{attack_variant}"

                if not attack_rows:
                    model_infos[attack] = []
                    continue

                examples_url = (
                    f"{base_url}/examples/{datatype}/{model_name}/{attack}"
                )
                file_paths = [f"{examples_url}/{name}" for name in file_names]

                all_files = []
                grouped = group_files_by_index(file_paths, data_type=datatype)
                for idx, files in grouped.items():
                    data_none = _row_for_index(identity_attack_rows, idx)
                    data_attack = _row_for_index(attack_rows, idx)

                    # Sorted by stem the order is attacked_*, attacked_wmd_*,
                    # <abbr>_*, wmd_*. Rotating by two puts the un-attacked
                    # pair first, so the position lines up with the variant
                    # index expected by build_description.
                    named_files = sorted(
                        [(f, Path(f).stem) for f in files], key=lambda x: x[1]
                    )
                    named_files = named_files[2:] + named_files[:2]

                    for variant_i, (file, name) in enumerate(named_files):
                        file_info = {
                            "name": name,
                            "metadata": build_description(
                                variant_i, data_none, data_attack, quality_metrics
                            ),
                        }
                        if datatype == "audio":
                            file_info["image_url"] = file.replace(".wav", ".png")
                            file_info["audio_url"] = file
                        elif datatype == "video":
                            file_info["image_url"] = file.replace(".mkv", ".png")
                            file_info["video_url"] = file
                        else:
                            file_info["image_url"] = file
                        all_files.append(file_info)

                model_infos[attack] = all_files

        infos[model_name] = model_infos

    return infos
def image_examples_tab(abs_path: Path):
    """Return the example metadata for the image dataset.

    Delegates to ``build_infos`` with datatype ``"image"`` and uses
    ``"coco_val2014"`` as both the dataset name and the results key.

    NOTE(review): ``build_infos`` uses ``abs_path`` as a URL prefix for
    ``requests.get``, so callers presumably pass a string rather than a
    ``pathlib.Path`` - confirm against the call sites.
    """
    # The Gradio dropdown/gallery wiring that used to follow existed only as
    # commented-out code; this function just returns the metadata.
    return build_infos(
        abs_path,
        datatype="image",
        dataset_name="coco_val2014",
        db_key="coco_val2014",
    )
def video_examples_tab(abs_path: Path):
    """Return the example metadata for the video dataset.

    Delegates to ``build_infos`` with datatype ``"video"``, dataset name
    ``"sav_val_full"`` and results key ``"sa-v_sav_val_videos"``.

    NOTE(review): ``build_infos`` uses ``abs_path`` as a URL prefix for
    ``requests.get``, so callers presumably pass a string rather than a
    ``pathlib.Path`` - confirm against the call sites.
    """
    # The Gradio dropdown/gallery wiring that used to follow the return
    # existed only as commented-out code; this function just returns the
    # metadata.
    return build_infos(
        abs_path,
        datatype="video",
        dataset_name="sav_val_full",
        db_key="sa-v_sav_val_videos",
    )
def audio_examples_tab(abs_path: Path):
    """Return the example metadata for the audio dataset.

    Delegates to ``build_infos`` with datatype ``"audio"`` and uses
    ``"voxpopuli"`` as both the dataset name and the results key.

    NOTE(review): ``build_infos`` uses ``abs_path`` as a URL prefix for
    ``requests.get``, so callers presumably pass a string rather than a
    ``pathlib.Path`` - confirm against the call sites.
    """
    # Everything that used to follow the return (a debug print and Gradio
    # widget/event wiring) was unreachable. It also referenced
    # ``model_choice``, whose definition was commented out, and ``gr``, which
    # is not among the module's top-level imports. It has been removed.
    return build_infos(
        abs_path,
        datatype="audio",
        dataset_name="voxpopuli",
        db_key="voxpopuli",
    )