# File size: 6,269 Bytes
# Revision hashes shown beside the source: fcfe2af d008d64 5bffed5 295487b fcfe2af d008d64 fcfe2af
# (The 1-168 line-number gutter left over from page extraction has been dropped.)
import os
# Install/upgrade runtime dependencies before the third-party imports below.
# pip is invoked as a module of the interpreter that is running this script
# (sys.executable), so the packages land in the environment that will import
# them; a bare "pip"/"python" on PATH may belong to a different installation.
import subprocess
import sys

for _pip_args in (
    ["install", "--upgrade", "pip"],
    ["install", "git+https://github.com/rwightman/pytorch-image-models"],
    ["install", "git+https://github.com/huggingface/huggingface_hub"],
):
    # check=False keeps the installs best-effort, like the os.system calls
    # this replaces: a failed install does not abort start-up.
    subprocess.run([sys.executable, "-m", "pip", *_pip_args], check=False)
import gradio as gr
import timm
import torch
from torch import nn
from torch.nn import functional as F
import torchvision
class Model200M(torch.nn.Module):
    """Detector built from a timm ConvNeXt-Large backbone and a small MLP head.

    The backbone is created without pretrained weights and with
    ``num_classes=0``; its output is passed to ``clf``, which expects 1536
    input features and produces two logits. The attribute names ``model`` and
    ``clf`` determine the state-dict keys used when a checkpoint is loaded.
    """

    def __init__(self):
        super().__init__()
        backbone_name = 'convnext_large_mlp.clip_laion2b_soup_ft_in12k_in1k_384'
        self.model = timm.create_model(backbone_name, pretrained=False, num_classes=0)
        head_layers = [
            nn.Linear(1536, 128),
            nn.ReLU(inplace=True),
            nn.Linear(128, 2),
        ]
        self.clf = nn.Sequential(*head_layers)

    def forward(self, image):
        """Return the head's two-logit output for the backbone features of ``image``."""
        return self.clf(self.model(image))
class Model5M(torch.nn.Module):
    """Detector built from a timm MobileNetV3-Large backbone and a small MLP head.

    The backbone is created without pretrained weights and with
    ``num_classes=0``; its output is passed to ``clf``, which expects 1280
    input features and produces two logits. The attribute names ``model`` and
    ``clf`` determine the state-dict keys used when a checkpoint is loaded.
    """

    def __init__(self):
        super().__init__()
        self.model = timm.create_model(
            'timm/tf_mobilenetv3_large_100.in1k',
            pretrained=False,
            num_classes=0,
        )
        hidden = nn.Linear(1280, 128)
        activation = nn.ReLU(inplace=True)
        logits = nn.Linear(128, 2)
        self.clf = nn.Sequential(hidden, activation, logits)

    def forward(self, image):
        """Return the head's two-logit output for the backbone features of ``image``."""
        features = self.model(image)
        scores = self.clf(features)
        return scores
def load_model(name: str):
    """Build the detector matching ``name`` and load its checkpoint on CPU.

    Args:
        name: Path to a checkpoint file. A path containing ``"200M"`` selects
            ``Model200M``; any other path selects ``Model5M``.

    Returns:
        The model with the checkpoint's state dict loaded, switched to
        evaluation mode.
    """
    if "200M" in name:
        model = Model200M()
    else:
        model = Model5M()
    # map_location forces every tensor in the checkpoint onto the CPU.
    state_dict = torch.load(name, map_location=torch.device('cpu'))
    model.load_state_dict(state_dict)
    model.eval()
    return model
# Registry of ready-to-use detectors, keyed by the name offered in the UI.
# All four checkpoints are loaded eagerly, in this order, at import time.
model_list = {
    key: load_model(checkpoint_path)
    for key, checkpoint_path in (
        ('midjourney_200M', 'models/midjourney200M.pt'),
        ('diffusions_200M', 'models/diffusions200M.pt'),
        ('midjourney_5M', 'models/midjourney5M.pt'),
        ('diffusions_5M', 'models/diffusions5M.pt'),
    )
}
# Preprocessing used for the *_200M models: resize to 640x640, convert to a
# tensor, then normalise each channel with the mean/std values below
# (the values commonly used for ImageNet-trained backbones).
tfm = torchvision.transforms.Compose(
    [
        torchvision.transforms.Resize(size=(640, 640)),
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)
# Preprocessing used for the *_5M models: same steps as the large pipeline
# but with a 224x224 target size.
tfm_small = torchvision.transforms.Compose(
    [
        torchvision.transforms.Resize(size=(224, 224)),
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)
def predict_from_model(model, img_1):
    """Run ``model`` on a single preprocessed image and return class scores.

    Args:
        model: Callable module mapping a batched input to logits with at
            least two columns along dimension 1 (column 1 is reported as
            "created by AI", column 0 as "created by human").
        img_1: One un-batched input tensor; a leading batch dimension of
            size 1 is added before the forward pass.

    Returns:
        A dict with the keys ``'created by AI'`` and ``'created by human'``,
        each holding a list with one softmax probability per batch row
        (a single value here).
    """
    # Inference only: skip autograd bookkeeping so no graph is retained.
    with torch.no_grad():
        # Call the module itself rather than .forward() so registered hooks run.
        logits = model(img_1[None, ...])
        # One softmax serves both outputs.
        probabilities = F.softmax(logits, dim=1)
    return {'created by AI': probabilities[:, 1].cpu().tolist(),
            'created by human': probabilities[:, 0].cpu().tolist()}
def predict(raw_image, model_name):
    """Classify an uploaded image with the detector chosen in the UI.

    Args:
        raw_image: PIL image supplied by the image input, or ``None`` when
            nothing has been uploaded.
        model_name: Key into ``model_list`` selecting the detector.

    Returns:
        ``None`` when no image was provided (leaves the result label empty),
        ``{'error': [0.]}`` when ``model_name`` is unknown, otherwise the
        score dict produced by ``predict_from_model``.
    """
    # Submit can be pressed before an image is uploaded; without this guard
    # the transform would be handed None and raise.
    if raw_image is None:
        return None
    # Validate the model name before doing any preprocessing work.
    if model_name not in model_list:
        return {'error': [0.]}
    # Only run the pipeline the selected model needs: 640x640 for *_200M,
    # 224x224 otherwise.
    transform = tfm if "200M" in model_name else tfm_small
    # The normalisation step carries three channel statistics, so make sure
    # the image has exactly three channels (e.g. drop alpha, expand grayscale).
    img = transform(raw_image.convert("RGB"))
    return predict_from_model(model_list[model_name], img)
# Example rows for the "Examples" tab: ten single-column rows, img_1..img_10.
general_examples = [
    [f"images/general/img_{index}.jpg"] for index in range(1, 11)
]
# Example rows for the optic image set: five single-column rows, img_1..img_5.
optic_examples = [
    [f"images/optic/img_{index}.jpg"] for index in range(1, 6)
]
# Example rows for the "Widely known deepfakes" tab; note the last file is a
# .webp while the others are .jpg.
famous_deepfake_examples = [
    [f"images/famous_deepfakes/{file_name}"]
    for file_name in ("img_1.jpg", "img_2.jpg", "img_3.jpg", "img_4.webp")
]
# Gradio UI definition: header text, an input column (image, model selector,
# Clear/Submit buttons), a result label, example galleries, and a footer.
# NOTE(review): the source reached review with its indentation stripped, so
# the nesting of the Row/Column/Tab containers below is reconstructed from
# the statement order -- confirm it matches the intended layout.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    # Page header with links to the four model cards. The text inside the
    # string is kept flush-left so Markdown never sees it as an indented
    # code block.
    gr.Markdown(
        """
<h1 style="text-align: center;">For Fake's Sake: a set of models for detecting generated and synthetic images</h1>
This is a demo space for synthetic image detectors:
<a href='https://huggingface.co/Sumsub/Sumsub-ffs-synthetic-1.0_mj_200'>midjourney200M</a>,
<a href='https://huggingface.co/Sumsub/Sumsub-ffs-synthetic-1.0_mj_5'>midjourney5M</a>,
<a href='https://huggingface.co/Sumsub/Sumsub-ffs-synthetic-1.0_sd_200'>diffusions200M</a>,
<a href='https://huggingface.co/Sumsub/Sumsub-ffs-synthetic-1.0_sd_5'>diffusions5M</a>.<br>
We provide several detectors for images generated by popular tools, such as Midjourney and Stable Diffusion.<br>
Please refer to model cards for evaluation metrics and limitations.
"""
    )
    with gr.Row():
        with gr.Column():
            # The image is delivered to predict() as a PIL image.
            image_input = gr.Image(type="pil")
            # Choices are the registry keys, passed as a real list.
            drop_down = gr.Dropdown(list(model_list), type="value", label="Model", value="diffusions_200M")
            with gr.Row():
                gr.ClearButton(components=[image_input])
                submit_button = gr.Button("Submit", variant="primary")
        with gr.Column():
            result_score = gr.Label(label='result', num_top_classes=2)
    with gr.Tab("Examples"):
        gr.Examples(examples=general_examples, inputs=image_input)
    # with gr.Tab("More examples"):
    #     gr.Examples(examples=optic_examples, inputs=image_input)
    with gr.Tab("Widely known deepfakes"):
        gr.Examples(examples=famous_deepfake_examples, inputs=image_input)
    # Submit runs predict(image, model name) and shows the scores in the label.
    submit_button.click(predict, inputs=[image_input, drop_down], outputs=result_score)
    # Footer: backbone/image-size summary, model-card links and license.
    gr.Markdown(
        """
<h3>Models</h3>
<p><code>*_200M</code> models are based on <code>convnext_large_mlp.clip_laion2b_soup_ft_in12k_in1k_384</code> with image size <code>640x640</code></p>
<p><code>*_5M</code> models are based on <code>tf_mobilenetv3_large_100.in1k</code> with image size <code>224x224</code></p>
<h3>Details</h3>
<li>Model cards: <a href='https://huggingface.co/Sumsub/Sumsub-ffs-synthetic-1.0_mj_200'>midjourney200M</a>,
<a href='https://huggingface.co/Sumsub/Sumsub-ffs-synthetic-1.0_mj_5'>midjourney5M</a>,
<a href='https://huggingface.co/Sumsub/Sumsub-ffs-synthetic-1.0_sd_200'>diffusions200M</a>,
<a href='https://huggingface.co/Sumsub/Sumsub-ffs-synthetic-1.0_sd_5'>diffusions5M</a>.
</li>
<li>License: CC-By-SA-3.0</li>
"""
    )
# Start the Gradio server for the interface defined above.
demo.launch()