Spaces:
Running
Running
Upload 3 files
Browse files- README.md +21 -0
- app.py +145 -0
- requirements.txt +9 -0
README.md
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# AI Image Checker (MVP)

**What it does**
- Checks **provenance** via one-click link to **Adobe Verify** (upload the same image there to see Content Credentials / C2PA).
- Runs **EXIF** checks (missing/odd tags).
- Runs **ELA** (Error-Level Analysis) and a **lightweight ViT classifier**.
- Combines model + heuristics into a simple, explainable verdict.

**Deploy in 3 steps**
1) Create a **Hugging Face account** → New **Space** → Type: **Streamlit**.
2) Upload these files: `app.py`, `requirements.txt`, `README.md`.
3) Hit **Deploy**. That’s it.

**Use**
- Upload an image.
- Click **Open Adobe Verify** and upload the same image there to check for **Content Credentials** (the gold standard when present).
- Review **EXIF**, **ELA**, and **AI confidence**. Adjust the threshold in the sidebar.

**Notes**
- Results are probabilistic; newer generators may fool detectors. Provenance (C2PA) > detection where available.
- This is a portfolio demo; not a certified forensic tool.
app.py
ADDED
@@ -0,0 +1,145 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import io
import json
import re

import exifread
import numpy as np
import streamlit as st
from PIL import Image, ImageChops, ImageStat, ExifTags
# Streamlit page setup — set_page_config must be the first st.* call in the script.
st.set_page_config(page_title="AI Image Checker (MVP)", layout="centered")
st.title("AI Image Checker (MVP)")
st.caption("EXIF + ELA heuristics + ViT classifier + Provenance (Adobe Verify link)")

# ---------------------- MODEL --------------------------
# Hugging Face Hub model id used by the image-classification pipeline below.
HF_MODEL_ID = "dima806/deepfake_vs_real_image_detection"  # ViT fine-tuned Real vs Fake
@st.cache_resource(show_spinner=True)
def load_hf_pipeline():
    """Build the Real-vs-Fake image classifier, cached once per server process.

    The import is deferred so transformers (and its torch backend) only load
    when the pipeline is first needed, keeping app start-up fast.
    """
    from transformers import pipeline as hf_pipeline
    return hf_pipeline("image-classification", model=HF_MODEL_ID)
18 |
+
|
# ---------------------- HELPERS ------------------------
# Key-name fragments that hint at editing software or known AI generators.
_AI_TOOL_HINTS = ("software", "artist", "generator", "model", "stable",
                  "midjourney", "dalle", "firefly", "synthid")


def find_suspicious_tags(exif):
    """Return the EXIF key names that hint at AI generation or heavy editing.

    Bug fix: the previous check used a bare substring test for "ai", which
    false-positived on innocent keys such as "JPEGThumbnail" ("thumbn-ai-l").
    "ai" now has to appear as a whole word; the other hints stay substring
    matches as before.
    """
    flagged = []
    for key in exif:
        low = str(key).lower()
        if any(hint in low for hint in _AI_TOOL_HINTS) or re.search(r"\bai\b", low):
            flagged.append(key)
    return flagged


def read_basic_exif(pil_img, raw_bytes):
    """Collect EXIF metadata from both PIL and exifread.

    Parameters:
        pil_img: opened PIL image (used for PIL's own EXIF view).
        raw_bytes: the original uploaded file bytes (exifread parses these
            directly, so it can see tags PIL drops).

    Returns:
        (exif, suspicious_tags, missing_core) where exif maps tag name ->
        truncated string value, suspicious_tags lists AI/editing-related key
        names, and missing_core is True when no original capture timestamp
        exists under either PIL's or exifread's key naming.
    """
    exif = {}
    info = pil_img.getexif()
    if info:
        for tag, val in info.items():
            name = ExifTags.TAGS.get(tag, tag)
            # Truncate values: EXIF can embed large blobs we only display.
            exif[name] = str(val)[:200]
    try:
        # Second, best-effort pass with exifread (details=False skips thumbnails
        # and maker-note minutiae for speed).
        tags = exifread.process_file(io.BytesIO(raw_bytes), details=False)
        for k, v in tags.items():
            exif[k] = str(v)[:200]
    except Exception:
        # Deliberate best-effort: absent/corrupt EXIF is not an error here.
        pass
    suspicious_tags = find_suspicious_tags(exif)
    # PIL names the tag "DateTimeOriginal"; exifread prefixes "EXIF ".
    missing_core = ("DateTimeOriginal" not in exif) and ("EXIF DateTimeOriginal" not in exif)
    return exif, suspicious_tags, missing_core
def error_level_analysis(pil_img, quality=95):
    """Return an Error-Level-Analysis image: the amplified pixel difference
    between the input and a JPEG re-save of itself at `quality`.

    Regions that recompress differently from their surroundings (possible
    splices or synthetic areas) show up brighter after amplification.
    """
    rgb = pil_img if pil_img.mode == "RGB" else pil_img.convert("RGB")

    # Round-trip through an in-memory JPEG at the given quality.
    jpeg_buf = io.BytesIO()
    rgb.save(jpeg_buf, "JPEG", quality=quality)
    round_trip = Image.open(io.BytesIO(jpeg_buf.getvalue()))

    diff = ImageChops.difference(rgb, round_trip)

    # Stretch the (usually tiny) differences to the full 0-255 range so the
    # ELA map is visible; max(1, ...) guards against a zero-difference image.
    peak = max(band_max for _, band_max in diff.getextrema())
    gain = 255.0 / max(1, peak)
    return Image.eval(diff, lambda px: int(px * gain))
def ela_features(ela_img):
    """Summarize an ELA image as a float32 vector [mean, rms, var, filled].

    mean/rms/var are channel-averaged pixel statistics; filled is the
    fraction of the image area covered by the bounding box of non-zero
    pixels (0 when the difference image is entirely black).
    """
    stats = ImageStat.Stat(ela_img)
    bbox = ela_img.getbbox()
    if bbox:
        width, height = ela_img.size
        left, top, right, bottom = bbox
        filled = ((right - left) * (bottom - top)) / (width * height)
    else:
        filled = 0
    return np.array(
        [np.mean(stats.mean), np.mean(stats.rms), np.mean(stats.var), filled],
        dtype=np.float32,
    )
def heuristic_score(exif_suspicious, exif_missing_core, ela_feat):
    """Map EXIF and ELA signals onto a heuristic fake-probability in [0, 1].

    Each weak signal adds a small fixed amount; the total is clamped to
    [0, 1]. Thresholds are hand-tuned for this demo, not calibrated.
    """
    total = 0.0
    if exif_missing_core:
        total += 0.15          # no capture timestamp: often stripped metadata
    if exif_suspicious:
        total += min(0.25, 0.05 * len(exif_suspicious))  # capped tag penalty
    mean, rms, var, filled = ela_feat
    if mean < 5:
        total += 0.15          # very uniform ELA: typical of fully synthetic images
    if rms < 10:
        total += 0.10
    if filled > 0.95:
        total += 0.15          # differences spread over nearly the whole frame
    return max(0.0, min(1.0, total))
def combine_probs(model_fake_prob, heur_fake_prob, weight_model=0.75):
    """Blend the model and heuristic fake-probabilities.

    `weight_model` is the model's share of the convex combination; the
    heuristics receive the remainder. Returns a plain float.
    """
    heur_weight = 1 - weight_model
    return float(model_fake_prob * weight_model + heur_fake_prob * heur_weight)
# ---------------------- SIDEBAR ------------------------
with st.sidebar:
    st.header("Settings")
    # Ensemble score >= this threshold yields the "Likely AI" verdict.
    fake_threshold = st.slider("Decision threshold (Fake if ≥ this)", 0.0, 1.0, 0.55, 0.01)
    # Weight of the ViT classifier vs. EXIF/ELA heuristics in the blend.
    weight_model = st.slider("Model weight in ensemble", 0.0, 1.0, 0.75, 0.05)
    st.caption("Lower threshold to flag more images as AI. Results are probabilistic.")
# ---------------------- MAIN --------------------------
uploaded = st.file_uploader("Drop an image (JPG/PNG/WebP)", type=["jpg","jpeg","png","webp"])
if uploaded:
    # Keep the raw bytes: exifread parses them directly in read_basic_exif.
    raw = uploaded.read()
    img = Image.open(io.BytesIO(raw)).convert("RGB")
    # NOTE(review): use_column_width is deprecated in newer Streamlit releases
    # (use_container_width replaces it) — confirm the Space's pinned version.
    st.image(img, caption="Uploaded", use_column_width=True)

    # ---- Provenance (C2PA / Content Credentials) ----
    # No API call here: the user is directed to Adobe Verify to check for a
    # signed C2PA manifest on the same image.
    st.subheader("Provenance (Content Credentials)")
    st.caption("First, check if the image carries signed Content Credentials (C2PA).")
    st.link_button("Open Adobe Verify", "https://verify.contentcredentials.org")
    st.caption("Upload the same image on the Verify page to see if credentials exist and what edits occurred.")

    # ---- Metadata (EXIF) ----
    st.subheader("Metadata (EXIF)")
    exif, suspicious, missing_core = read_basic_exif(img, raw)
    with st.expander("View EXIF details"):
        st.json({k: exif[k] for k in sorted(exif.keys())})
    if suspicious:
        # Show at most 8 flagged tag names to keep the warning readable.
        st.warning("Suspicious metadata tags: " + ", ".join(suspicious[:8]))
    if missing_core:
        st.info("No original capture timestamp in EXIF; many generated/edited images strip EXIF.")

    # ---- ELA ----
    st.subheader("Error Level Analysis (ELA)")
    ela_img = error_level_analysis(img, quality=95)
    st.image(ela_img, caption="ELA (enhanced for visibility)", use_column_width=True)
    feats = ela_features(ela_img)

    # ---- Model prediction (ViT) ----
    # The classifier is run on both the original image and its ELA map.
    clf = load_hf_pipeline()
    preds_orig = clf(img)
    preds_ela = clf(ela_img)

    def fake_prob(preds):
        """Extract P(fake) from pipeline output; falls back to 1 - P(real)
        when the model labels differ from the expected "fake"/"real"."""
        score_by_label = {p["label"].lower(): float(p["score"]) for p in preds}
        return score_by_label.get("fake", 1.0 - score_by_label.get("real", 0.0))

    # Take the more pessimistic of the two views.
    model_fake = max(fake_prob(preds_orig), fake_prob(preds_ela))

    # ---- Heuristics ----
    heur_fake = heuristic_score(suspicious, missing_core, feats)

    # ---- Ensemble & verdict ----
    combined = combine_probs(model_fake, heur_fake, weight_model=weight_model)
    # Scores within 0.15 below the threshold land in the "Uncertain" band.
    verdict = "Likely AI" if combined >= fake_threshold else ("Uncertain" if combined >= (fake_threshold - 0.15) else "Likely Real")

    st.subheader(f"Result: {verdict}")
    st.write(f"**Combined AI confidence (0–100):** {int(combined*100)}")

    with st.expander("Why this result? (breakdown)"):
        st.write({
            "model_fake_prob": round(model_fake, 4),
            "heuristic_fake_prob": round(heur_fake, 4),
            "threshold": fake_threshold,
            "model_weight": weight_model
        })
        st.markdown("""
- **Provenance first:** If Content Credentials are present (see Adobe Verify), trust that signed record over any detector.
- **Model:** ViT classifier fine-tuned for Real vs Fake.
- **Heuristics:** EXIF clues + ELA artifact features.
- **Note:** Detectors are probabilistic; generators evolve quickly (concept drift).
""")

    st.caption("Portfolio demo. For newsroom/forensics, pair detection with provenance (C2PA/Content Credentials).")
requirements.txt
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
streamlit
Pillow
exifread
numpy
torch
torchvision
torchaudio
transformers
timm