Spaces:
Running
Running
Upload 3 files
Browse files- README.md +21 -0
- app.py +145 -0
- requirements.txt +9 -0
README.md
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# AI Image Checker (MVP)

**What it does**
- Checks **provenance** via one-click link to **Adobe Verify** (upload the same image there to see Content Credentials / C2PA).
- Runs **EXIF** checks (missing/odd tags).
- Runs **ELA** (Error-Level Analysis) and a **lightweight ViT classifier**.
- Combines model + heuristics into a simple, explainable verdict.

**Deploy in 3 steps**
1) Create a **Hugging Face account** → New **Space** → Type: **Streamlit**.
2) Upload these files: `app.py`, `requirements.txt`, `README.md`.
3) Hit **Deploy**. That’s it.

**Use**
- Upload an image.
- Click **Open Adobe Verify** and upload the same image there to check for **Content Credentials** (the gold standard when present).
- Review **EXIF**, **ELA**, and **AI confidence**. Adjust the threshold in the sidebar.

**Notes**
- Results are probabilistic; newer generators may fool detectors. Provenance (C2PA) > detection where available.
- This is a portfolio demo; not a certified forensic tool.
app.py
ADDED
@@ -0,0 +1,145 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import io
import json
import re

import exifread
import numpy as np
import streamlit as st
from PIL import Image, ImageChops, ImageStat, ExifTags
# Streamlit page setup — set_page_config must be the first st.* call in the script.
st.set_page_config(page_title="AI Image Checker (MVP)", layout="centered")
st.title("AI Image Checker (MVP)")
st.caption("EXIF + ELA heuristics + ViT classifier + Provenance (Adobe Verify link)")

# ---------------------- MODEL --------------------------
# Hugging Face Hub model id used by the image-classification pipeline below.
HF_MODEL_ID = "dima806/deepfake_vs_real_image_detection"  # ViT fine-tuned Real vs Fake
@st.cache_resource(show_spinner=True)
def load_hf_pipeline():
    """Build the Real-vs-Fake image classifier, cached once per server process.

    The import is deferred so transformers (and its torch backend) only load
    when the pipeline is first needed, keeping app start-up fast.
    """
    from transformers import pipeline as hf_pipeline
    return hf_pipeline("image-classification", model=HF_MODEL_ID)
18 |
+
|
# ---------------------- HELPERS ------------------------
# Key-name fragments that hint at editing software or known AI generators.
_AI_TOOL_HINTS = ("software", "artist", "generator", "model", "stable",
                  "midjourney", "dalle", "firefly", "synthid")


def find_suspicious_tags(exif):
    """Return the EXIF key names that hint at AI generation or heavy editing.

    Bug fix: the previous check used a bare substring test for "ai", which
    false-positived on innocent keys such as "JPEGThumbnail" ("thumbn-ai-l").
    "ai" now has to appear as a whole word; the other hints stay substring
    matches as before.
    """
    flagged = []
    for key in exif:
        low = str(key).lower()
        if any(hint in low for hint in _AI_TOOL_HINTS) or re.search(r"\bai\b", low):
            flagged.append(key)
    return flagged


def read_basic_exif(pil_img, raw_bytes):
    """Collect EXIF metadata from both PIL and exifread.

    Parameters:
        pil_img: opened PIL image (used for PIL's own EXIF view).
        raw_bytes: the original uploaded file bytes (exifread parses these
            directly, so it can see tags PIL drops).

    Returns:
        (exif, suspicious_tags, missing_core) where exif maps tag name ->
        truncated string value, suspicious_tags lists AI/editing-related key
        names, and missing_core is True when no original capture timestamp
        exists under either PIL's or exifread's key naming.
    """
    exif = {}
    info = pil_img.getexif()
    if info:
        for tag, val in info.items():
            name = ExifTags.TAGS.get(tag, tag)
            # Truncate values: EXIF can embed large blobs we only display.
            exif[name] = str(val)[:200]
    try:
        # Second, best-effort pass with exifread (details=False skips thumbnails
        # and maker-note minutiae for speed).
        tags = exifread.process_file(io.BytesIO(raw_bytes), details=False)
        for k, v in tags.items():
            exif[k] = str(v)[:200]
    except Exception:
        # Deliberate best-effort: absent/corrupt EXIF is not an error here.
        pass
    suspicious_tags = find_suspicious_tags(exif)
    # PIL names the tag "DateTimeOriginal"; exifread prefixes "EXIF ".
    missing_core = ("DateTimeOriginal" not in exif) and ("EXIF DateTimeOriginal" not in exif)
    return exif, suspicious_tags, missing_core
def error_level_analysis(pil_img, quality=95):
    """Return an Error-Level-Analysis image: the amplified pixel difference
    between the input and a JPEG re-save of itself at `quality`.

    Regions that recompress differently from their surroundings (possible
    splices or synthetic areas) show up brighter after amplification.
    """
    rgb = pil_img if pil_img.mode == "RGB" else pil_img.convert("RGB")

    # Round-trip through an in-memory JPEG at the given quality.
    jpeg_buf = io.BytesIO()
    rgb.save(jpeg_buf, "JPEG", quality=quality)
    round_trip = Image.open(io.BytesIO(jpeg_buf.getvalue()))

    diff = ImageChops.difference(rgb, round_trip)

    # Stretch the (usually tiny) differences to the full 0-255 range so the
    # ELA map is visible; max(1, ...) guards against a zero-difference image.
    peak = max(band_max for _, band_max in diff.getextrema())
    gain = 255.0 / max(1, peak)
    return Image.eval(diff, lambda px: int(px * gain))
def ela_features(ela_img):
    """Summarize an ELA image as a float32 vector [mean, rms, var, filled].

    mean/rms/var are channel-averaged pixel statistics; filled is the
    fraction of the image area covered by the bounding box of non-zero
    pixels (0 when the difference image is entirely black).
    """
    stats = ImageStat.Stat(ela_img)
    bbox = ela_img.getbbox()
    if bbox:
        width, height = ela_img.size
        left, top, right, bottom = bbox
        filled = ((right - left) * (bottom - top)) / (width * height)
    else:
        filled = 0
    return np.array(
        [np.mean(stats.mean), np.mean(stats.rms), np.mean(stats.var), filled],
        dtype=np.float32,
    )
def heuristic_score(exif_suspicious, exif_missing_core, ela_feat):
    """Map EXIF and ELA signals onto a heuristic fake-probability in [0, 1].

    Each weak signal adds a small fixed amount; the total is clamped to
    [0, 1]. Thresholds are hand-tuned for this demo, not calibrated.
    """
    total = 0.0
    if exif_missing_core:
        total += 0.15          # no capture timestamp: often stripped metadata
    if exif_suspicious:
        total += min(0.25, 0.05 * len(exif_suspicious))  # capped tag penalty
    mean, rms, var, filled = ela_feat
    if mean < 5:
        total += 0.15          # very uniform ELA: typical of fully synthetic images
    if rms < 10:
        total += 0.10
    if filled > 0.95:
        total += 0.15          # differences spread over nearly the whole frame
    return max(0.0, min(1.0, total))
def combine_probs(model_fake_prob, heur_fake_prob, weight_model=0.75):
    """Blend the model and heuristic fake-probabilities.

    `weight_model` is the model's share of the convex combination; the
    heuristics receive the remainder. Returns a plain float.
    """
    heur_weight = 1 - weight_model
    return float(model_fake_prob * weight_model + heur_fake_prob * heur_weight)
# ---------------------- SIDEBAR ------------------------
with st.sidebar:
    st.header("Settings")
    # Ensemble score >= this threshold yields the "Likely AI" verdict.
    fake_threshold = st.slider("Decision threshold (Fake if ≥ this)", 0.0, 1.0, 0.55, 0.01)
    # Weight of the ViT classifier vs. EXIF/ELA heuristics in the blend.
    weight_model = st.slider("Model weight in ensemble", 0.0, 1.0, 0.75, 0.05)
    st.caption("Lower threshold to flag more images as AI. Results are probabilistic.")
# ---------------------- MAIN --------------------------
uploaded = st.file_uploader("Drop an image (JPG/PNG/WebP)", type=["jpg","jpeg","png","webp"])
if uploaded:
    # Keep the raw bytes: exifread parses them directly in read_basic_exif.
    raw = uploaded.read()
    img = Image.open(io.BytesIO(raw)).convert("RGB")
    # NOTE(review): use_column_width is deprecated in newer Streamlit releases
    # (use_container_width replaces it) — confirm the Space's pinned version.
    st.image(img, caption="Uploaded", use_column_width=True)

    # ---- Provenance (C2PA / Content Credentials) ----
    # No API call here: the user is directed to Adobe Verify to check for a
    # signed C2PA manifest on the same image.
    st.subheader("Provenance (Content Credentials)")
    st.caption("First, check if the image carries signed Content Credentials (C2PA).")
    st.link_button("Open Adobe Verify", "https://verify.contentcredentials.org")
    st.caption("Upload the same image on the Verify page to see if credentials exist and what edits occurred.")

    # ---- Metadata (EXIF) ----
    st.subheader("Metadata (EXIF)")
    exif, suspicious, missing_core = read_basic_exif(img, raw)
    with st.expander("View EXIF details"):
        st.json({k: exif[k] for k in sorted(exif.keys())})
    if suspicious:
        # Show at most 8 flagged tag names to keep the warning readable.
        st.warning("Suspicious metadata tags: " + ", ".join(suspicious[:8]))
    if missing_core:
        st.info("No original capture timestamp in EXIF; many generated/edited images strip EXIF.")

    # ---- ELA ----
    st.subheader("Error Level Analysis (ELA)")
    ela_img = error_level_analysis(img, quality=95)
    st.image(ela_img, caption="ELA (enhanced for visibility)", use_column_width=True)
    feats = ela_features(ela_img)

    # ---- Model prediction (ViT) ----
    # The classifier is run on both the original image and its ELA map.
    clf = load_hf_pipeline()
    preds_orig = clf(img)
    preds_ela = clf(ela_img)

    def fake_prob(preds):
        """Extract P(fake) from pipeline output; falls back to 1 - P(real)
        when the model labels differ from the expected "fake"/"real"."""
        score_by_label = {p["label"].lower(): float(p["score"]) for p in preds}
        return score_by_label.get("fake", 1.0 - score_by_label.get("real", 0.0))

    # Take the more pessimistic of the two views.
    model_fake = max(fake_prob(preds_orig), fake_prob(preds_ela))

    # ---- Heuristics ----
    heur_fake = heuristic_score(suspicious, missing_core, feats)

    # ---- Ensemble & verdict ----
    combined = combine_probs(model_fake, heur_fake, weight_model=weight_model)
    # Scores within 0.15 below the threshold land in the "Uncertain" band.
    verdict = "Likely AI" if combined >= fake_threshold else ("Uncertain" if combined >= (fake_threshold - 0.15) else "Likely Real")

    st.subheader(f"Result: {verdict}")
    st.write(f"**Combined AI confidence (0–100):** {int(combined*100)}")

    with st.expander("Why this result? (breakdown)"):
        st.write({
            "model_fake_prob": round(model_fake, 4),
            "heuristic_fake_prob": round(heur_fake, 4),
            "threshold": fake_threshold,
            "model_weight": weight_model
        })
        st.markdown("""
- **Provenance first:** If Content Credentials are present (see Adobe Verify), trust that signed record over any detector.
- **Model:** ViT classifier fine-tuned for Real vs Fake.
- **Heuristics:** EXIF clues + ELA artifact features.
- **Note:** Detectors are probabilistic; generators evolve quickly (concept drift).
""")

    st.caption("Portfolio demo. For newsroom/forensics, pair detection with provenance (C2PA/Content Credentials).")
requirements.txt
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
streamlit
Pillow
exifread
numpy
torch
torchvision
torchaudio
transformers
timm