File size: 6,128 Bytes
cb139f9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
import io, json
import numpy as np
import streamlit as st
from PIL import Image, ImageChops, ImageStat, ExifTags
import exifread

# Page chrome. Streamlit requires set_page_config to be the first st.* call.
st.set_page_config(page_title="AI Image Checker (MVP)", layout="centered")
st.title("AI Image Checker (MVP)")
st.caption("EXIF + ELA heuristics + ViT classifier + Provenance (Adobe Verify link)")

# ---------------------- MODEL --------------------------
# Hugging Face Hub id of the image classifier used for the model signal.
HF_MODEL_ID = "dima806/deepfake_vs_real_image_detection"  # ViT fine-tuned Real vs Fake

@st.cache_resource(show_spinner=True)
def load_hf_pipeline():
    """Build the Hugging Face image-classification pipeline for HF_MODEL_ID.

    Decorated with st.cache_resource so the model is downloaded and loaded
    once per server process and shared across Streamlit reruns. transformers
    is imported lazily here so the app starts without it until first use.
    """
    from transformers import pipeline as hf_pipeline
    classifier = hf_pipeline("image-classification", model=HF_MODEL_ID)
    return classifier

# ---------------------- HELPERS ------------------------
# Tag-name tokens that hint at AI generation or editing software.
# NOTE(review): "model"/"software" also match standard camera tags
# (camera Model, firmware Software) — inherited from the original token
# list; confirm whether those should stay.
_SUSPECT_TOKENS = {
    "software", "artist", "generator", "ai", "model",
    "stable", "midjourney", "dalle", "firefly", "synthid",
}

def _key_words(key):
    """Split an EXIF key into lowercase words.

    Breaks on non-letter characters and on lower->upper camelCase
    boundaries, e.g. "EXIF DateTimeOriginal" -> ["exif", "date", "time",
    "original"]. Non-string keys (raw int tag ids) are stringified first.
    """
    words = []
    current = ""
    prev_is_lower = False
    for ch in str(key):
        if ch.isalpha():
            if ch.isupper() and prev_is_lower and current:
                words.append(current.lower())
                current = ch
            else:
                current += ch
            prev_is_lower = ch.islower()
        else:
            if current:
                words.append(current.lower())
            current = ""
            prev_is_lower = False
    if current:
        words.append(current.lower())
    return words

def read_basic_exif(pil_img, raw_bytes):
    """Collect EXIF metadata and flag generation-related clues.

    Merges Pillow's getexif() view with exifread's richer parse of the
    raw bytes (exifread wins on key collisions); values are truncated to
    200 chars for display.

    Returns:
        (exif, suspicious_tags, missing_core) where
        exif:            dict of tag name -> string value,
        suspicious_tags: keys containing an AI/editing-related word,
        missing_core:    True when no original capture timestamp exists.
    """
    info = pil_img.getexif()
    exif = {}
    if info:
        for tag, val in info.items():
            name = ExifTags.TAGS.get(tag, tag)
            exif[name] = str(val)[:200]
    try:
        # exifread surfaces MakerNote/thumbnail tags that Pillow skips.
        tags = exifread.process_file(io.BytesIO(raw_bytes), details=False)
        for k, v in tags.items():
            exif[k] = str(v)[:200]
    except Exception:
        # Best-effort: corrupt or absent EXIF simply yields fewer tags.
        pass
    # BUGFIX: the previous raw-substring test flagged innocent keys — e.g.
    # "JPEGThumbnail" and "GainControl" both contain "ai" — so nearly every
    # camera JPEG with an embedded thumbnail was marked suspicious.
    # Match whole words instead; the token list itself is unchanged.
    suspicious_tags = [
        k for k in exif.keys() if _SUSPECT_TOKENS.intersection(_key_words(k))
    ]
    missing_core = ("DateTimeOriginal" not in exif) and ("EXIF DateTimeOriginal" not in exif)
    return exif, suspicious_tags, missing_core

def error_level_analysis(pil_img, quality=95):
    """Return an Error Level Analysis image for *pil_img*.

    Re-saves the image as JPEG at *quality*, takes the per-pixel absolute
    difference against the original, and rescales so the strongest channel
    difference maps to 255 — making recompression artifacts visible.
    """
    rgb = pil_img if pil_img.mode == "RGB" else pil_img.convert("RGB")
    jpeg_buf = io.BytesIO()
    rgb.save(jpeg_buf, "JPEG", quality=quality)
    resaved = Image.open(io.BytesIO(jpeg_buf.getvalue()))
    diff = ImageChops.difference(rgb, resaved)
    # Peak difference over all bands drives the brightness rescale;
    # max(1, peak) guards against division by zero on identical images.
    peak = max(band_max for _, band_max in diff.getextrema())
    gain = 255.0 / max(1, peak)
    return Image.eval(diff, lambda value: int(value * gain))

def ela_features(ela_img):
    """Summarize an ELA image as a float32 vector [mean, rms, var, filled].

    mean/rms/var are averaged across colour bands; *filled* is the fraction
    of the image area covered by the non-zero bounding box (0 when the ELA
    image is entirely black, i.e. getbbox() returns None).
    """
    band_stats = ImageStat.Stat(ela_img)
    avg_mean = np.mean(band_stats.mean)
    avg_rms = np.mean(band_stats.rms)
    avg_var = np.mean(band_stats.var)
    box = ela_img.getbbox()
    if not box:
        coverage = 0
    else:
        left, top, right, bottom = box
        width, height = ela_img.size
        coverage = ((right - left) * (bottom - top)) / (width * height)
    return np.array([avg_mean, avg_rms, avg_var, coverage], dtype=np.float32)

def heuristic_score(exif_suspicious, exif_missing_core, ela_feat):
    """Map EXIF and ELA signals to a fake-probability in [0, 1].

    Each clue adds a fixed increment; the total is clamped to [0, 1].
    """
    mean, rms, var, filled = ela_feat
    total = 0.0
    # No original capture timestamp: common for generated/stripped images.
    if exif_missing_core:
        total += 0.15
    # 0.05 per suspicious metadata tag, capped at 0.25.
    if exif_suspicious:
        total += min(0.25, 0.05 * len(exif_suspicious))
    # Flat ELA response (low mean / low rms) across the frame.
    if mean < 5:
        total += 0.15
    if rms < 10:
        total += 0.10
    # Difference bounding box covering nearly the whole image.
    if filled > .95:
        total += 0.15
    return max(0.0, min(1.0, total))

def combine_probs(model_fake_prob, heur_fake_prob, weight_model=0.75):
    """Blend model and heuristic fake-probabilities as a weighted average."""
    blended = weight_model * model_fake_prob + (1 - weight_model) * heur_fake_prob
    return float(blended)

# ---------------------- SIDEBAR ------------------------
with st.sidebar:
    st.header("Settings")
    fake_threshold = st.slider("Decision threshold (Fake if ≥ this)", 0.0, 1.0, 0.55, 0.01)
    weight_model = st.slider("Model weight in ensemble", 0.0, 1.0, 0.75, 0.05)
    st.caption("Lower threshold to flag more images as AI. Results are probabilistic.")

# ---------------------- MAIN --------------------------
uploaded = st.file_uploader("Drop an image (JPG/PNG/WebP)", type=["jpg","jpeg","png","webp"])
if uploaded:
    # Keep the raw bytes for exifread; Pillow works on a decoded RGB copy.
    raw = uploaded.read()
    img = Image.open(io.BytesIO(raw)).convert("RGB")
    # NOTE(review): use_column_width is deprecated in recent Streamlit
    # releases in favour of use_container_width — confirm target version.
    st.image(img, caption="Uploaded", use_column_width=True)

    # ---- Provenance (C2PA / Content Credentials) ----
    # Manual step: the user checks signed provenance on Adobe Verify;
    # when credentials exist they outrank any detector output below.
    st.subheader("Provenance (Content Credentials)")
    st.caption("First, check if the image carries signed Content Credentials (C2PA).")
    st.link_button("Open Adobe Verify", "https://verify.contentcredentials.org")
    st.caption("Upload the same image on the Verify page to see if credentials exist and what edits occurred.")

    # ---- Metadata (EXIF) ----
    st.subheader("Metadata (EXIF)")
    exif, suspicious, missing_core = read_basic_exif(img, raw)
    with st.expander("View EXIF details"):
        st.json({k: exif[k] for k in sorted(exif.keys())})
    if suspicious:
        # Show at most 8 flagged tag names to keep the warning readable.
        st.warning("Suspicious metadata tags: " + ", ".join(suspicious[:8]))
    if missing_core:
        st.info("No original capture timestamp in EXIF; many generated/edited images strip EXIF.")

    # ---- ELA ----
    st.subheader("Error Level Analysis (ELA)")
    ela_img = error_level_analysis(img, quality=95)
    st.image(ela_img, caption="ELA (enhanced for visibility)", use_column_width=True)
    # 4-vector [mean, rms, var, filled] consumed by heuristic_score below.
    feats = ela_features(ela_img)

    # ---- Model prediction (ViT) ----
    # Classify both the original image and its ELA rendering.
    clf = load_hf_pipeline()
    preds_orig = clf(img)
    preds_ela  = clf(ela_img)

    def fake_prob(preds):
        """Extract P(fake) from a pipeline output list of {label, score} dicts."""
        score_by_label = {p["label"].lower(): float(p["score"]) for p in preds}
        # NOTE(review): if neither "fake" nor "real" appears this falls back
        # to 1.0 (fully fake) — confirm that is intended for this model's labels.
        return score_by_label.get("fake", 1.0 - score_by_label.get("real", 0.0))

    # Take the more incriminating of the two views.
    model_fake = max(fake_prob(preds_orig), fake_prob(preds_ela))

    # ---- Heuristics ----
    heur_fake = heuristic_score(suspicious, missing_core, feats)

    # ---- Ensemble & verdict ----
    combined = combine_probs(model_fake, heur_fake, weight_model=weight_model)
    # A 0.15-wide band just under the threshold is reported as "Uncertain".
    verdict = "Likely AI" if combined >= fake_threshold else ("Uncertain" if combined >= (fake_threshold - 0.15) else "Likely Real")

    st.subheader(f"Result: {verdict}")
    st.write(f"**Combined AI confidence (0–100):** {int(combined*100)}")

    with st.expander("Why this result? (breakdown)"):
        st.write({
            "model_fake_prob": round(model_fake, 4),
            "heuristic_fake_prob": round(heur_fake, 4),
            "threshold": fake_threshold,
            "model_weight": weight_model
        })
        st.markdown("""
- **Provenance first:** If Content Credentials are present (see Adobe Verify), trust that signed record over any detector.
- **Model:** ViT classifier fine-tuned for Real vs Fake.
- **Heuristics:** EXIF clues + ELA artifact features.
- **Note:** Detectors are probabilistic; generators evolve quickly (concept drift).
""")

st.caption("Portfolio demo. For newsroom/forensics, pair detection with provenance (C2PA/Content Credentials).")