"""AI Image Checker (MVP).

Streamlit app that estimates whether an uploaded image is AI-generated by
combining: a ViT Real-vs-Fake classifier, EXIF-metadata heuristics,
Error Level Analysis (ELA) artifact features, and a link to Adobe's
Content Credentials (C2PA) Verify page for signed provenance.
"""

import io
import json

import numpy as np
import streamlit as st
from PIL import Image, ImageChops, ImageStat, ExifTags
import exifread

st.set_page_config(page_title="AI Image Checker (MVP)", layout="centered")
st.title("AI Image Checker (MVP)")
st.caption("EXIF + ELA heuristics + ViT classifier + Provenance (Adobe Verify link)")

# ---------------------- MODEL --------------------------
HF_MODEL_ID = "dima806/deepfake_vs_real_image_detection"  # ViT fine-tuned Real vs Fake


@st.cache_resource(show_spinner=True)
def load_hf_pipeline():
    """Load and cache the Hugging Face image-classification pipeline.

    Cached with ``st.cache_resource`` so the model is downloaded/loaded
    once per server process, not once per rerun.
    """
    from transformers import pipeline  # deferred: heavy import, only needed on first use

    return pipeline("image-classification", model=HF_MODEL_ID)


# ---------------------- HELPERS ------------------------
def read_basic_exif(pil_img, raw_bytes):
    """Collect EXIF metadata from both Pillow and exifread.

    Args:
        pil_img: opened PIL image.
        raw_bytes: the original uploaded file bytes (exifread needs the
            raw stream, not the decoded image).

    Returns:
        (exif, suspicious_tags, missing_core) where ``exif`` maps tag
        names to truncated string values, ``suspicious_tags`` lists keys
        hinting at AI generators/editing software, and ``missing_core``
        is True when no original-capture timestamp is present.
    """
    exif = {}
    info = pil_img.getexif()
    if info:
        for tag, val in info.items():
            # Fall back to str(tag) for unknown tag ids so every key is a
            # string — mixed int/str keys would make sorted(exif.keys())
            # raise TypeError later in the UI.
            name = ExifTags.TAGS.get(tag, str(tag))
            exif[name] = str(val)[:200]  # truncate: some vendor blobs are huge
    try:
        tags = exifread.process_file(io.BytesIO(raw_bytes), details=False)
        for k, v in tags.items():
            exif[k] = str(v)[:200]
    except Exception:
        # Best-effort: malformed/absent EXIF must never crash the app.
        pass

    generator_markers = (
        "software", "artist", "generator", "ai", "model",
        "stable", "midjourney", "dalle", "firefly", "synthid",
    )
    suspicious_tags = [
        k for k in exif
        if any(marker in k.lower() for marker in generator_markers)
    ]
    # Both Pillow ("DateTimeOriginal") and exifread ("EXIF DateTimeOriginal")
    # spellings are checked.
    missing_core = (
        "DateTimeOriginal" not in exif and "EXIF DateTimeOriginal" not in exif
    )
    return exif, suspicious_tags, missing_core


def error_level_analysis(pil_img, quality=95):
    """Return a contrast-enhanced Error Level Analysis image.

    Re-saves the image as JPEG at the given quality and diffs it against
    the original; regions that recompress differently (edits, synthetic
    textures) stand out. The diff is rescaled so its max channel value
    maps to 255 for visibility.
    """
    if pil_img.mode != "RGB":
        pil_img = pil_img.convert("RGB")
    buf = io.BytesIO()
    pil_img.save(buf, "JPEG", quality=quality)
    recompressed = Image.open(io.BytesIO(buf.getvalue()))
    ela = ImageChops.difference(pil_img, recompressed)

    # getextrema() on RGB returns ((min,max), (min,max), (min,max)).
    maxdiff = max(e[1] for e in ela.getextrema())
    scale = 255.0 / max(1, maxdiff)  # max(1, ...) guards against an all-zero diff
    # Clamp keeps Image.eval safely inside the 8-bit range.
    return Image.eval(ela, lambda p: min(255, int(p * scale)))


def ela_features(ela_img):
    """Summarize an ELA image as a 4-float feature vector.

    Features: channel-averaged mean, RMS, variance of the enhanced diff,
    plus the fraction of the image covered by the non-zero bounding box.
    """
    stat = ImageStat.Stat(ela_img)
    mean = np.mean(stat.mean)
    rms = np.mean(stat.rms)
    var = np.mean(stat.var)
    bbox = ela_img.getbbox()  # None when the diff is entirely zero
    if bbox:
        width, height = ela_img.size
        filled = ((bbox[2] - bbox[0]) * (bbox[3] - bbox[1])) / (width * height)
    else:
        filled = 0
    return np.array([mean, rms, var, filled], dtype=np.float32)


def heuristic_score(exif_suspicious, exif_missing_core, ela_feat):
    """Combine EXIF + ELA clues into a fake-probability in [0, 1].

    Weights are hand-tuned for this MVP: stripped timestamps and
    generator-related tags add suspicion; a very flat/uniform ELA
    (low mean/RMS, near-full coverage) is typical of synthetic images.
    """
    score = 0.0
    if exif_missing_core:
        score += 0.15
    if exif_suspicious:
        score += min(0.25, 0.05 * len(exif_suspicious))  # cap EXIF contribution
    mean, rms, var, filled = ela_feat
    if mean < 5:
        score += 0.15
    if rms < 10:
        score += 0.10
    if filled > 0.95:
        score += 0.15
    return max(0.0, min(1.0, score))


def combine_probs(model_fake_prob, heur_fake_prob, weight_model=0.75):
    """Weighted average of model and heuristic fake-probabilities."""
    return float(weight_model * model_fake_prob + (1 - weight_model) * heur_fake_prob)


# ---------------------- SIDEBAR ------------------------
with st.sidebar:
    st.header("Settings")
    fake_threshold = st.slider(
        "Decision threshold (Fake if ≥ this)", 0.0, 1.0, 0.55, 0.01
    )
    weight_model = st.slider("Model weight in ensemble", 0.0, 1.0, 0.75, 0.05)
    st.caption("Lower threshold to flag more images as AI. Results are probabilistic.")

# ---------------------- MAIN --------------------------
uploaded = st.file_uploader(
    "Drop an image (JPG/PNG/WebP)", type=["jpg", "jpeg", "png", "webp"]
)
if uploaded:
    raw = uploaded.read()
    img = Image.open(io.BytesIO(raw)).convert("RGB")
    st.image(img, caption="Uploaded", use_column_width=True)

    # ---- Provenance (C2PA / Content Credentials) ----
    st.subheader("Provenance (Content Credentials)")
    st.caption("First, check if the image carries signed Content Credentials (C2PA).")
    st.link_button("Open Adobe Verify", "https://verify.contentcredentials.org")
    st.caption(
        "Upload the same image on the Verify page to see if credentials exist and what edits occurred."
    )

    # ---- Metadata (EXIF) ----
    st.subheader("Metadata (EXIF)")
    exif, suspicious, missing_core = read_basic_exif(img, raw)
    with st.expander("View EXIF details"):
        st.json({k: exif[k] for k in sorted(exif.keys())})
    if suspicious:
        st.warning("Suspicious metadata tags: " + ", ".join(suspicious[:8]))
    if missing_core:
        st.info("No original capture timestamp in EXIF; many generated/edited images strip EXIF.")

    # ---- ELA ----
    st.subheader("Error Level Analysis (ELA)")
    ela_img = error_level_analysis(img, quality=95)
    st.image(ela_img, caption="ELA (enhanced for visibility)", use_column_width=True)
    feats = ela_features(ela_img)

    # ---- Model prediction (ViT) ----
    clf = load_hf_pipeline()
    preds_orig = clf(img)
    preds_ela = clf(ela_img)

    def fake_prob(preds):
        """Extract P(fake) from pipeline output; derive from P(real) if absent."""
        score_by_label = {p["label"].lower(): float(p["score"]) for p in preds}
        return score_by_label.get("fake", 1.0 - score_by_label.get("real", 0.0))

    # Take the more suspicious of the two views (original vs ELA).
    model_fake = max(fake_prob(preds_orig), fake_prob(preds_ela))

    # ---- Heuristics ----
    heur_fake = heuristic_score(suspicious, missing_core, feats)

    # ---- Ensemble & verdict ----
    combined = combine_probs(model_fake, heur_fake, weight_model=weight_model)
    # A 0.15-wide band below the threshold is reported as "Uncertain".
    if combined >= fake_threshold:
        verdict = "Likely AI"
    elif combined >= fake_threshold - 0.15:
        verdict = "Uncertain"
    else:
        verdict = "Likely Real"

    st.subheader(f"Result: {verdict}")
    st.write(f"**Combined AI confidence (0–100):** {int(combined*100)}")

    with st.expander("Why this result? (breakdown)"):
        st.write({
            "model_fake_prob": round(model_fake, 4),
            "heuristic_fake_prob": round(heur_fake, 4),
            "threshold": fake_threshold,
            "model_weight": weight_model
        })
        st.markdown("""
- **Provenance first:** If Content Credentials are present (see Adobe Verify), trust that signed record over any detector.
- **Model:** ViT classifier fine-tuned for Real vs Fake.
- **Heuristics:** EXIF clues + ELA artifact features.
- **Note:** Detectors are probabilistic; generators evolve quickly (concept drift).
""")

    st.caption("Portfolio demo. For newsroom/forensics, pair detection with provenance (C2PA/Content Credentials).")