# AI Image Checker (MVP) — Streamlit demo app.
import io, json | |
import numpy as np | |
import streamlit as st | |
from PIL import Image, ImageChops, ImageStat, ExifTags | |
import exifread | |
# Streamlit page chrome; set_page_config must run before any other st.* call.
st.set_page_config(page_title="AI Image Checker (MVP)", layout="centered")
st.title("AI Image Checker (MVP)")
st.caption("EXIF + ELA heuristics + ViT classifier + Provenance (Adobe Verify link)")
# ---------------------- MODEL --------------------------
HF_MODEL_ID = "dima806/deepfake_vs_real_image_detection"  # ViT fine-tuned Real vs Fake


@st.cache_resource
def load_hf_pipeline():
    """Build the Hugging Face image-classification pipeline for HF_MODEL_ID.

    Cached with ``st.cache_resource`` so the ViT weights are downloaded and
    instantiated once per server process instead of on every Streamlit rerun
    (each widget interaction re-executes the whole script).
    """
    # Local import keeps the heavy `transformers` import off module load.
    from transformers import pipeline
    return pipeline("image-classification", model=HF_MODEL_ID)
# ---------------------- HELPERS ------------------------ | |
def read_basic_exif(pil_img, raw_bytes):
    """Collect EXIF metadata from two readers and flag suspicious patterns.

    Returns a tuple ``(exif, suspicious_tags, missing_core)`` where *exif*
    maps tag names to (truncated) string values, *suspicious_tags* lists tag
    names containing generator-related keywords, and *missing_core* is True
    when no original-capture timestamp was found.
    """
    collected = {}

    # Pass 1: Pillow's built-in EXIF reader.
    pillow_exif = pil_img.getexif()
    if pillow_exif:
        for tag_id, value in pillow_exif.items():
            tag_name = ExifTags.TAGS.get(tag_id, tag_id)
            collected[tag_name] = str(value)[:200]

    # Pass 2: exifread over the raw bytes (best effort; parse may fail).
    try:
        parsed = exifread.process_file(io.BytesIO(raw_bytes), details=False)
        for key, value in parsed.items():
            collected[key] = str(value)[:200]
    except Exception:
        pass

    markers = [
        "software", "artist", "generator", "ai", "model",
        "stable", "midjourney", "dalle", "firefly", "synthid",
    ]
    flagged = [name for name in collected if any(m in name.lower() for m in markers)]

    no_capture_time = ("DateTimeOriginal" not in collected
                      and "EXIF DateTimeOriginal" not in collected)
    return collected, flagged, no_capture_time
def error_level_analysis(pil_img, quality=95):
    """Return an Error Level Analysis image for *pil_img*.

    The image is recompressed as JPEG at *quality* and the per-pixel
    difference against the original is computed; edited or synthesized
    regions tend to recompress differently and show up brighter. The
    difference is rescaled so the maximum channel delta maps to 255.
    """
    rgb = pil_img if pil_img.mode == "RGB" else pil_img.convert("RGB")

    # Round-trip through an in-memory JPEG at the requested quality.
    jpeg_buf = io.BytesIO()
    rgb.save(jpeg_buf, "JPEG", quality=quality)
    roundtripped = Image.open(io.BytesIO(jpeg_buf.getvalue()))

    diff = ImageChops.difference(rgb, roundtripped)

    # Normalize for visibility: largest per-channel difference becomes 255.
    peak = max(hi for _, hi in diff.getextrema())
    factor = 255.0 / max(1, peak)
    return Image.eval(diff, lambda channel: int(channel * factor))
def ela_features(ela_img):
    """Summarize an ELA image as a float32 feature vector.

    Features: [mean, rms, variance, filled] — band-averaged intensity
    statistics plus the fraction of the frame covered by the non-zero
    bounding box (0 when the image is entirely black).
    """
    stats = ImageStat.Stat(ela_img)
    avg_mean = np.mean(stats.mean)
    avg_rms = np.mean(stats.rms)
    avg_var = np.mean(stats.var)

    # Fraction of the image area covered by the non-zero bounding box.
    box = ela_img.getbbox()
    if box:
        left, top, right, bottom = box
        width, height = ela_img.size
        coverage = ((right - left) * (bottom - top)) / (width * height)
    else:
        coverage = 0
    return np.array([avg_mean, avg_rms, avg_var, coverage], dtype=np.float32)
def heuristic_score(exif_suspicious, exif_missing_core, ela_feat):
    """Map EXIF and ELA evidence onto a [0, 1] "looks generated" score.

    Each signal contributes a fixed increment; the total is clamped to [0, 1].
    """
    mean, rms, _var, filled = ela_feat
    contributions = []
    if exif_missing_core:
        contributions.append(0.15)   # no original capture timestamp
    if exif_suspicious:
        # 0.05 per suspicious tag, capped at 0.25.
        contributions.append(min(0.25, 0.05 * len(exif_suspicious)))
    if mean < 5:
        contributions.append(0.15)   # very low ELA response overall
    if rms < 10:
        contributions.append(0.10)
    if filled > .95:
        contributions.append(0.15)   # ELA energy spread over the whole frame
    return max(0.0, min(1.0, sum(contributions)))
def combine_probs(model_fake_prob, heur_fake_prob, weight_model=0.75):
    """Return the weighted average of the model and heuristic fake-probabilities.

    *weight_model* is the weight on the classifier; the heuristic gets the
    remainder.
    """
    heur_weight = 1 - weight_model
    return float(weight_model * model_fake_prob + heur_weight * heur_fake_prob)
# ---------------------- SIDEBAR ------------------------
with st.sidebar:
    st.header("Settings")
    # Both knobs feed the ensemble verdict; defaults favor the ViT model.
    fake_threshold = st.slider(
        label="Decision threshold (Fake if ≥ this)",
        min_value=0.0, max_value=1.0, value=0.55, step=0.01,
    )
    weight_model = st.slider(
        label="Model weight in ensemble",
        min_value=0.0, max_value=1.0, value=0.75, step=0.05,
    )
    st.caption("Lower threshold to flag more images as AI. Results are probabilistic.")
# ---------------------- MAIN --------------------------
uploaded = st.file_uploader("Drop an image (JPG/PNG/WebP)", type=["jpg","jpeg","png","webp"])
if uploaded:
    # Keep the raw bytes for exifread; decode a normalized RGB copy for
    # display, ELA, and the classifier.
    raw = uploaded.read()
    img = Image.open(io.BytesIO(raw)).convert("RGB")
    st.image(img, caption="Uploaded", use_column_width=True)
    # ---- Provenance (C2PA / Content Credentials) ----
    # No in-app C2PA parsing; the user is pointed at Adobe Verify instead.
    st.subheader("Provenance (Content Credentials)")
    st.caption("First, check if the image carries signed Content Credentials (C2PA).")
    st.link_button("Open Adobe Verify", "https://verify.contentcredentials.org")
    st.caption("Upload the same image on the Verify page to see if credentials exist and what edits occurred.")
    # ---- Metadata (EXIF) ----
    st.subheader("Metadata (EXIF)")
    exif, suspicious, missing_core = read_basic_exif(img, raw)
    with st.expander("View EXIF details"):
        st.json({k: exif[k] for k in sorted(exif.keys())})
    if suspicious:
        # Show at most the first 8 flagged tag names.
        st.warning("Suspicious metadata tags: " + ", ".join(suspicious[:8]))
    if missing_core:
        st.info("No original capture timestamp in EXIF; many generated/edited images strip EXIF.")
    # ---- ELA ----
    st.subheader("Error Level Analysis (ELA)")
    ela_img = error_level_analysis(img, quality=95)
    st.image(ela_img, caption="ELA (enhanced for visibility)", use_column_width=True)
    feats = ela_features(ela_img)
    # ---- Model prediction (ViT) ----
    clf = load_hf_pipeline()
    preds_orig = clf(img)
    preds_ela = clf(ela_img)
    def fake_prob(preds):
        # preds is a list of {"label": ..., "score": ...} dicts; fall back to
        # 1 - P(real) when the model's labels don't include "fake".
        score_by_label = {p["label"].lower(): float(p["score"]) for p in preds}
        return score_by_label.get("fake", 1.0 - score_by_label.get("real", 0.0))
    # Take the more fake-looking of the two views (original vs ELA).
    model_fake = max(fake_prob(preds_orig), fake_prob(preds_ela))
    # ---- Heuristics ----
    heur_fake = heuristic_score(suspicious, missing_core, feats)
    # ---- Ensemble & verdict ----
    combined = combine_probs(model_fake, heur_fake, weight_model=weight_model)
    # "Uncertain" band spans the 0.15 below the user-chosen threshold.
    verdict = "Likely AI" if combined >= fake_threshold else ("Uncertain" if combined >= (fake_threshold - 0.15) else "Likely Real")
    st.subheader(f"Result: {verdict}")
    st.write(f"**Combined AI confidence (0–100):** {int(combined*100)}")
    with st.expander("Why this result? (breakdown)"):
        st.write({
            "model_fake_prob": round(model_fake, 4),
            "heuristic_fake_prob": round(heur_fake, 4),
            "threshold": fake_threshold,
            "model_weight": weight_model
        })
    st.markdown("""
- **Provenance first:** If Content Credentials are present (see Adobe Verify), trust that signed record over any detector.
- **Model:** ViT classifier fine-tuned for Real vs Fake.
- **Heuristics:** EXIF clues + ELA artifact features.
- **Note:** Detectors are probabilistic; generators evolve quickly (concept drift).
""")
# Footer is rendered whether or not an image was uploaded.
st.caption("Portfolio demo. For newsroom/forensics, pair detection with provenance (C2PA/Content Credentials).")