# AI Image Checker (MVP) — Streamlit app.
# (Removed non-code residue scraped from the Hugging Face Spaces file viewer:
#  status lines, file size, commit id cb139f9, and the line-number gutter.)
import io, json
import numpy as np
import streamlit as st
from PIL import Image, ImageChops, ImageStat, ExifTags
import exifread
# Page chrome: title, layout, and a one-line summary of the detection pipeline.
st.set_page_config(page_title="AI Image Checker (MVP)", layout="centered")
st.title("AI Image Checker (MVP)")
st.caption("EXIF + ELA heuristics + ViT classifier + Provenance (Adobe Verify link)")
# ---------------------- MODEL --------------------------
HF_MODEL_ID = "dima806/deepfake_vs_real_image_detection" # ViT fine-tuned Real vs Fake
# Cached so the (slow) model download/load happens once per server process.
@st.cache_resource(show_spinner=True)
def load_hf_pipeline():
    """Build and cache the Hugging Face image-classification pipeline."""
    # Imported lazily so the page can render before transformers is loaded.
    from transformers import pipeline
    return pipeline("image-classification", model=HF_MODEL_ID)
# ---------------------- HELPERS ------------------------
def read_basic_exif(pil_img, raw_bytes):
    """Collect EXIF metadata and flag AI-related clues.

    Parameters
    ----------
    pil_img : PIL.Image.Image
        Decoded image (read with Pillow's built-in EXIF accessor).
    raw_bytes : bytes
        Original file bytes (parsed again with ``exifread``, which often
        finds tags Pillow misses).

    Returns
    -------
    tuple
        ``(exif, suspicious_tags, missing_core)``: a dict of tag name ->
        truncated string value, a list of keys hinting at AI generation or
        editing software, and True when no original capture timestamp exists.
    """
    import re  # local import: only needed for the whole-word "ai" match

    exif = {}
    info = pil_img.getexif()
    if info:
        for tag, val in info.items():
            name = ExifTags.TAGS.get(tag, tag)  # fall back to the numeric tag id
            exif[name] = str(val)[:200]  # truncate to keep the UI readable
    try:
        tags = exifread.process_file(io.BytesIO(raw_bytes), details=False)
        for k, v in tags.items():
            exif[k] = str(v)[:200]
    except Exception:
        pass  # best-effort: corrupt or absent EXIF must never crash the app

    # BUG FIX: "ai" was previously matched as a plain substring, which flagged
    # ordinary EXIF keys such as "GainControl" and "Thumbnail" as suspicious.
    # Match it as a whole word instead; the remaining terms are distinctive
    # enough to keep as substrings.
    # NOTE(review): "model" still substring-matches the standard camera
    # "Model"/"Image Model" tags present in most real photos — confirm this
    # over-flagging is intended before tightening further.
    substring_terms = [
        "software", "artist", "generator", "model", "stable",
        "midjourney", "dalle", "firefly", "synthid",
    ]
    word_ai = re.compile(r"\bai\b")
    suspicious_tags = [
        k for k in exif
        if any(t in k.lower() for t in substring_terms) or word_ai.search(k.lower())
    ]
    missing_core = ("DateTimeOriginal" not in exif) and ("EXIF DateTimeOriginal" not in exif)
    return exif, suspicious_tags, missing_core
def error_level_analysis(pil_img, quality=95):
    """Return an Error Level Analysis image, brightened for display.

    The image is re-saved as JPEG at ``quality`` and the per-pixel
    difference against the original is stretched to the full 0-255 range
    so recompression artifacts become visible.
    """
    rgb = pil_img if pil_img.mode == "RGB" else pil_img.convert("RGB")
    buf = io.BytesIO()
    rgb.save(buf, "JPEG", quality=quality)
    buf.seek(0)
    resaved = Image.open(buf)
    diff = ImageChops.difference(rgb, resaved)
    # Amplify the (usually tiny) differences; guard against a zero peak.
    peak = max(hi for _, hi in diff.getextrema())
    factor = 255.0 / max(1, peak)
    return Image.eval(diff, lambda px: int(px * factor))
def ela_features(ela_img):
    """Summarize an ELA image as a 4-dim float32 feature vector.

    Returns ``[mean, rms, variance, filled]`` where the first three are
    averaged across color bands and ``filled`` is the fraction of the
    frame covered by the non-zero bounding box (0 for an all-black image).
    """
    stats = ImageStat.Stat(ela_img)
    avg_mean = np.mean(stats.mean)
    avg_rms = np.mean(stats.rms)
    avg_var = np.mean(stats.var)
    box = ela_img.getbbox()
    if box is None:
        coverage = 0
    else:
        width, height = ela_img.size
        coverage = ((box[2] - box[0]) * (box[3] - box[1])) / (width * height)
    return np.array([avg_mean, avg_rms, avg_var, coverage], dtype=np.float32)
def heuristic_score(exif_suspicious, exif_missing_core, ela_feat):
    """Map EXIF clues and ELA features to a fake-probability in [0, 1].

    Each clue contributes a fixed weight; the sum is clamped to [0, 1].
    """
    mean, rms, var, filled = ela_feat
    contributions = []
    if exif_missing_core:
        contributions.append(0.15)  # capture timestamp stripped
    if exif_suspicious:
        # Up to 0.25 total, 0.05 per suspicious tag.
        contributions.append(min(0.25, 0.05 * len(exif_suspicious)))
    if mean < 5:
        contributions.append(0.15)  # unusually uniform ELA response
    if rms < 10:
        contributions.append(0.10)
    if filled > .95:
        contributions.append(0.15)  # artifacts cover nearly the whole frame
    return max(0.0, min(1.0, sum(contributions)))
def combine_probs(model_fake_prob, heur_fake_prob, weight_model=0.75):
    """Return the weighted average of model and heuristic fake-probabilities."""
    heur_weight = 1 - weight_model
    return float(model_fake_prob * weight_model + heur_fake_prob * heur_weight)
# ---------------------- SIDEBAR ------------------------
with st.sidebar:
    st.header("Settings")
    # Probability at/above which the verdict becomes "Likely AI".
    fake_threshold = st.slider("Decision threshold (Fake if ≥ this)", 0.0, 1.0, 0.55, 0.01)
    # How much the ViT model contributes vs. the EXIF/ELA heuristics.
    weight_model = st.slider("Model weight in ensemble", 0.0, 1.0, 0.75, 0.05)
    st.caption("Lower threshold to flag more images as AI. Results are probabilistic.")
# ---------------------- MAIN --------------------------
uploaded = st.file_uploader("Drop an image (JPG/PNG/WebP)", type=["jpg","jpeg","png","webp"])
if uploaded:
    raw = uploaded.read()
    # Normalize to RGB so ELA and the classifier see a consistent mode.
    img = Image.open(io.BytesIO(raw)).convert("RGB")
    st.image(img, caption="Uploaded", use_column_width=True)
    # ---- Provenance (C2PA / Content Credentials) ----
    # No local C2PA parsing here; the user is pointed at Adobe Verify.
    st.subheader("Provenance (Content Credentials)")
    st.caption("First, check if the image carries signed Content Credentials (C2PA).")
    st.link_button("Open Adobe Verify", "https://verify.contentcredentials.org")
    st.caption("Upload the same image on the Verify page to see if credentials exist and what edits occurred.")
    # ---- Metadata (EXIF) ----
    st.subheader("Metadata (EXIF)")
    exif, suspicious, missing_core = read_basic_exif(img, raw)
    with st.expander("View EXIF details"):
        st.json({k: exif[k] for k in sorted(exif.keys())})
    if suspicious:
        st.warning("Suspicious metadata tags: " + ", ".join(suspicious[:8]))
    if missing_core:
        st.info("No original capture timestamp in EXIF; many generated/edited images strip EXIF.")
    # ---- ELA ----
    st.subheader("Error Level Analysis (ELA)")
    ela_img = error_level_analysis(img, quality=95)
    st.image(ela_img, caption="ELA (enhanced for visibility)", use_column_width=True)
    feats = ela_features(ela_img)
    # ---- Model prediction (ViT) ----
    clf = load_hf_pipeline()
    # Classify both the original and its ELA map; the max (most fake) wins below.
    preds_orig = clf(img)
    preds_ela = clf(ela_img)
    def fake_prob(preds):
        # Map pipeline output [{"label": ..., "score": ...}, ...] to P(fake).
        # Falls back to 1 - P(real); defaults to 1.0 if neither label appears.
        score_by_label = {p["label"].lower(): float(p["score"]) for p in preds}
        return score_by_label.get("fake", 1.0 - score_by_label.get("real", 0.0))
    model_fake = max(fake_prob(preds_orig), fake_prob(preds_ela))
    # ---- Heuristics ----
    heur_fake = heuristic_score(suspicious, missing_core, feats)
    # ---- Ensemble & verdict ----
    combined = combine_probs(model_fake, heur_fake, weight_model=weight_model)
    # Three-way verdict: a 0.15-wide band just below the threshold reads "Uncertain".
    verdict = "Likely AI" if combined >= fake_threshold else ("Uncertain" if combined >= (fake_threshold - 0.15) else "Likely Real")
    st.subheader(f"Result: {verdict}")
    st.write(f"**Combined AI confidence (0–100):** {int(combined*100)}")
    with st.expander("Why this result? (breakdown)"):
        st.write({
            "model_fake_prob": round(model_fake, 4),
            "heuristic_fake_prob": round(heur_fake, 4),
            "threshold": fake_threshold,
            "model_weight": weight_model
        })
    st.markdown("""
- **Provenance first:** If Content Credentials are present (see Adobe Verify), trust that signed record over any detector.
- **Model:** ViT classifier fine-tuned for Real vs Fake.
- **Heuristics:** EXIF clues + ELA artifact features.
- **Note:** Detectors are probabilistic; generators evolve quickly (concept drift).
""")
# Footer shown regardless of whether an image was uploaded.
st.caption("Portfolio demo. For newsroom/forensics, pair detection with provenance (C2PA/Content Credentials).")