# AI Image Checker (MVP) — Streamlit app
# Provenance: uploaded to Hugging Face by dikshit98 ("Upload 3 files", commit cb139f9, verified)
import io
import json
import re

import exifread
import numpy as np
import streamlit as st
from PIL import ExifTags, Image, ImageChops, ImageStat
# Page chrome. set_page_config must be the first Streamlit call in the script.
st.set_page_config(page_title="AI Image Checker (MVP)", layout="centered")
st.title("AI Image Checker (MVP)")
st.caption("EXIF + ELA heuristics + ViT classifier + Provenance (Adobe Verify link)")
# ---------------------- MODEL --------------------------
# ViT checkpoint fine-tuned for real-vs-fake image classification.
HF_MODEL_ID = "dima806/deepfake_vs_real_image_detection"

@st.cache_resource(show_spinner=True)
def load_hf_pipeline():
    """Build the HF image-classification pipeline (cached once per session)."""
    # Imported lazily so the page can render before transformers is loaded.
    from transformers import pipeline
    return pipeline("image-classification", model=HF_MODEL_ID)
# ---------------------- HELPERS ------------------------
def find_suspicious_exif_tags(exif):
    """Return tag names that hint at editing or AI-generation software.

    Most markers are matched as plain substrings of the lowercased tag
    name; "ai" is matched as a whole word only, because as a substring it
    false-positives on ordinary EXIF tags such as "Thumbnail" and
    "GainControl".
    """
    substring_terms = ("software", "artist", "generator", "model",
                      "stable", "midjourney", "dalle", "firefly", "synthid")
    ai_word = re.compile(r"\bai\b")
    hits = []
    for key in exif:
        # str() guards against int keys: unknown tag ids fall through
        # ExifTags.TAGS.get(tag, tag) as raw integers.
        key_l = str(key).lower()
        if any(term in key_l for term in substring_terms) or ai_word.search(key_l):
            hits.append(key)
    return hits

def read_basic_exif(pil_img, raw_bytes):
    """Collect EXIF metadata and derive simple authenticity signals.

    Merges Pillow's getexif() tags with exifread's parse of the raw bytes
    (exifread often recovers IFD tags Pillow skips; later exifread values
    overwrite same-named Pillow ones).

    Parameters:
        pil_img: decoded PIL image (used only for its embedded EXIF).
        raw_bytes: original uploaded file bytes, re-parsed by exifread.

    Returns:
        (exif, suspicious_tags, missing_core) where exif maps tag name ->
        stringified value truncated to 200 chars, suspicious_tags lists tag
        names hinting at editing/generation software, and missing_core is
        True when no original capture timestamp is present.
    """
    exif = {}
    info = pil_img.getexif()
    if info:
        for tag, val in info.items():
            name = ExifTags.TAGS.get(tag, tag)
            exif[name] = str(val)[:200]
    try:
        tags = exifread.process_file(io.BytesIO(raw_bytes), details=False)
        for k, v in tags.items():
            exif[k] = str(v)[:200]
    except Exception:
        # exifread chokes on some PNG/WebP containers; EXIF is best-effort.
        pass
    suspicious_tags = find_suspicious_exif_tags(exif)
    missing_core = ("DateTimeOriginal" not in exif) and ("EXIF DateTimeOriginal" not in exif)
    return exif, suspicious_tags, missing_core
def error_level_analysis(pil_img, quality=95):
    """Re-save the image as JPEG and return the amplified pixel difference.

    Regions that were pasted or retouched tend to recompress differently
    from the rest of the image, so they stand out in the difference map.
    """
    rgb = pil_img if pil_img.mode == "RGB" else pil_img.convert("RGB")
    buf = io.BytesIO()
    rgb.save(buf, "JPEG", quality=quality)
    resaved = Image.open(io.BytesIO(buf.getvalue()))
    diff = ImageChops.difference(rgb, resaved)
    # Stretch the (usually tiny) per-band differences to the full 0-255
    # range so the map is visible when displayed.
    peak = max(band_max for _, band_max in diff.getextrema())
    gain = 255.0 / max(1, peak)
    return Image.eval(diff, lambda v: int(v * gain))
def ela_features(ela_img):
    """Summarise an ELA image as a 4-dim float32 feature vector.

    Returns [mean, rms, variance, filled]: the first three averaged across
    colour bands, `filled` the fraction of image area covered by the
    bounding box of non-zero ELA pixels (0 when the image is all-zero).
    """
    stats = ImageStat.Stat(ela_img)
    avg_mean = np.mean(stats.mean)
    avg_rms = np.mean(stats.rms)
    avg_var = np.mean(stats.var)
    box = ela_img.getbbox()
    if not box:
        filled = 0
    else:
        width, height = ela_img.size
        filled = (box[2] - box[0]) * (box[3] - box[1]) / (width * height)
    return np.array([avg_mean, avg_rms, avg_var, filled], dtype=np.float32)
def heuristic_score(exif_suspicious, exif_missing_core, ela_feat):
    """Map EXIF + ELA signals to a fake-probability in [0, 1]."""
    points = 0.0
    # Missing capture timestamp: common for generated / stripped images.
    if exif_missing_core:
        points += 0.15
    # Each suspicious tag adds 0.05, capped at 0.25 total.
    if exif_suspicious:
        points += min(0.25, 0.05 * len(exif_suspicious))
    mean, rms, var, filled = ela_feat
    # Very flat ELA response is typical of fully synthetic images.
    if mean < 5:
        points += 0.15
    if rms < 10:
        points += 0.10
    # Near-total bbox coverage: uniform artifacts across the whole frame.
    if filled > .95:
        points += 0.15
    return max(0.0, min(1.0, points))
def combine_probs(model_fake_prob, heur_fake_prob, weight_model=0.75):
    """Return the weighted average of model and heuristic fake-probabilities."""
    blended = weight_model * model_fake_prob + (1 - weight_model) * heur_fake_prob
    return float(blended)
# ---------------------- SIDEBAR ------------------------
# User-tunable knobs read later by the main flow.
with st.sidebar:
    st.header("Settings")
    fake_threshold = st.slider("Decision threshold (Fake if ≥ this)", 0.0, 1.0, 0.55, 0.01)
    weight_model = st.slider("Model weight in ensemble", 0.0, 1.0, 0.75, 0.05)
    st.caption("Lower threshold to flag more images as AI. Results are probabilistic.")
# ---------------------- MAIN --------------------------
uploaded = st.file_uploader("Drop an image (JPG/PNG/WebP)", type=["jpg", "jpeg", "png", "webp"])

if uploaded:
    raw = uploaded.read()
    img = Image.open(io.BytesIO(raw)).convert("RGB")
    st.image(img, caption="Uploaded", use_column_width=True)

    # ---- Provenance (C2PA / Content Credentials) ----
    st.subheader("Provenance (Content Credentials)")
    st.caption("First, check if the image carries signed Content Credentials (C2PA).")
    st.link_button("Open Adobe Verify", "https://verify.contentcredentials.org")
    st.caption("Upload the same image on the Verify page to see if credentials exist and what edits occurred.")

    # ---- Metadata (EXIF) ----
    st.subheader("Metadata (EXIF)")
    exif, suspicious, missing_core = read_basic_exif(img, raw)
    with st.expander("View EXIF details"):
        st.json({k: exif[k] for k in sorted(exif.keys())})
    if suspicious:
        st.warning("Suspicious metadata tags: " + ", ".join(suspicious[:8]))
    if missing_core:
        st.info("No original capture timestamp in EXIF; many generated/edited images strip EXIF.")

    # ---- ELA ----
    st.subheader("Error Level Analysis (ELA)")
    ela_img = error_level_analysis(img, quality=95)
    st.image(ela_img, caption="ELA (enhanced for visibility)", use_column_width=True)
    feats = ela_features(ela_img)

    # ---- Model prediction (ViT) ----
    clf = load_hf_pipeline()
    preds_orig = clf(img)
    preds_ela = clf(ela_img)

    def fake_prob(preds):
        # Pipeline output is [{"label": ..., "score": ...}]; fall back to
        # 1 - P(real) when no explicit "fake" label is present.
        score_by_label = {p["label"].lower(): float(p["score"]) for p in preds}
        return score_by_label.get("fake", 1.0 - score_by_label.get("real", 0.0))

    # Score both the image and its ELA map; keep the more fake-looking one.
    model_fake = max(fake_prob(preds_orig), fake_prob(preds_ela))

    # ---- Heuristics ----
    heur_fake = heuristic_score(suspicious, missing_core, feats)

    # ---- Ensemble & verdict ----
    combined = combine_probs(model_fake, heur_fake, weight_model=weight_model)
    verdict = "Likely AI" if combined >= fake_threshold else ("Uncertain" if combined >= (fake_threshold - 0.15) else "Likely Real")
    st.subheader(f"Result: {verdict}")
    st.write(f"**Combined AI confidence (0–100):** {int(combined*100)}")

    with st.expander("Why this result? (breakdown)"):
        st.write({
            "model_fake_prob": round(model_fake, 4),
            "heuristic_fake_prob": round(heur_fake, 4),
            "threshold": fake_threshold,
            "model_weight": weight_model
        })

    # NOTE(review): upstream indentation was lost; the markdown below is
    # reconstructed inside the upload branch — confirm against the deployed app.
    st.markdown("""
- **Provenance first:** If Content Credentials are present (see Adobe Verify), trust that signed record over any detector.
- **Model:** ViT classifier fine-tuned for Real vs Fake.
- **Heuristics:** EXIF clues + ELA artifact features.
- **Note:** Detectors are probabilistic; generators evolve quickly (concept drift).
""")

st.caption("Portfolio demo. For newsroom/forensics, pair detection with provenance (C2PA/Content Credentials).")