# AI Image Checker (MVP) — Streamlit demo app.
import io, json | |
import numpy as np | |
import streamlit as st | |
from PIL import Image, ImageChops, ImageStat, ExifTags | |
import exifread | |
# Streamlit page chrome; set_page_config must run before any other st.* call.
st.set_page_config(page_title="AI Image Checker (MVP)", layout="centered")
st.title("AI Image Checker (MVP)")
st.caption("EXIF + ELA heuristics + ViT classifier + Provenance (Adobe Verify link)")
# ---------------------- MODEL --------------------------
HF_MODEL_ID = "dima806/deepfake_vs_real_image_detection"  # ViT fine-tuned Real vs Fake


@st.cache_resource
def load_hf_pipeline():
    """Build the Hugging Face image-classification pipeline for HF_MODEL_ID.

    Cached with ``st.cache_resource`` so the ViT weights are downloaded and
    instantiated once per server process instead of on every Streamlit rerun
    (each widget interaction re-executes the whole script).
    """
    # Local import keeps the heavy `transformers` import off module load.
    from transformers import pipeline
    return pipeline("image-classification", model=HF_MODEL_ID)
# ---------------------- HELPERS ------------------------ | |
def read_basic_exif(pil_img, raw_bytes):
    """Collect EXIF metadata from two readers and flag suspicious patterns.

    Returns a tuple ``(exif, suspicious_tags, missing_core)`` where *exif*
    maps tag names to (truncated) string values, *suspicious_tags* lists tag
    names containing generator-related keywords, and *missing_core* is True
    when no original-capture timestamp was found.
    """
    collected = {}

    # Pass 1: Pillow's built-in EXIF reader.
    pillow_exif = pil_img.getexif()
    if pillow_exif:
        for tag_id, value in pillow_exif.items():
            tag_name = ExifTags.TAGS.get(tag_id, tag_id)
            collected[tag_name] = str(value)[:200]

    # Pass 2: exifread over the raw bytes (best effort; parse may fail).
    try:
        parsed = exifread.process_file(io.BytesIO(raw_bytes), details=False)
        for key, value in parsed.items():
            collected[key] = str(value)[:200]
    except Exception:
        pass

    markers = [
        "software", "artist", "generator", "ai", "model",
        "stable", "midjourney", "dalle", "firefly", "synthid",
    ]
    flagged = [name for name in collected if any(m in name.lower() for m in markers)]

    no_capture_time = ("DateTimeOriginal" not in collected
                      and "EXIF DateTimeOriginal" not in collected)
    return collected, flagged, no_capture_time
def error_level_analysis(pil_img, quality=95):
    """Return an Error Level Analysis image for *pil_img*.

    The image is recompressed as JPEG at *quality* and the per-pixel
    difference against the original is computed; edited or synthesized
    regions tend to recompress differently and show up brighter. The
    difference is rescaled so the maximum channel delta maps to 255.
    """
    rgb = pil_img if pil_img.mode == "RGB" else pil_img.convert("RGB")

    # Round-trip through an in-memory JPEG at the requested quality.
    jpeg_buf = io.BytesIO()
    rgb.save(jpeg_buf, "JPEG", quality=quality)
    roundtripped = Image.open(io.BytesIO(jpeg_buf.getvalue()))

    diff = ImageChops.difference(rgb, roundtripped)

    # Normalize for visibility: largest per-channel difference becomes 255.
    peak = max(hi for _, hi in diff.getextrema())
    factor = 255.0 / max(1, peak)
    return Image.eval(diff, lambda channel: int(channel * factor))
def ela_features(ela_img):
    """Summarize an ELA image as a float32 feature vector.

    Features: [mean, rms, variance, filled] — band-averaged intensity
    statistics plus the fraction of the frame covered by the non-zero
    bounding box (0 when the image is entirely black).
    """
    stats = ImageStat.Stat(ela_img)
    avg_mean = np.mean(stats.mean)
    avg_rms = np.mean(stats.rms)
    avg_var = np.mean(stats.var)

    # Fraction of the image area covered by the non-zero bounding box.
    box = ela_img.getbbox()
    if box:
        left, top, right, bottom = box
        width, height = ela_img.size
        coverage = ((right - left) * (bottom - top)) / (width * height)
    else:
        coverage = 0
    return np.array([avg_mean, avg_rms, avg_var, coverage], dtype=np.float32)
def heuristic_score(exif_suspicious, exif_missing_core, ela_feat):
    """Map EXIF and ELA evidence onto a [0, 1] "looks generated" score.

    Each signal contributes a fixed increment; the total is clamped to [0, 1].
    """
    mean, rms, _var, filled = ela_feat
    contributions = []
    if exif_missing_core:
        contributions.append(0.15)   # no original capture timestamp
    if exif_suspicious:
        # 0.05 per suspicious tag, capped at 0.25.
        contributions.append(min(0.25, 0.05 * len(exif_suspicious)))
    if mean < 5:
        contributions.append(0.15)   # very low ELA response overall
    if rms < 10:
        contributions.append(0.10)
    if filled > .95:
        contributions.append(0.15)   # ELA energy spread over the whole frame
    return max(0.0, min(1.0, sum(contributions)))
def combine_probs(model_fake_prob, heur_fake_prob, weight_model=0.75):
    """Return the weighted average of the model and heuristic fake-probabilities.

    *weight_model* is the weight on the classifier; the heuristic gets the
    remainder.
    """
    heur_weight = 1 - weight_model
    return float(weight_model * model_fake_prob + heur_weight * heur_fake_prob)
# ---------------------- SIDEBAR ------------------------
with st.sidebar:
    st.header("Settings")
    # Both knobs feed the ensemble verdict; defaults favor the ViT model.
    fake_threshold = st.slider(
        label="Decision threshold (Fake if ≥ this)",
        min_value=0.0, max_value=1.0, value=0.55, step=0.01,
    )
    weight_model = st.slider(
        label="Model weight in ensemble",
        min_value=0.0, max_value=1.0, value=0.75, step=0.05,
    )
    st.caption("Lower threshold to flag more images as AI. Results are probabilistic.")
# ---------------------- MAIN --------------------------
uploaded = st.file_uploader("Drop an image (JPG/PNG/WebP)", type=["jpg","jpeg","png","webp"])
if uploaded:
    # Keep the raw bytes for exifread; decode a normalized RGB copy for
    # display, ELA, and the classifier.
    raw = uploaded.read()
    img = Image.open(io.BytesIO(raw)).convert("RGB")
    st.image(img, caption="Uploaded", use_column_width=True)
    # ---- Provenance (C2PA / Content Credentials) ----
    # No in-app C2PA parsing; the user is pointed at Adobe Verify instead.
    st.subheader("Provenance (Content Credentials)")
    st.caption("First, check if the image carries signed Content Credentials (C2PA).")
    st.link_button("Open Adobe Verify", "https://verify.contentcredentials.org")
    st.caption("Upload the same image on the Verify page to see if credentials exist and what edits occurred.")
    # ---- Metadata (EXIF) ----
    st.subheader("Metadata (EXIF)")
    exif, suspicious, missing_core = read_basic_exif(img, raw)
    with st.expander("View EXIF details"):
        st.json({k: exif[k] for k in sorted(exif.keys())})
    if suspicious:
        # Show at most the first 8 flagged tag names.
        st.warning("Suspicious metadata tags: " + ", ".join(suspicious[:8]))
    if missing_core:
        st.info("No original capture timestamp in EXIF; many generated/edited images strip EXIF.")
    # ---- ELA ----
    st.subheader("Error Level Analysis (ELA)")
    ela_img = error_level_analysis(img, quality=95)
    st.image(ela_img, caption="ELA (enhanced for visibility)", use_column_width=True)
    feats = ela_features(ela_img)
    # ---- Model prediction (ViT) ----
    clf = load_hf_pipeline()
    preds_orig = clf(img)
    preds_ela = clf(ela_img)
    def fake_prob(preds):
        # preds is a list of {"label": ..., "score": ...} dicts; fall back to
        # 1 - P(real) when the model's labels don't include "fake".
        score_by_label = {p["label"].lower(): float(p["score"]) for p in preds}
        return score_by_label.get("fake", 1.0 - score_by_label.get("real", 0.0))
    # Take the more fake-looking of the two views (original vs ELA).
    model_fake = max(fake_prob(preds_orig), fake_prob(preds_ela))
    # ---- Heuristics ----
    heur_fake = heuristic_score(suspicious, missing_core, feats)
    # ---- Ensemble & verdict ----
    combined = combine_probs(model_fake, heur_fake, weight_model=weight_model)
    # "Uncertain" band spans the 0.15 below the user-chosen threshold.
    verdict = "Likely AI" if combined >= fake_threshold else ("Uncertain" if combined >= (fake_threshold - 0.15) else "Likely Real")
    st.subheader(f"Result: {verdict}")
    st.write(f"**Combined AI confidence (0–100):** {int(combined*100)}")
    with st.expander("Why this result? (breakdown)"):
        st.write({
            "model_fake_prob": round(model_fake, 4),
            "heuristic_fake_prob": round(heur_fake, 4),
            "threshold": fake_threshold,
            "model_weight": weight_model
        })
    st.markdown("""
- **Provenance first:** If Content Credentials are present (see Adobe Verify), trust that signed record over any detector.
- **Model:** ViT classifier fine-tuned for Real vs Fake.
- **Heuristics:** EXIF clues + ELA artifact features.
- **Note:** Detectors are probabilistic; generators evolve quickly (concept drift).
""")
# Footer is rendered whether or not an image was uploaded.
st.caption("Portfolio demo. For newsroom/forensics, pair detection with provenance (C2PA/Content Credentials).")