File size: 6,128 Bytes
cb139f9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
import io, json
import numpy as np
import streamlit as st
from PIL import Image, ImageChops, ImageStat, ExifTags
import exifread

# Page chrome. Streamlit requires set_page_config to be the first st.* call.
st.set_page_config(page_title="AI Image Checker (MVP)", layout="centered")
st.title("AI Image Checker (MVP)")
st.caption("EXIF + ELA heuristics + ViT classifier + Provenance (Adobe Verify link)")

# ---------------------- MODEL --------------------------
# Hugging Face Hub id of the image classifier used for the model signal.
HF_MODEL_ID = "dima806/deepfake_vs_real_image_detection"  # ViT fine-tuned Real vs Fake

@st.cache_resource(show_spinner=True)
def load_hf_pipeline():
    """Build the Hugging Face image-classification pipeline for HF_MODEL_ID.

    Decorated with st.cache_resource so the model is downloaded and loaded
    once per server process and shared across Streamlit reruns. transformers
    is imported lazily here so the app starts without it until first use.
    """
    from transformers import pipeline as hf_pipeline
    classifier = hf_pipeline("image-classification", model=HF_MODEL_ID)
    return classifier

# ---------------------- HELPERS ------------------------
# Tag-name tokens that hint at AI generation or editing software.
# NOTE(review): "model"/"software" also match standard camera tags
# (camera Model, firmware Software) — inherited from the original token
# list; confirm whether those should stay.
_SUSPECT_TOKENS = {
    "software", "artist", "generator", "ai", "model",
    "stable", "midjourney", "dalle", "firefly", "synthid",
}

def _key_words(key):
    """Split an EXIF key into lowercase words.

    Breaks on non-letter characters and on lower->upper camelCase
    boundaries, e.g. "EXIF DateTimeOriginal" -> ["exif", "date", "time",
    "original"]. Non-string keys (raw int tag ids) are stringified first.
    """
    words = []
    current = ""
    prev_is_lower = False
    for ch in str(key):
        if ch.isalpha():
            if ch.isupper() and prev_is_lower and current:
                words.append(current.lower())
                current = ch
            else:
                current += ch
            prev_is_lower = ch.islower()
        else:
            if current:
                words.append(current.lower())
            current = ""
            prev_is_lower = False
    if current:
        words.append(current.lower())
    return words

def read_basic_exif(pil_img, raw_bytes):
    """Collect EXIF metadata and flag generation-related clues.

    Merges Pillow's getexif() view with exifread's richer parse of the
    raw bytes (exifread wins on key collisions); values are truncated to
    200 chars for display.

    Returns:
        (exif, suspicious_tags, missing_core) where
        exif:            dict of tag name -> string value,
        suspicious_tags: keys containing an AI/editing-related word,
        missing_core:    True when no original capture timestamp exists.
    """
    info = pil_img.getexif()
    exif = {}
    if info:
        for tag, val in info.items():
            name = ExifTags.TAGS.get(tag, tag)
            exif[name] = str(val)[:200]
    try:
        # exifread surfaces MakerNote/thumbnail tags that Pillow skips.
        tags = exifread.process_file(io.BytesIO(raw_bytes), details=False)
        for k, v in tags.items():
            exif[k] = str(v)[:200]
    except Exception:
        # Best-effort: corrupt or absent EXIF simply yields fewer tags.
        pass
    # BUGFIX: the previous raw-substring test flagged innocent keys — e.g.
    # "JPEGThumbnail" and "GainControl" both contain "ai" — so nearly every
    # camera JPEG with an embedded thumbnail was marked suspicious.
    # Match whole words instead; the token list itself is unchanged.
    suspicious_tags = [
        k for k in exif.keys() if _SUSPECT_TOKENS.intersection(_key_words(k))
    ]
    missing_core = ("DateTimeOriginal" not in exif) and ("EXIF DateTimeOriginal" not in exif)
    return exif, suspicious_tags, missing_core

def error_level_analysis(pil_img, quality=95):
    """Return an Error Level Analysis image for *pil_img*.

    Re-saves the image as JPEG at *quality*, takes the per-pixel absolute
    difference against the original, and rescales so the strongest channel
    difference maps to 255 — making recompression artifacts visible.
    """
    rgb = pil_img if pil_img.mode == "RGB" else pil_img.convert("RGB")
    jpeg_buf = io.BytesIO()
    rgb.save(jpeg_buf, "JPEG", quality=quality)
    resaved = Image.open(io.BytesIO(jpeg_buf.getvalue()))
    diff = ImageChops.difference(rgb, resaved)
    # Peak difference over all bands drives the brightness rescale;
    # max(1, peak) guards against division by zero on identical images.
    peak = max(band_max for _, band_max in diff.getextrema())
    gain = 255.0 / max(1, peak)
    return Image.eval(diff, lambda value: int(value * gain))

def ela_features(ela_img):
    """Summarize an ELA image as a float32 vector [mean, rms, var, filled].

    mean/rms/var are averaged across colour bands; *filled* is the fraction
    of the image area covered by the non-zero bounding box (0 when the ELA
    image is entirely black, i.e. getbbox() returns None).
    """
    band_stats = ImageStat.Stat(ela_img)
    avg_mean = np.mean(band_stats.mean)
    avg_rms = np.mean(band_stats.rms)
    avg_var = np.mean(band_stats.var)
    box = ela_img.getbbox()
    if not box:
        coverage = 0
    else:
        left, top, right, bottom = box
        width, height = ela_img.size
        coverage = ((right - left) * (bottom - top)) / (width * height)
    return np.array([avg_mean, avg_rms, avg_var, coverage], dtype=np.float32)

def heuristic_score(exif_suspicious, exif_missing_core, ela_feat):
    """Map EXIF and ELA signals to a fake-probability in [0, 1].

    Each clue adds a fixed increment; the total is clamped to [0, 1].
    """
    mean, rms, var, filled = ela_feat
    total = 0.0
    # No original capture timestamp: common for generated/stripped images.
    if exif_missing_core:
        total += 0.15
    # 0.05 per suspicious metadata tag, capped at 0.25.
    if exif_suspicious:
        total += min(0.25, 0.05 * len(exif_suspicious))
    # Flat ELA response (low mean / low rms) across the frame.
    if mean < 5:
        total += 0.15
    if rms < 10:
        total += 0.10
    # Difference bounding box covering nearly the whole image.
    if filled > .95:
        total += 0.15
    return max(0.0, min(1.0, total))

def combine_probs(model_fake_prob, heur_fake_prob, weight_model=0.75):
    """Blend model and heuristic fake-probabilities as a weighted average."""
    blended = weight_model * model_fake_prob + (1 - weight_model) * heur_fake_prob
    return float(blended)

# ---------------------- SIDEBAR ------------------------
with st.sidebar:
    st.header("Settings")
    fake_threshold = st.slider("Decision threshold (Fake if ≥ this)", 0.0, 1.0, 0.55, 0.01)
    weight_model = st.slider("Model weight in ensemble", 0.0, 1.0, 0.75, 0.05)
    st.caption("Lower threshold to flag more images as AI. Results are probabilistic.")

# ---------------------- MAIN --------------------------
uploaded = st.file_uploader("Drop an image (JPG/PNG/WebP)", type=["jpg","jpeg","png","webp"])
if uploaded:
    # Keep the raw bytes for exifread; Pillow works on a decoded RGB copy.
    raw = uploaded.read()
    img = Image.open(io.BytesIO(raw)).convert("RGB")
    # NOTE(review): use_column_width is deprecated in recent Streamlit
    # releases in favour of use_container_width — confirm target version.
    st.image(img, caption="Uploaded", use_column_width=True)

    # ---- Provenance (C2PA / Content Credentials) ----
    # Manual step: the user checks signed provenance on Adobe Verify;
    # when credentials exist they outrank any detector output below.
    st.subheader("Provenance (Content Credentials)")
    st.caption("First, check if the image carries signed Content Credentials (C2PA).")
    st.link_button("Open Adobe Verify", "https://verify.contentcredentials.org")
    st.caption("Upload the same image on the Verify page to see if credentials exist and what edits occurred.")

    # ---- Metadata (EXIF) ----
    st.subheader("Metadata (EXIF)")
    exif, suspicious, missing_core = read_basic_exif(img, raw)
    with st.expander("View EXIF details"):
        st.json({k: exif[k] for k in sorted(exif.keys())})
    if suspicious:
        # Show at most 8 flagged tag names to keep the warning readable.
        st.warning("Suspicious metadata tags: " + ", ".join(suspicious[:8]))
    if missing_core:
        st.info("No original capture timestamp in EXIF; many generated/edited images strip EXIF.")

    # ---- ELA ----
    st.subheader("Error Level Analysis (ELA)")
    ela_img = error_level_analysis(img, quality=95)
    st.image(ela_img, caption="ELA (enhanced for visibility)", use_column_width=True)
    # 4-vector [mean, rms, var, filled] consumed by heuristic_score below.
    feats = ela_features(ela_img)

    # ---- Model prediction (ViT) ----
    # Classify both the original image and its ELA rendering.
    clf = load_hf_pipeline()
    preds_orig = clf(img)
    preds_ela  = clf(ela_img)

    def fake_prob(preds):
        """Extract P(fake) from a pipeline output list of {label, score} dicts."""
        score_by_label = {p["label"].lower(): float(p["score"]) for p in preds}
        # NOTE(review): if neither "fake" nor "real" appears this falls back
        # to 1.0 (fully fake) — confirm that is intended for this model's labels.
        return score_by_label.get("fake", 1.0 - score_by_label.get("real", 0.0))

    # Take the more incriminating of the two views.
    model_fake = max(fake_prob(preds_orig), fake_prob(preds_ela))

    # ---- Heuristics ----
    heur_fake = heuristic_score(suspicious, missing_core, feats)

    # ---- Ensemble & verdict ----
    combined = combine_probs(model_fake, heur_fake, weight_model=weight_model)
    # A 0.15-wide band just under the threshold is reported as "Uncertain".
    verdict = "Likely AI" if combined >= fake_threshold else ("Uncertain" if combined >= (fake_threshold - 0.15) else "Likely Real")

    st.subheader(f"Result: {verdict}")
    st.write(f"**Combined AI confidence (0–100):** {int(combined*100)}")

    with st.expander("Why this result? (breakdown)"):
        st.write({
            "model_fake_prob": round(model_fake, 4),
            "heuristic_fake_prob": round(heur_fake, 4),
            "threshold": fake_threshold,
            "model_weight": weight_model
        })
        st.markdown("""
- **Provenance first:** If Content Credentials are present (see Adobe Verify), trust that signed record over any detector.
- **Model:** ViT classifier fine-tuned for Real vs Fake.
- **Heuristics:** EXIF clues + ELA artifact features.
- **Note:** Detectors are probabilistic; generators evolve quickly (concept drift).
""")

st.caption("Portfolio demo. For newsroom/forensics, pair detection with provenance (C2PA/Content Credentials).")