LPX
committed on
Commit
·
c9d5b11
1
Parent(s):
82e63cc
(major)feat: add new functionalities for AI-generated content detection
Browse files
- Introduced a comprehensive project introduction and implementation strategy for AI-generated content detection.
- Added new Gradio tabs for wavelet blocking noise estimation, bit plane extraction, and EXIF metadata extraction.
- Enhanced the user interface to include detailed descriptions for each new feature, improving usability and accessibility.
- app_mcp.py +87 -0
- utils/bitplane.py +27 -0
- utils/exif.py +11 -0
- utils/wavelet.py +20 -0
app_mcp.py
CHANGED
@@ -17,6 +17,13 @@ from utils.gradient import gradient_processing
|
|
17 |
from utils.minmax import preprocess as minmax_preprocess
|
18 |
from utils.ela import genELA as ELA
|
19 |
from forensics.registry import register_model, MODEL_REGISTRY, ModelEntry
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
20 |
|
21 |
|
22 |
# Configure logging
|
@@ -87,6 +94,57 @@ CLASS_NAMES = {
|
|
87 |
|
88 |
}
|
89 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
90 |
def preprocess_resize_256(image):
|
91 |
if image.mode != 'RGB':
|
92 |
image = image.convert('RGB')
|
@@ -341,12 +399,41 @@ with gr.Blocks(css="#post-gallery { overflow: hidden !important;} .grid-wrap{ ov
|
|
341 |
],
|
342 |
outputs=outputs
|
343 |
)
|
|
|
|
|
|
|
344 |
with gr.Tab("👑 Community Forensics Preview"):
|
345 |
temp_space = gr.load("aiwithoutborders-xyz/OpenSight-Community-Forensics-Preview", src="spaces")
|
346 |
# preview # no idea if this will work
|
347 |
with gr.Tab("🥇 Leaderboard"):
|
348 |
gr.Markdown("# AI Generated / Deepfake Detection Models Leaderboard: Soon™")
|
349 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
350 |
|
351 |
# Launch the interface
|
352 |
demo.launch(share=True)
|
|
|
17 |
from utils.minmax import preprocess as minmax_preprocess
|
18 |
from utils.ela import genELA as ELA
|
19 |
from forensics.registry import register_model, MODEL_REGISTRY, ModelEntry
|
20 |
+
import pywt
|
21 |
+
import cv2
|
22 |
+
import tempfile
|
23 |
+
import exiftool
|
24 |
+
from utils.wavelet import wavelet_blocking_noise_estimation
|
25 |
+
from utils.bitplane import bit_plane_extractor
|
26 |
+
from utils.exif import exif_full_dump
|
27 |
|
28 |
|
29 |
# Configure logging
|
|
|
94 |
|
95 |
}
|
96 |
|
97 |
+
QUICK_INTRO = """
|
98 |
+
### AI-Generated Content Detection: The Tipping Point
|
99 |
+
|
100 |
+
Remember that high-stakes game of whack-a-mole between deepfakes and detection algorithms that the world leaders promised to fund and fight? Well, to no surprise, that battle ended with what seems like a quiet acceptance of defeat. Despite massive increases in 2024 for research and funding for detection systems, it came to no surprise to anyone when the largest public detection project to date was effectively rendered useless just weeks after release.
|
101 |
+
|
102 |
+
Then came the sucker-punches. Month after month, SOTA models started dropping like they were on a release calendar:
|
103 |
+
• Hyper-realistic voice clones reading your emotional tells
|
104 |
+
• Zero-shot everything making reality checks irrelevant
|
105 |
+
• Image models that upgraded "plausible" to "indistinguishable" overnight
|
106 |
+
|
107 |
+
It was terrifying. Exhilarating. Hands-down the most fascinating existential rollercoaster since crypto crashed. And we all know why detection lost: **Defense always lags offense.** Pouring billions into bigger, slower models was like building thicker castle walls while the enemy developed drone strikes.
|
108 |
+
|
109 |
+
The research exodus wasn't betrayal – it was sanity. Why battle an unwinnable arms race when there's actual progress to be made elsewhere? And let's be honest: we saw this coming. When has humanity ever resisted accelerating technology that promises... *interesting* applications? As the ancients wisely tweeted: 🔞 drives innovation.
|
110 |
+
|
111 |
+
So what now? We pivot.
|
112 |
+
✅ Stop pretending we'll ever "solve" deepfakes. Accept they'll keep evolving.
|
113 |
+
✅ Learn from cybersecurity: Shift from impossible prevention to damage control
|
114 |
+
✅ Embrace and strive for radical efficiency – 10X the output at 0.1X the resource burn
|
115 |
+
|
116 |
+
But here's the silver lining, the hard-won wisdom, and the next chapter: efficiency. It's time to shift our focus from perpetual catch-up to smarter integration and acceptance.
|
117 |
+
|
118 |
+
Because our current approach? Training mammoth models on volcanic-scale energy consumption to chase diminishing returns? That's the real deepfake we should be fighting.
|
119 |
+
|
120 |
+
Next section: Practical, absurdly efficient alternatives already showing promise. It's not SOTA, but it just makes sense. ⚡
|
121 |
+
|
122 |
+
"""
|
123 |
+
|
124 |
+
IMPLEMENTATION = """
|
125 |
+
### 1. **Shift away from the belief that more data leads to better results. Rather, focus on insight-driven and "quality over quantity" datasets in training.**
|
126 |
+
* **Move Away from Terabyte-Scale Datasets**: Focus on **quality over quantity** by curating a smaller, highly diverse, and **labeled dataset** emphasizing edge cases and the latest AI generations.
|
127 |
+
* **Active Learning**: Implement active learning techniques to iteratively select the most informative samples for human labeling, reducing dataset size while maintaining effectiveness.
|
128 |
+
|
129 |
+
### 2. **Efficient Model Architectures**
|
130 |
+
* **Adopt Lightweight, State-of-the-Art Models**: Explore models designed for efficiency like MobileNet, EfficientNet, or recent advancements in vision transformers (ViTs) tailored for forensic analysis.
|
131 |
+
* **Transfer Learning with Fine-Tuning**: Leverage pre-trained models fine-tuned on your curated dataset to leverage general knowledge while adapting to specific AI image detection tasks.
|
132 |
+
|
133 |
+
### 3. **Multi-Modal and Hybrid Approaches**
|
134 |
+
* **Combine Image Forensics with Metadata Analysis**: Integrate insights from image processing with metadata (e.g., EXIF, XMP) for a more robust detection framework.
|
135 |
+
* **Incorporate Knowledge Graphs for AI Model Identification**: If feasible, build or utilize knowledge graphs mapping known AI models to their generation signatures for targeted detection.
|
136 |
+
|
137 |
+
### 4. **Continuous Learning and Update Mechanism**
|
138 |
+
* **Online Learning or Incremental Training**: Implement a system that can incrementally update the model with new, strategically selected samples, adapting to new AI generation techniques.
|
139 |
+
* **Community-Driven Updates**: Establish a feedback loop with users/community to report undetected AI images, fueling model updates.
|
140 |
+
|
141 |
+
### 5. **Evaluation and Validation**
|
142 |
+
* **Robust Validation Protocols**: Regularly test against unseen, diverse datasets including novel AI generations not present during training.
|
143 |
+
* **Benchmark Against State-of-the-Art**: Periodically compare performance with newly published detection models or techniques.
|
144 |
+
|
145 |
+
|
146 |
+
"""
|
147 |
+
|
148 |
def preprocess_resize_256(image):
|
149 |
if image.mode != 'RGB':
|
150 |
image = image.convert('RGB')
|
|
|
399 |
],
|
400 |
outputs=outputs
|
401 |
)
|
402 |
+
with gr.Tab("🙈 Project Introduction"):
    # Render the introduction texts defined at module level. This tab
    # previously showed a copy of the Leaderboard placeholder heading,
    # leaving QUICK_INTRO and IMPLEMENTATION unused.
    gr.Markdown(QUICK_INTRO)
    gr.Markdown(IMPLEMENTATION)
|
404 |
+
|
405 |
with gr.Tab("👑 Community Forensics Preview"):
|
406 |
temp_space = gr.load("aiwithoutborders-xyz/OpenSight-Community-Forensics-Preview", src="spaces")
|
407 |
# preview # no idea if this will work
|
408 |
with gr.Tab("🥇 Leaderboard"):
|
409 |
gr.Markdown("# AI Generated / Deepfake Detection Models Leaderboard: Soon™")
|
410 |
|
411 |
+
with gr.Tab("Wavelet Blocking Noise Estimation"):
|
412 |
+
gr.Interface(
|
413 |
+
fn=wavelet_blocking_noise_estimation,
|
414 |
+
inputs=[gr.Image(type="pil"), gr.Slider(1, 32, value=8, step=1, label="Block Size")],
|
415 |
+
outputs=gr.Image(type="pil"),
|
416 |
+
description="Wavelet-based local noise estimation"
|
417 |
+
)
|
418 |
+
with gr.Tab("Bit Plane Values"):
|
419 |
+
gr.Interface(
|
420 |
+
fn=bit_plane_extractor,
|
421 |
+
inputs=[
|
422 |
+
gr.Image(type="pil"),
|
423 |
+
gr.Dropdown(["Luminance", "Red", "Green", "Blue", "RGB Norm"], label="Channel", value="Luminance"),
|
424 |
+
gr.Slider(0, 7, value=0, step=1, label="Bit Plane"),
|
425 |
+
gr.Dropdown(["Disabled", "Median", "Gaussian"], label="Filter", value="Disabled")
|
426 |
+
],
|
427 |
+
outputs=gr.Image(type="pil"),
|
428 |
+
description="Extract and visualize bit planes from different channels."
|
429 |
+
)
|
430 |
+
with gr.Tab("EXIF Full Dump"):
|
431 |
+
gr.Interface(
|
432 |
+
fn=exif_full_dump,
|
433 |
+
inputs=gr.Image(type="pil"),
|
434 |
+
outputs=gr.JSON(),
|
435 |
+
description="Extract all EXIF metadata from the uploaded image."
|
436 |
+
)
|
437 |
|
438 |
# Launch the interface
|
439 |
demo.launch(share=True)
|
utils/bitplane.py
ADDED
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import cv2 as cv
|
2 |
+
import numpy as np
|
3 |
+
from PIL import Image
|
4 |
+
|
5 |
+
def bit_plane_extractor(
    image: Image.Image,
    channel: str = "Luminance",
    bit: int = 0,
    filter_type: str = "Disabled"
) -> Image.Image:
    """Extract and visualize one bit plane from a selected channel of an image.

    Args:
        image: Source image. It is converted to RGB before any processing.
        channel: "Luminance" (grayscale conversion), "Red", "Green", "Blue",
            or "RGB Norm" (per-pixel Euclidean norm of the three channels).
        bit: Index of the bit to extract, 0 (least significant) to 7.
        filter_type: "Median" or "Gaussian" applies a 3x3 blur to the plane;
            any other value (e.g. "Disabled") leaves it unfiltered.

    Returns:
        A single-channel 8-bit image that is 255 where the selected bit is
        set and 0 where it is clear (before optional filtering).

    Raises:
        ValueError: If ``bit`` is outside 0..7.
        KeyError: If ``channel`` is not one of the supported names.
    """
    # UI sliders may deliver the value as a float; the mask needs an int.
    bit = int(bit)
    if not 0 <= bit <= 7:
        raise ValueError(f"bit must be between 0 and 7, got {bit}")

    img = np.array(image.convert("RGB"))
    if channel == "Luminance":
        img = cv.cvtColor(img, cv.COLOR_RGB2GRAY)
    elif channel == "RGB Norm":
        # The raw norm of three 8-bit channels reaches 255 * sqrt(3) (~441),
        # which does not fit in uint8. Dividing by sqrt(3) maps it back onto
        # 0..255 instead of relying on an out-of-range float -> uint8 cast.
        squared_sum = np.sum(np.square(img.astype(np.float64)), axis=2)
        img = np.sqrt(squared_sum / 3.0).astype(np.uint8)
    else:
        idx = {"Red": 0, "Green": 1, "Blue": 2}[channel]
        img = img[:, :, idx]

    # Map "bit set" directly to 255. Min-max normalisation would render a
    # plane in which every pixel has the bit set as all black, making it
    # indistinguishable from a plane in which no pixel has it set.
    mask = np.uint8(1 << bit)
    plane = np.where((img & mask) != 0, np.uint8(255), np.uint8(0))

    if filter_type == "Median":
        plane = cv.medianBlur(plane, 3)
    elif filter_type == "Gaussian":
        plane = cv.GaussianBlur(plane, (3, 3), 0)
    return Image.fromarray(plane)
|
utils/exif.py
ADDED
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import tempfile
|
2 |
+
import exiftool
|
3 |
+
from PIL import Image
|
4 |
+
|
5 |
+
def exif_full_dump(image: Image.Image) -> dict:
    """Dump the metadata of an image by running exiftool on a temporary JPEG.

    The image is written to a temporary JPEG file, carrying over the EXIF
    payload Pillow exposes in ``image.info["exif"]`` when there is one, and
    that file is handed to exiftool.

    Args:
        image: The image to inspect.

    Returns:
        Whatever ``ExifTool.get_metadata`` returns for the temporary file.

    NOTE(review): only the EXIF block survives the re-encode; other metadata
    of the original upload (XMP, ICC, maker notes outside EXIF) can only be
    dumped from the original file path - confirm whether that is needed.
    NOTE(review): ``ExifTool.get_metadata`` exists in PyExifTool 0.4.x; newer
    releases expose it on ``ExifToolHelper`` and return a list - confirm the
    pinned version.
    """
    import os  # local import: only needed to build the temporary file path

    # JPEG cannot store alpha or palette modes; saving them raises OSError.
    if image.mode not in ("L", "RGB", "CMYK"):
        image = image.convert("RGB")

    # Without an explicit ``exif=`` argument Pillow writes a JPEG with no
    # EXIF block, so the dump would describe the re-encoded file only.
    save_kwargs = {}
    exif_bytes = image.info.get("exif")
    if exif_bytes:
        save_kwargs["exif"] = exif_bytes

    # Use a temporary directory rather than an open NamedTemporaryFile so the
    # path can be written and then re-opened by the external exiftool process
    # on every platform.
    with tempfile.TemporaryDirectory() as tmp_dir:
        tmp_path = os.path.join(tmp_dir, "image.jpg")
        image.save(tmp_path, format="JPEG", **save_kwargs)
        with exiftool.ExifTool() as et:
            metadata = et.get_metadata(tmp_path)
    return metadata
|
utils/wavelet.py
ADDED
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import numpy as np
|
2 |
+
import pywt
|
3 |
+
import cv2
|
4 |
+
from PIL import Image
|
5 |
+
|
6 |
+
def wavelet_blocking_noise_estimation(image: Image.Image, blocksize: int = 8) -> Image.Image:
    """Estimate local noise from block-wise wavelet detail coefficients.

    The image is converted to grayscale and decomposed with a single-level
    2-D 'db8' wavelet transform. The diagonal detail band is cut into
    non-overlapping ``blocksize`` x ``blocksize`` tiles; for each tile the
    median absolute coefficient divided by 0.6745 is used as the noise
    estimate. The per-tile map is min-max scaled to 0..255 and enlarged to
    the input size with nearest-neighbour interpolation.

    Args:
        image: Source image.
        blocksize: Tile edge length, in detail-coefficient samples.

    Returns:
        An 8-bit single-channel image with the same size as ``image``.

    Raises:
        ValueError: If ``blocksize`` is smaller than 1, or the image is too
            small to yield at least one full tile.
    """
    # UI sliders may deliver the value as a float; slicing needs an int.
    blocksize = int(blocksize)
    if blocksize < 1:
        raise ValueError(f"blocksize must be a positive integer, got {blocksize}")

    im = np.array(image.convert('L'))
    # Only the diagonal detail band is used for the estimate.
    _, (_, _, cD) = pywt.dwt2(im.astype(np.float64), 'db8')

    rows = cD.shape[0] // blocksize
    cols = cD.shape[1] // blocksize
    if rows == 0 or cols == 0:
        raise ValueError(
            f"image of size {im.shape[1]}x{im.shape[0]} is too small for blocksize {blocksize}"
        )

    # Drop the ragged border, then gather each tile's coefficients along the
    # last axis in one reshape instead of a Python double loop.
    cD = cD[:rows * blocksize, :cols * blocksize]
    block = (
        cD.reshape(rows, blocksize, cols, blocksize)
        .swapaxes(1, 2)
        .reshape(rows, cols, blocksize ** 2)
    )

    # 0.6745 is the usual constant for turning a median absolute value into
    # a Gaussian standard-deviation estimate.
    noise_map = np.median(np.abs(block), axis=2) / 0.6745
    noise_map_8u = cv2.normalize(noise_map, None, 0, 255, cv2.NORM_MINMAX, dtype=cv2.CV_8U)
    resized_noise_map = cv2.resize(noise_map_8u, (im.shape[1], im.shape[0]), interpolation=cv2.INTER_NEAREST)
    return Image.fromarray(resized_noise_map)
|