LPX committed on
Commit
c9d5b11
·
1 Parent(s): 82e63cc

(major)feat: add new functionalities for AI-generated content detection

Browse files

- Introduced a comprehensive project introduction and implementation strategy for AI-generated content detection.
- Added new Gradio tabs for wavelet blocking noise estimation, bit plane extraction, and EXIF metadata extraction.
- Enhanced the user interface to include detailed descriptions for each new feature, improving usability and accessibility.

Files changed (4) hide show
  1. app_mcp.py +87 -0
  2. utils/bitplane.py +27 -0
  3. utils/exif.py +11 -0
  4. utils/wavelet.py +20 -0
app_mcp.py CHANGED
@@ -17,6 +17,13 @@ from utils.gradient import gradient_processing
17
  from utils.minmax import preprocess as minmax_preprocess
18
  from utils.ela import genELA as ELA
19
  from forensics.registry import register_model, MODEL_REGISTRY, ModelEntry
 
 
 
 
 
 
 
20
 
21
 
22
  # Configure logging
@@ -87,6 +94,57 @@ CLASS_NAMES = {
87
 
88
  }
89
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90
  def preprocess_resize_256(image):
91
  if image.mode != 'RGB':
92
  image = image.convert('RGB')
@@ -341,12 +399,41 @@ with gr.Blocks(css="#post-gallery { overflow: hidden !important;} .grid-wrap{ ov
341
  ],
342
  outputs=outputs
343
  )
 
 
 
344
  with gr.Tab("👑 Community Forensics Preview"):
345
  temp_space = gr.load("aiwithoutborders-xyz/OpenSight-Community-Forensics-Preview", src="spaces")
346
  # preview # no idea if this will work
347
  with gr.Tab("🥇 Leaderboard"):
348
  gr.Markdown("# AI Generated / Deepfake Detection Models Leaderboard: Soon™")
349
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
350
 
351
  # Launch the interface
352
  demo.launch(share=True)
 
17
  from utils.minmax import preprocess as minmax_preprocess
18
  from utils.ela import genELA as ELA
19
  from forensics.registry import register_model, MODEL_REGISTRY, ModelEntry
20
+ import pywt
21
+ import cv2
22
+ import tempfile
23
+ import exiftool
24
+ from utils.wavelet import wavelet_blocking_noise_estimation
25
+ from utils.bitplane import bit_plane_extractor
26
+ from utils.exif import exif_full_dump
27
 
28
 
29
  # Configure logging
 
94
 
95
  }
96
 
97
+ QUICK_INTRO = """
98
+ ### AI-Generated Content Detection: The Tipping Point
99
+
100
+ Remember that high-stakes game of whack-a-mole between deepfakes and detection algorithms that the world leaders promised to fund and fight? Well, to no surprise, that battle ended with what seems like a quiet acceptance of defeat. Despite massive increases in 2024 for research and funding for detection systems, it came to no surprise to anyone when the largest public detection project to date was effectively rendered useless just weeks after release.
101
+
102
+ Then came the sucker-punches. Month after month, SOTA models started dropping like they were on a release calendar:
103
+ • Hyper-realistic voice clones reading your emotional tells
104
+ • Zero-shot everything making reality checks irrelevant
105
+ • Image models that upgraded "plausible" to "indistinguishable" overnight
106
+
107
+ It was terrifying. Exhilarating. Hands-down the most fascinating existential rollercoaster since crypto crashed. And we all know why detection lost: **Defense always lags offense.** Pouring billions into bigger, slower models was like building thicker castle walls while the enemy developed drone strikes.
108
+
109
+ The research exodus wasn't betrayal – it was sanity. Why battle an unwinnable arms race when there's actual progress to be made elsewhere? And let's be honest: we saw this coming. When has humanity ever resisted accelerating technology that promises... *interesting* applications? As the ancients wisely tweeted: 🔞 drives innovation.
110
+
111
+ So what now? We pivot.
112
+ ✅ Stop pretending we'll ever "solve" deepfakes. Accept they'll keep evolving.
113
+ ✅ Learn from cybersecurity: Shift from impossible prevention to damage control
114
+ ✅ Embrace and strive for radical efficiency – 10X the output at 0.1X the resource burn
115
+
116
+ But here's the silver lining, the hard-won wisdom, and the next chapter: efficiency. It's time to shift our focus from perpetual catch-up to smarter integration and acceptance.
117
+
118
+ Because our current approach? Training mammoth models on volcanic-scale energy consumption to chase diminishing returns? That's the real deepfake we should be fighting.
119
+
120
+ Next section: Practical, absurdly efficient alternatives already showing promise. It's not SOTA, but it just makes sense. ⚡
121
+
122
+ """
123
+
124
+ IMPLEMENTATION = """
125
+ ### 1. **Shift away from the belief that more data leads to better results. Rather, focus on insight-driven and "quality over quantity" datasets in training.**
126
+ * **Move Away from Terabyte-Scale Datasets**: Focus on **quality over quantity** by curating a smaller, highly diverse, and **labeled dataset** emphasizing edge cases and the latest AI generations.
127
+ * **Active Learning**: Implement active learning techniques to iteratively select the most informative samples for human labeling, reducing dataset size while maintaining effectiveness.
128
+
129
+ ### 2. **Efficient Model Architectures**
130
+ * **Adopt Lightweight, State-of-the-Art Models**: Explore models designed for efficiency like MobileNet, EfficientNet, or recent advancements in vision transformers (ViTs) tailored for forensic analysis.
131
+ * **Transfer Learning with Fine-Tuning**: Leverage pre-trained models fine-tuned on your curated dataset to leverage general knowledge while adapting to specific AI image detection tasks.
132
+
133
+ ### 3. **Multi-Modal and Hybrid Approaches**
134
+ * **Combine Image Forensics with Metadata Analysis**: Integrate insights from image processing with metadata (e.g., EXIF, XMP) for a more robust detection framework.
135
+ * **Incorporate Knowledge Graphs for AI Model Identification**: If feasible, build or utilize knowledge graphs mapping known AI models to their generation signatures for targeted detection.
136
+
137
+ ### 4. **Continuous Learning and Update Mechanism**
138
+ * **Online Learning or Incremental Training**: Implement a system that can incrementally update the model with new, strategically selected samples, adapting to new AI generation techniques.
139
+ * **Community-Driven Updates**: Establish a feedback loop with users/community to report undetected AI images, fueling model updates.
140
+
141
+ ### 5. **Evaluation and Validation**
142
+ * **Robust Validation Protocols**: Regularly test against unseen, diverse datasets including novel AI generations not present during training.
143
+ * **Benchmark Against State-of-the-Art**: Periodically compare performance with newly published detection models or techniques.
144
+
145
+
146
+ """
147
+
148
  def preprocess_resize_256(image):
149
  if image.mode != 'RGB':
150
  image = image.convert('RGB')
 
399
  ],
400
  outputs=outputs
401
  )
402
+ with gr.Tab("🙈 Project Introduction"):
403
+ gr.Markdown("# AI Generated / Deepfake Detection Models Leaderboard: Soon™")
404
+
405
  with gr.Tab("👑 Community Forensics Preview"):
406
  temp_space = gr.load("aiwithoutborders-xyz/OpenSight-Community-Forensics-Preview", src="spaces")
407
  # preview # no idea if this will work
408
  with gr.Tab("🥇 Leaderboard"):
409
  gr.Markdown("# AI Generated / Deepfake Detection Models Leaderboard: Soon™")
410
 
411
+ with gr.Tab("Wavelet Blocking Noise Estimation"):
412
+ gr.Interface(
413
+ fn=wavelet_blocking_noise_estimation,
414
+ inputs=[gr.Image(type="pil"), gr.Slider(1, 32, value=8, step=1, label="Block Size")],
415
+ outputs=gr.Image(type="pil"),
416
+ description="Wavelet-based local noise estimation"
417
+ )
418
+ with gr.Tab("Bit Plane Values"):
419
+ gr.Interface(
420
+ fn=bit_plane_extractor,
421
+ inputs=[
422
+ gr.Image(type="pil"),
423
+ gr.Dropdown(["Luminance", "Red", "Green", "Blue", "RGB Norm"], label="Channel", value="Luminance"),
424
+ gr.Slider(0, 7, value=0, step=1, label="Bit Plane"),
425
+ gr.Dropdown(["Disabled", "Median", "Gaussian"], label="Filter", value="Disabled")
426
+ ],
427
+ outputs=gr.Image(type="pil"),
428
+ description="Extract and visualize bit planes from different channels."
429
+ )
430
+ with gr.Tab("EXIF Full Dump"):
431
+ gr.Interface(
432
+ fn=exif_full_dump,
433
+ inputs=gr.Image(type="pil"),
434
+ outputs=gr.JSON(),
435
+ description="Extract all EXIF metadata from the uploaded image."
436
+ )
437
 
438
  # Launch the interface
439
  demo.launch(share=True)
utils/bitplane.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import cv2 as cv
2
+ import numpy as np
3
+ from PIL import Image
4
+
5
def bit_plane_extractor(
    image: Image.Image,
    channel: str = "Luminance",
    bit: int = 0,
    filter_type: str = "Disabled"
) -> Image.Image:
    """Extract and visualize a single bit plane from one channel of an image.

    Args:
        image: Input PIL image; it is converted to RGB before processing.
        channel: One of "Luminance", "Red", "Green", "Blue" or "RGB Norm".
        bit: Index of the bit to extract, from 0 (least significant) to 7.
        filter_type: "Median" or "Gaussian" applies a 3x3 smoothing filter to
            the extracted plane; any other value (e.g. "Disabled") leaves it
            untouched.

    Returns:
        A single-channel 8-bit PIL image in which the selected bit plane is
        min-max stretched to the 0-255 range.

    Raises:
        KeyError: If ``channel`` is not one of the supported names.
        ValueError: If ``bit`` is outside the range 0-7.
    """
    bit = int(bit)
    if not 0 <= bit <= 7:
        # 2 ** bit would not fit into the uint8 mask for larger values.
        raise ValueError(f"bit must be in the range 0-7, got {bit}")

    img = np.array(image.convert("RGB"))
    if channel == "Luminance":
        img = cv.cvtColor(img, cv.COLOR_RGB2GRAY)
    elif channel == "RGB Norm":
        # The array comes from PIL, so the channel order is R, G, B; the
        # Euclidean norm is symmetric in the channels either way.
        r, g, b = cv.split(img.astype(np.float64))
        # NOTE(review): the norm can reach ~441, which does not fit in uint8;
        # the cast is kept as in the original -- confirm whether clipping or
        # rescaling is wanted here.
        img = np.sqrt(np.power(r, 2) + np.power(g, 2) + np.power(b, 2)).astype(np.uint8)
    else:
        idx = {"Red": 0, "Green": 1, "Blue": 2}[channel]
        # Slicing a channel yields a strided view; hand OpenCV a contiguous copy.
        img = np.ascontiguousarray(img[:, :, idx])

    # Keep only the requested bit of every pixel.
    plane = cv.bitwise_and(np.full_like(img, 1 << bit), img)
    # The module imports OpenCV as ``cv``; the original referenced ``cv2`` here,
    # which raised NameError on every call.
    plane = cv.normalize(plane, None, 0, 255, cv.NORM_MINMAX).astype(np.uint8)

    if filter_type == "Median":
        plane = cv.medianBlur(plane, 3)
    elif filter_type == "Gaussian":
        plane = cv.GaussianBlur(plane, (3, 3), 0)
    return Image.fromarray(plane)
utils/exif.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import tempfile
2
+ import exiftool
3
+ from PIL import Image
4
+
5
def exif_full_dump(image: Image.Image) -> dict:
    """Dump the metadata exiftool reports for an image.

    The image is written to a temporary JPEG file, which is then handed to
    exiftool.

    NOTE(review): the image is re-encoded before exiftool sees it, so tags of
    the originally uploaded file are presumably lost unless Pillow carries
    them over -- confirm whether the caller should pass the file path instead.

    Args:
        image: PIL image to inspect.

    Returns:
        A dict mapping tag names to values as reported by exiftool (empty if
        exiftool returns no record).
    """
    import os

    # Pillow's JPEG writer rejects alpha/palette modes (e.g. RGBA, P, LA)
    # with OSError, so fall back to RGB for anything it cannot encode.
    if image.mode not in ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr"):
        image = image.convert("RGB")

    # A temporary directory (instead of an open NamedTemporaryFile) lets both
    # Pillow and the exiftool subprocess open the path on every platform.
    with tempfile.TemporaryDirectory() as tmp_dir:
        path = os.path.join(tmp_dir, "image.jpg")
        image.save(path)

        helper_cls = getattr(exiftool, "ExifToolHelper", None)
        if helper_cls is not None:
            # PyExifTool >= 0.5: get_metadata lives on ExifToolHelper and
            # returns one dict per file.
            with helper_cls() as et:
                results = et.get_metadata(path)
            metadata = results[0] if results else {}
        else:
            # Older PyExifTool: ExifTool.get_metadata returns a single dict.
            with exiftool.ExifTool() as et:
                metadata = et.get_metadata(path)
    return metadata
utils/wavelet.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import pywt
3
+ import cv2
4
+ from PIL import Image
5
+
6
def wavelet_blocking_noise_estimation(image: Image.Image, blocksize: int = 8) -> Image.Image:
    """Estimate local noise from block-wise wavelet detail coefficients.

    The image is converted to grayscale, decomposed with a single-level 'db8'
    2-D DWT, and the diagonal detail band is tiled into ``blocksize`` x
    ``blocksize`` blocks. Each block contributes ``median(|coeffs|) / 0.6745``
    to the noise map (0.6745 is presumably the usual MAD-to-sigma factor for
    Gaussian noise -- confirm against the reference method).

    Args:
        image: Input PIL image.
        blocksize: Side length, in detail-band coefficients, of each block.

    Returns:
        An 8-bit grayscale PIL image of the noise map, min-max normalized to
        0-255 and upsampled (nearest neighbour) to the input image size.

    Raises:
        ValueError: If ``blocksize`` is not positive, or if the detail band is
            smaller than one block in either dimension.
    """
    blocksize = int(blocksize)
    if blocksize < 1:
        raise ValueError(f"blocksize must be a positive integer, got {blocksize}")

    im = np.array(image.convert('L'))
    # Only the diagonal detail band (cD) is used for the estimate.
    _, (_, _, cD) = pywt.dwt2(im.astype(np.float64), 'db8')

    rows = cD.shape[0] // blocksize
    cols = cD.shape[1] // blocksize
    if rows == 0 or cols == 0:
        # Without this guard, OpenCV fails later on an empty array with an
        # opaque error message.
        raise ValueError(
            f"image is too small for blocksize {blocksize}: "
            f"detail band has shape {cD.shape}"
        )

    # Drop the ragged border so the band tiles exactly into blocks.
    cD = cD[:rows * blocksize, :cols * blocksize]
    # (rows, bs, cols, bs) -> (rows, cols, bs, bs) -> (rows, cols, bs*bs):
    # each block's coefficients end up flattened along the last axis.
    blocks = (
        cD.reshape(rows, blocksize, cols, blocksize)
        .swapaxes(1, 2)
        .reshape(rows, cols, blocksize ** 2)
    )

    noise_map = np.median(np.abs(blocks), axis=2) / 0.6745
    noise_map_8u = cv2.normalize(noise_map, None, 0, 255, cv2.NORM_MINMAX, dtype=cv2.CV_8U)
    # cv2.resize takes (width, height).
    resized_noise_map = cv2.resize(
        noise_map_8u, (im.shape[1], im.shape[0]), interpolation=cv2.INTER_NEAREST
    )
    return Image.fromarray(resized_noise_map)