Merge commit '999d6757e80ee17842c6e2195c3a12eeb59ff329' into pr/2
- .gitattributes +5 -0
- .gitignore +6 -1
- FLOW.gv +66 -0
- README.md +392 -6
- agents/ensemble_team.py +131 -0
- agents/{weight_management.py → ensemble_weights.py} +70 -24
- agents/monitoring_agents.py +0 -135
- agents/smart_agents.py +110 -38
- app.backup.py +5 -5
- app.py +728 -0
- app_mcp.py → app_mcp.old.py +179 -65
- app_optimized.py +987 -0
- forensics/__init__.py +15 -0
- {utils → forensics}/bitplane.py +8 -1
- {utils → forensics}/ela.py +2 -2
- forensics/exif.py +11 -0
- {utils → forensics}/gradient.py +13 -1
- {utils → forensics}/minmax.py +14 -2
- {utils → forensics}/wavelet.py +10 -2
- graph.svg +430 -0
- graph_alt.svg +431 -0
- hf_inference_logs/log_20250611031830376635.json +0 -0
- preview/.gitkeep +0 -0
- preview/1.png +3 -0
- preview/127.0.0.1_7860__.png +3 -0
- preview/2.png +3 -0
- preview/3.png +3 -0
- preview/4.png +3 -0
- preview/graph.png +0 -0
- requirements.txt +34 -15
- temp_model_config/config.json +28 -0
- temp_original_vit_config/config.json +26 -0
- utils/exif.py +0 -11
- utils/hf_logger.py +30 -16
- utils/load.py +51 -0
- {forensics → utils}/registry.py +6 -3
- utils/utils.py +2 -1
.gitattributes
CHANGED
@@ -33,3 +33,8 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+preview/1.png filter=lfs diff=lfs merge=lfs -text
+preview/127.0.0.1_7860__.png filter=lfs diff=lfs merge=lfs -text
+preview/2.png filter=lfs diff=lfs merge=lfs -text
+preview/3.png filter=lfs diff=lfs merge=lfs -text
+preview/4.png filter=lfs diff=lfs merge=lfs -text
.gitignore
CHANGED
@@ -7,4 +7,9 @@ forensics/__pycache__/*
 *.cpython-311.pyc
 *.cpython-312.pyc
 test.ipynb
-models/*
+models/*
+*.cpython-310.pyc
+*/inference_logs/*
+hf_inference_logs/*.json
+hf_inference_logs/*
+.gradio/flagged/*
FLOW.gv
ADDED
@@ -0,0 +1,66 @@
digraph ImagePredictionFlow {
    graph [fontname="Arial", fontsize="10", rankdir="TB"]; // Top-to-Bottom, Reduced Width
    node [shape="rect", style="rounded,filled", fontname="Arial", fontsize="10", fillcolor="lightblue", gradientangle="90"];
    edge [fontname="Arial", fontsize="8"];

    A [label="User Upload,\nPredict", fillcolor="lightgreen"]; // Shorter Label
    B [label="Img Pre-proc,\nAgent Init", fillcolor="lightyellow"];

    subgraph cluster_models {
        label = "Model Ensemble";
        style = "dashed";

        ImageIn [shape=point, label="", width=0, height=0];
        Model1 [label="Model1", fillcolor="lightcoral"]; // Shorter Labels
        Model2 [label="Model2", fillcolor="lightcoral"];
        Model3 [label="Model3", fillcolor="lightcoral"];
        Model4 [label="Model4", fillcolor="lightcoral"];
        Model5 [label="Model5", fillcolor="lightcoral"];
        Model6 [label="Model6", fillcolor="lightcoral"];
        Model7 [label="Model7", fillcolor="lightcoral"];
        WeightedConsensusInput [label="Model Results", fillcolor="lightyellow"]; // Shorter Label

        ImageIn -> Model1; ImageIn -> Model2; ImageIn -> Model3; ImageIn -> Model4; ImageIn -> Model5; ImageIn -> Model6; ImageIn -> Model7;
        Model1 -> WeightedConsensusInput; Model2 -> WeightedConsensusInput; Model3 -> WeightedConsensusInput; Model4 -> WeightedConsensusInput; Model5 -> WeightedConsensusInput; Model6 -> WeightedConsensusInput; Model7 -> WeightedConsensusInput;
    }

    ContextualIntelligenceAgent [label="Contextual\nIntelligence Agent", fillcolor="lightcyan"]; // Shorter Label
    BaggingAgent [label="BaggingAgent", fillcolor="lightcyan"]; // Shorter Label
    DeepEnsembleAgent [label="DeepEnsemble\nAgent", fillcolor="lightcyan"]; // Shorter Label
    EvolutionEnsembleAgent [label="EvolutionEnsemble\nAgent", fillcolor="lightcyan"]; // Shorter Label

    WeightManager [label="Weight\nManager", fillcolor="lightcyan"]; // Shorter Label
    WeightedConsensus [label="Weighted\nConsensus", fillcolor="lightgreen"];
    OptimizeAgent [label="Weight\nOpt Agent", fillcolor="lightcyan"]; // Shorter Label

    subgraph cluster_forensics {
        label = "Forensic Analysis";
        style = "dashed";

        ForensicIn [shape=point, label="", width=0, height=0];
        GradientProcessing [label="Gradient\nProcessing", fillcolor="lightpink"]; // Shorter Labels
        MinMaxProcessing [label="MinMax\nProcessing", fillcolor="lightpink"];
        ELAProcessing [label="ELA\nProcessing", fillcolor="lightpink"];
        BitPlaneExtraction [label="BitPlane\nExtraction", fillcolor="lightpink"];
        WaveletBasedNoiseAnalysis [label="Wavelet\nNoise Analysis", fillcolor="lightpink"];
        AnomalyAgent [label="Anomaly\nDetection", fillcolor="lightcyan"]; // Shorter Label

        ForensicIn -> GradientProcessing; ForensicIn -> MinMaxProcessing; ForensicIn -> ELAProcessing; ForensicIn -> BitPlaneExtraction; ForensicIn -> WaveletBasedNoiseAnalysis;
        GradientProcessing -> AnomalyAgent; MinMaxProcessing -> AnomalyAgent; ELAProcessing -> AnomalyAgent; BitPlaneExtraction -> AnomalyAgent; WaveletBasedNoiseAnalysis -> AnomalyAgent;
    }

    DataLoggingAndOutput [label="Data Logging\nOutput", fillcolor="lightsalmon"]; // Shorter Label
    ResultsDisplay [label="Results", fillcolor="lightgreen"]; // Shorter Label

    // Connections
    A -> B;
    B -> ImageIn;

    WeightedConsensusInput -> ContextualIntelligenceAgent; WeightedConsensusInput -> BaggingAgent; WeightedConsensusInput -> DeepEnsembleAgent; WeightedConsensusInput -> EvolutionEnsembleAgent; // Connect agents
    ContextualIntelligenceAgent -> WeightManager; BaggingAgent -> WeightManager; DeepEnsembleAgent -> WeightManager; EvolutionEnsembleAgent -> WeightManager; // Agents to WM
    WeightManager -> WeightedConsensus;
    WeightedConsensus -> OptimizeAgent; OptimizeAgent -> WeightManager;
    WeightedConsensus -> ForensicIn; AnomalyAgent -> DataLoggingAndOutput;
    DataLoggingAndOutput -> ResultsDisplay;
}
README.md
CHANGED
@@ -6,15 +6,401 @@ colorFrom: yellow
 colorTo: yellow
 sdk: gradio
 sdk_version: 5.33.0
-app_file:
+app_file: app_optimized.py
 pinned: true
 models:
 - aiwithoutborders-xyz/OpenSight-CommunityForensics-Deepfake-ViT
-- Heem2/AI-vs-Real-Image-Detection
-- haywoodsloan/ai-image-detector-deploy
-- cmckinle/sdxl-flux-detector
-- Organika/sdxl-detector
 license: mit
+tags:
+- mcp-server-track
+- ai-agents
+- leaderboards
+- incentivized-contests
+- Agents-MCP-Hackathon
 ---

**6/18/25: YES, we are aware that updates to the submission will likely result in a disqualification.** It was never about the cash prize for us in the first place. Good luck to all hackers!

# The Detection Dilemma: The Degentic Games



The cat-and-mouse game between digital forgery and detection reached a tipping point early last year, after years of escalating concern and anxiety. The most ambitious, expensive, and resource-intensive detection model to date launched with genuinely impressive results. Impressive... for an embarrassing two to three weeks.

Then came the knockout punches. New SOTA models now emerge every few weeks, in every imaginable domain -- image, audio, video, music. Generated images have reached a level of realism at which an untrained eye can no longer tell whether they are real or fake. [TO-DO: Add Citation to the study]

And let's be honest: we saw this coming. When has humanity ever resisted accelerating technology that promises... *interesting* applications? As the ancients wisely tweeted: π drives innovation.

It's time for a reset. Quit crying and get ready. Didn't you hear? The long-awaited Degentic Games are starting soon, and your model sucks.

## Re-Thinking Detection

### 1. **Shift away from the belief that more data leads to better results. Focus instead on insight-driven, "quality over quantity" datasets for training.**
* **Move Away from Terabyte-Scale Datasets**: Focus on **quality over quantity** by curating a smaller, highly diverse, **labeled dataset** that emphasizes edge cases and the latest AI generations.
* **Active Learning**: Implement active learning techniques to iteratively select the most informative samples for human labeling, reducing dataset size while maintaining effectiveness.

### 2. **Efficient Model Architectures**
* **Adopt Lightweight, State-of-the-Art Models**: Explore models designed for efficiency, such as MobileNet, EfficientNet, or recent vision transformers (ViTs) tailored for forensic analysis.
* **Transfer Learning with Fine-Tuning**: Start from pre-trained models and fine-tune them on the curated dataset, retaining general visual knowledge while adapting to the specific AI-image detection task.

### 3. **Multi-Modal and Hybrid Approaches**
* **Combine Image Forensics with Metadata Analysis**: Integrate insights from image processing with metadata (e.g., EXIF, XMP) for a more robust detection framework.
* **Incorporate Knowledge Graphs for AI Model Identification**: If feasible, build or utilize knowledge graphs mapping known AI models to their generation signatures for targeted detection.

### 4. **Continuous Learning and Update Mechanism**
* **Online Learning or Incremental Training**: Implement a system that can incrementally update the model with new, strategically selected samples, adapting to new AI generation techniques.
* **Community-Driven Updates**: Establish a feedback loop with users and the community to report undetected AI images, fueling model updates.

### 5. **Evaluation and Validation**
* **Robust Validation Protocols**: Regularly test against unseen, diverse datasets, including novel AI generations not present during training.
* **Benchmark Against State-of-the-Art**: Periodically compare performance with newly published detection models and techniques.


### Core Roadmap

- [x] Project Introduction
- [ ] Agents Released into Wild
- [ ] Whitepaper / Arxiv Release
- [ ] Public Participation



## Functions Available for LLM Calls via MCP

This section outlines the functions available for programmatic invocation by LLMs through the MCP (Model Context Protocol) server, as defined in `mcp-deepfake-forensics/app.py`.

## 1. `full_prediction`

### Description
This function processes an uploaded image to predict whether it is AI-generated or real, utilizing an ensemble of deepfake detection models and advanced forensic analysis techniques. It also incorporates intelligent agents for context inference, weight management, and anomaly detection.

### API Names
- `predict`

### Parameters
- `img` (str): The input image to be analyzed, provided as a file path.
- `confidence_threshold` (float): A value between 0.0 and 1.0 (default: 0.7) that determines the confidence level required for a model to label an image as "AI" or "REAL". If neither score meets this threshold, the label will be "UNCERTAIN".
- `rotate_degrees` (float): The maximum degree by which to rotate the image (default: 0). If greater than 0, the "rotate" augmentation is applied.
- `noise_level` (float): The level of noise to add to the image (default: 0). If greater than 0, the "add_noise" augmentation is applied.
- `sharpen_strength` (float): The strength of the sharpening effect to apply (default: 0). If greater than 0, the "sharpen" augmentation is applied.

### Returns
- `img_pil` (PIL Image): The processed image (original or augmented).
- `cleaned_forensics_images` (list of PIL Image): A list of images generated by the various forensic analysis techniques (ELA, gradient, minmax, bit plane). These include:
  - Original augmented image
  - ELA analysis (multiple passes)
  - Gradient processing (multiple variations)
  - MinMax processing (multiple variations)
  - Bit Plane extraction
- `table_rows` (list of lists): A list of lists representing the model predictions, suitable for display in a Gradio Dataframe. Each inner list contains: Model Name, Contributor, AI Score, Real Score, and Label.
- `json_results` (str): A JSON string containing the raw model prediction results for debugging purposes.
- `consensus_html` (str): An HTML string representing the final consensus label ("AI", "REAL", or "UNCERTAIN"), styled with color.
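
A minimal sketch of calling this endpoint from Python with `gradio_client` is shown below. The Space identifier is a placeholder and the output ordering simply mirrors the return list above, so treat it as illustrative rather than the canonical client.

```python
# Hypothetical client-side call; the Space id below is a placeholder.
from gradio_client import Client

client = Client("aiwithoutborders-xyz/OpenSight-Deepfake-Detection")  # placeholder

result = client.predict(
    "sample.jpg",   # img: path to the image to analyze
    0.7,            # confidence_threshold
    0.0,            # rotate_degrees (0 = no rotation augmentation)
    0.0,            # noise_level
    0.0,            # sharpen_strength
    api_name="/predict",
)

# result unpacks in the order documented above:
# (img_pil, cleaned_forensics_images, table_rows, json_results, consensus_html)
print(result[-1])  # final consensus HTML
```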

## 2. `noise_estimation`

### Description
Analyzes image noise patterns using wavelet decomposition. This tool helps detect compression artifacts and artificial noise patterns that may indicate image manipulation. Higher noise levels in specific regions can reveal areas of potential tampering.

### API Name
- `tool_waveletnoise`

### Parameters
- `image` (PIL Image): The input image to analyze.
- `block_size` (int): The size of the blocks for wavelet analysis (default: 8, range: 1-32).

### Returns
- `output_image` (PIL Image): An image visualizing the noise patterns.
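
As a rough illustration of the block-wise idea (a sketch, not the Space's exact implementation), a single-level wavelet decomposition can expose local noise energy; `pywt`, `numpy`, and Pillow are assumed to be available.

```python
# Sketch only: estimate per-block noise energy from the diagonal wavelet detail band.
import numpy as np
import pywt
from PIL import Image

def wavelet_noise_map(img: Image.Image, block_size: int = 8) -> Image.Image:
    gray = np.asarray(img.convert("L"), dtype=np.float32)
    # Single-level 2D DWT; the HH (diagonal detail) band is dominated by fine noise.
    _, (_, _, hh) = pywt.dwt2(gray, "haar")
    h, w = hh.shape
    out = np.zeros_like(hh)
    for y in range(0, h, block_size):
        for x in range(0, w, block_size):
            block = hh[y:y + block_size, x:x + block_size]
            out[y:y + block_size, x:x + block_size] = block.std()
    out = (255 * out / (out.max() + 1e-8)).astype(np.uint8)
    return Image.fromarray(out)
```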

## 3. `bit_plane_extractor`

### Description
Extracts and visualizes individual bit planes from different color channels. This forensic tool helps identify hidden patterns and artifacts in image data that may indicate manipulation. Different bit planes can reveal inconsistencies in image processing or editing.

### API Name
- `tool_bitplane`

### Parameters
- `image` (PIL Image): The input image to analyze.
- `channel` (str): The color channel to extract the bit plane from. Possible values: "Luminance", "Red", "Green", "Blue", "RGB Norm" (default: "Luminance").
- `bit_plane` (int): The bit plane index to extract (0-7, default: 0).
- `filter_type` (str): A filter to apply to the extracted bit plane. Possible values: "Disabled", "Median", "Gaussian" (default: "Disabled").

### Returns
- `output_image` (PIL Image): An image visualizing the extracted bit plane.
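
The core of bit-plane extraction is a simple mask-and-shift. A minimal luminance-only sketch (no filtering step, NumPy/Pillow assumed) looks like this:

```python
# Sketch: extract one bit plane from the luminance channel and stretch it to 0/255.
import numpy as np
from PIL import Image

def extract_bit_plane(img: Image.Image, bit_plane: int = 0) -> Image.Image:
    lum = np.asarray(img.convert("L"), dtype=np.uint8)
    plane = (lum >> bit_plane) & 1          # keep only the requested bit
    return Image.fromarray((plane * 255).astype(np.uint8))
```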

## 4. `ELA`

### Description
Performs Error Level Analysis to detect re-saved JPEG images, which can indicate tampering. ELA highlights areas of an image that have different compression levels.

### API Name
- `tool_ela`

### Parameters
- `img` (PIL Image): Input image to analyze.
- `quality` (int): JPEG compression quality (1-100, default: 75).
- `scale` (int): Output multiplicative gain (1-100, default: 50).
- `contrast` (int): Output tonality compression (0-100, default: 20).
- `linear` (bool): Whether to use linear difference (default: False).
- `grayscale` (bool): Whether to output a grayscale image (default: False).

### Returns
- `processed_ela_image` (PIL Image): The processed ELA image.
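
The essence of ELA is re-compressing the image at a known JPEG quality and amplifying the difference. A minimal sketch under that assumption (Pillow only, ignoring the contrast/linear/grayscale options of the real tool):

```python
# Sketch: recompress at a fixed JPEG quality and amplify the residual.
import io
from PIL import Image, ImageChops

def ela(img: Image.Image, quality: int = 75, scale: int = 50) -> Image.Image:
    buf = io.BytesIO()
    img.convert("RGB").save(buf, format="JPEG", quality=quality)
    buf.seek(0)
    recompressed = Image.open(buf)
    diff = ImageChops.difference(img.convert("RGB"), recompressed)
    # Amplify the (usually tiny) error levels so they become visible.
    return diff.point(lambda px: min(255, px * scale // 10))
```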

## 5. `gradient_processing`

### Description
Applies gradient filters to an image to enhance edges and transitions, which can reveal inconsistencies due to manipulation.

### API Name
- `tool_gradient_processing`

### Parameters
- `image` (PIL Image): The input image to analyze.
- `intensity` (int): Intensity of the gradient effect (0-100, default: 90).
- `blue_mode` (str): Mode for the blue channel. Possible values: "Abs", "None", "Flat", "Norm" (default: "Abs").
- `invert` (bool): Whether to invert the gradients (default: False).
- `equalize` (bool): Whether to equalize the histogram (default: False).

### Returns
- `gradient_image` (PIL Image): The image with gradient processing applied.
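
A stripped-down version of the idea (Sobel gradients on the luminance channel, ignoring the blue-channel modes and other options above) might look like the following sketch:

```python
# Sketch: visualize luminance gradients with Sobel filters; the tool's extra options are omitted.
import numpy as np
from PIL import Image
from scipy import ndimage

def gradient_map(img: Image.Image, intensity: int = 90) -> Image.Image:
    gray = np.asarray(img.convert("L"), dtype=np.float32)
    gx = ndimage.sobel(gray, axis=1)
    gy = ndimage.sobel(gray, axis=0)
    mag = np.hypot(gx, gy)
    mag = mag / (mag.max() + 1e-8) * 255 * (intensity / 100)
    return Image.fromarray(np.clip(mag, 0, 255).astype(np.uint8))
```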

## 6. `minmax_process`

### Description
Analyzes local pixel value deviations to detect subtle changes in image data, often indicative of digital forgeries.

### API Name
- `tool_minmax_processing`

### Parameters
- `image` (PIL Image): The input image to analyze.
- `channel` (int): The color channel to process. Possible values: 0 (Grayscale), 1 (Blue), 2 (Green), 3 (Red), 4 (RGB Norm) (default: 4).
- `radius` (int): The radius for local pixel analysis (0-10, default: 2).

### Returns
- `minmax_image` (PIL Image): The image with minmax processing applied.
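
One way to read "local pixel value deviation" is the spread between each pixel's neighborhood maximum and minimum. The sketch below uses grayscale only and is an interpretation of the idea, not the tool's actual algorithm:

```python
# Sketch: local max-min spread as a crude deviation map (grayscale interpretation).
import numpy as np
from PIL import Image
from scipy import ndimage

def minmax_map(img: Image.Image, radius: int = 2) -> Image.Image:
    gray = np.asarray(img.convert("L"), dtype=np.float32)
    size = 2 * radius + 1
    local_max = ndimage.maximum_filter(gray, size=size)
    local_min = ndimage.minimum_filter(gray, size=size)
    spread = local_max - local_min
    spread = spread / (spread.max() + 1e-8) * 255
    return Image.fromarray(spread.astype(np.uint8))
```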

## 7. `augment_image_interface`

### Description
Applies various augmentation techniques to an image.

### API Name
- `augment_image`

### Parameters
- `img` (PIL Image): The input image to augment.
- `augment_methods` (list of str): A list of augmentation methods to apply. Possible values: "rotate", "add_noise", "sharpen".
- `rotate_degrees` (float): The degrees to rotate the image (0-360).
- `noise_level` (float): The level of noise to add (0-100).
- `sharpen_strength` (float): The strength of the sharpening effect (0-200).

### Returns
- `augmented_img` (PIL Image): The augmented image.
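
A compact sketch of the three augmentations with Pillow and NumPy (the parameter scaling is an assumption; the app's exact mapping may differ):

```python
# Sketch: rotate / add_noise / sharpen, roughly matching the parameters listed above.
import numpy as np
from PIL import Image, ImageEnhance

def augment(img, methods, rotate_degrees=0.0, noise_level=0.0, sharpen_strength=0.0):
    out = img.convert("RGB")
    if "rotate" in methods and rotate_degrees > 0:
        out = out.rotate(rotate_degrees, expand=True)
    if "add_noise" in methods and noise_level > 0:
        arr = np.asarray(out, dtype=np.float32)
        arr += np.random.normal(0, noise_level, arr.shape)   # Gaussian noise
        out = Image.fromarray(np.clip(arr, 0, 255).astype(np.uint8))
    if "sharpen" in methods and sharpen_strength > 0:
        out = ImageEnhance.Sharpness(out).enhance(1 + sharpen_strength / 100)
    return out
```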

## 8. `community_forensics_preview`

### Description
Provides a quick and simple prediction using our strongest model.

### API Name
- `quick_predict`

### Parameters
- `img` (str): The input image to analyze, provided as a file path.

### Returns
- (HTML): An HTML output from the loaded Gradio Space.

---

# Behind the Scenes: Image Prediction Flow

When you upload an image for analysis and click the "Predict" button, the following steps occur:

### 1. Image Pre-processing and Agent Initialization

* **Image Conversion**: The input image is first ensured to be a PIL (Pillow) Image object. If it is a file path, it is loaded and converted to PIL; if it is a NumPy array, it is converted directly. The image is then ensured to be in RGB format (see the sketch after this list).
* **Agent Setup**: Several intelligent agents are initialized to assist in the process:
  * `EnsembleMonitorAgent`: Monitors the performance of individual models.
  * `ModelWeightManager`: Manages and adjusts the weights of different models.
  * `WeightOptimizationAgent`: Optimizes model weights based on performance.
  * `SystemHealthAgent`: Monitors the system's resource usage (e.g., memory, GPU).
  * `ContextualIntelligenceAgent`: Infers context tags from the image to aid in weight adjustment.
  * `ForensicAnomalyDetectionAgent`: Analyzes forensic outputs for signs of manipulation.
* **System Health Monitoring**: The `SystemHealthAgent` performs an initial check of system resources.
* **Image Augmentation (Optional)**: If `rotate_degrees`, `noise_level`, or `sharpen_strength` are provided, the image is augmented accordingly using the "rotate", "add_noise", and "sharpen" methods internally. Otherwise, the original image is used.
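
A minimal sketch of that normalization step (assuming only Pillow and NumPy, not the app's exact helper):

```python
# Sketch: coerce str / numpy.ndarray / PIL.Image inputs into an RGB PIL image.
import numpy as np
from PIL import Image

def ensure_pil_rgb(img):
    if isinstance(img, str):            # file path
        img = Image.open(img)
    elif isinstance(img, np.ndarray):   # array from Gradio or OpenCV
        img = Image.fromarray(img)
    return img.convert("RGB")
```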

### 2. Initial Model Predictions

* **Individual Model Inference**: The augmented (or original) image is passed through each of the registered deepfake detection models (`model_1` through `model_7`).
* **Performance Monitoring**: For each model, the `EnsembleMonitorAgent` tracks its prediction label, confidence score, and inference time.
* **Result Collection**: The raw prediction results (AI Score, Real Score, predicted Label) from each model are stored.

### 3. Smart Agent Processing and Weighted Consensus

* **Contextual Intelligence**: The `ContextualIntelligenceAgent` analyzes the image's metadata (width, height, mode) and the raw model predictions to infer relevant context tags (e.g., "generated by Midjourney", "likely real photo"). This helps in making more informed decisions about model reliability.
* **Dynamic Weight Adjustment**: The `ModelWeightManager` adjusts the influence (weights) of each individual model's prediction. This adjustment takes into account the initial model predictions, their confidence scores, and the detected context tags. Note that `simple_prediction` (the Community Forensics model) is given a significantly higher base weight.
* **Weighted Consensus Calculation**: A final prediction label ("AI", "REAL", or "UNCERTAIN") is determined by combining the individual model predictions using their adjusted weights. Models with higher confidence and relevance to the detected context contribute more to the final decision (see the sketch after this list).
* **Performance Analysis (for Optimization)**: The `WeightOptimizationAgent` analyzes the final consensus label to continually improve the weight adjustment strategy for future predictions.
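
A simplified sketch of such a weighted vote (pure Python; the app's real scoring and thresholds may differ):

```python
# Sketch: combine per-model AI/Real scores with adjusted weights into a consensus label.
def weighted_consensus(predictions, weights, threshold=0.7):
    """predictions: {model_id: {"AI Score": float, "Real Score": float}}
    weights: {model_id: float}, assumed to sum to 1."""
    ai = sum(weights[m] * p["AI Score"] for m, p in predictions.items())
    real = sum(weights[m] * p["Real Score"] for m, p in predictions.items())
    if ai >= threshold:
        return "AI"
    if real >= threshold:
        return "REAL"
    return "UNCERTAIN"
```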

### 4. Forensic Processing

* **Multiple Forensic Techniques**: The original image is subjected to various forensic analysis techniques to reveal hidden artifacts that might indicate manipulation:
  * **Gradient Processing**: Highlights edges and transitions in the image.
  * **MinMax Processing**: Reveals deviations in local pixel values.
  * **ELA (Error Level Analysis)**: Performed in multiple passes (grayscale and color, with varying contrast) to detect areas of different compression levels, which can suggest tampering.
  * **Bit Plane Extraction**: Extracts and visualizes individual bit planes.
  * **Wavelet-Based Noise Analysis**: Analyzes noise patterns using wavelet decomposition.
* **Forensic Anomaly Detection**: The `ForensicAnomalyDetectionAgent` analyzes the outputs of these forensic tools and their descriptions to identify potential anomalies or inconsistencies that could indicate image manipulation.

### 5. Data Logging and Output Generation

* **Inference Data Logging**: All relevant data from the current prediction, including the original image, inference parameters, individual model predictions, ensemble output, forensic images, and agent monitoring data, is logged to a Hugging Face dataset for continuous improvement and analysis.
* **Output Preparation**: The results are formatted for display in the Gradio interface:
  * The processed image (augmented or original) is prepared.
  * The forensic analysis images are collected for display in a gallery.
  * A table summarizing each model's prediction (Model, Contributor, AI Score, Real Score, Label) is generated.
  * The raw JSON output of model results is prepared for debugging.
  * The final consensus label is prepared with appropriate styling.
* **Data Type Conversion**: Numerical values (such as AI Score and Real Score) are converted to standard Python floats to ensure proper JSON serialization.

---
## Flow-Chart

<img src="graph_alt.svg">


## Roadmap & Features

### In Progress & Pending Tasks

| Task | Status | Priority | Notes |
|------|--------|----------|-------|
| [x] Set up basic ensemble model architecture | ✅ Completed | High | Core framework established |
| [x] Implement initial forensic analysis tools | ✅ Completed | High | ELA, Gradient, MinMax processing |
| [x] Create intelligent agent system | ✅ Completed | High | All monitoring agents implemented |
| [x] Refactor Gradio interface for MCP | ✅ Completed | Medium | User-friendly web interface |
| [x] Integrate multiple deepfake detection models | ✅ Completed | High | 7 models successfully integrated |
| [x] Implement weighted consensus algorithm | ✅ Completed | High | Dynamic weight adjustment working |
| [x] Add image augmentation capabilities | ✅ Completed | Medium | Rotation, noise, sharpening features |
| [x] Set up data logging to Hugging Face | ✅ Completed | Medium | Continuous improvement pipeline |
| [x] Create system health monitoring | ✅ Completed | Medium | Resource usage tracking |
| [x] Implement contextual intelligence analysis | ✅ Completed | Medium | Context tag inference system |
| [x] Expose `augment_image` as a Gradio interface | ✅ Completed | Medium | New "Image Augmentation" tab added |
| [ ] Implement real-time model performance monitoring | 🔄 In Progress | High | Add live metrics dashboard |
| [ ] Add support for video deepfake detection | Pending | Medium | Extend current image-based system |
| [ ] Optimize forensic analysis processing speed | 🔄 In Progress | High | Current ELA processing is slow |
| [ ] Implement batch processing for multiple images | 🔄 In Progress | Medium | Improve throughput for bulk analysis |
| [ ] Add model confidence threshold configuration | Pending | Low | Allow users to adjust sensitivity |
| [ ] Create test suite | Pending | High | Unit tests for all agents and models |
| [ ] Implement model versioning and rollback | Pending | Medium | Track model performance over time |
| [ ] Add export functionality for analysis reports | Pending | Low | PDF/CSV export options |
| [ ] Optimize memory usage for large images | 🔄 In Progress | High | Handle 4K+ resolution images |
| [ ] Add support for additional forensic techniques | 🔄 In Progress | Medium | Consider adding noise analysis |
| [ ] Implement user authentication system | Pending | Low | For enterprise deployment |
| [ ] Create API documentation | 🔄 In Progress | Medium | OpenAPI/Swagger specs |
| [ ] Add model ensemble validation metrics | Pending | High | Cross-validation for weight optimization |
| [ ] Implement caching for repeated analyses | Pending | Medium | Reduce redundant processing |
| [ ] Add support for custom model integration | Pending | Low | Plugin architecture for new models |

### Legend
- **Priority**: High (Critical), Medium (Important), Low (Nice to have)
- **Status**: Pending, 🔄 In Progress, ✅ Completed, 🚫 Blocked

---

## Digital Forensics Implementation

Here is the updated table, with an additional column providing **instructions on how to use these tools with vision LLMs** (e.g., CLIP, vision transformers, or CNNs) for effective AI content detection:

---

### **Top 20 Tools for AI Content Detection (with Vision LLM Integration Guidance)**

| Status | Rank | Tool/Algorithm | Reason | **Agent Guidance / Instructions** |
|--------|------|----------------|--------|-----------------------------------|
| ✅ | 1 | Noise Separation | Detect synthetic noise patterns absent in natural images. | Train the LLM on noise-separated image patches to recognize AI-specific noise textures (e.g., overly smooth or missing thermal noise). |
| 🔄 | 2 | EXIF Full Dump | AI-generated images lack valid metadata (e.g., camera model, geolocation). | Input the image *and its metadata as text* to a **multimodal LLM** (e.g., image + metadata caption). Flag inconsistencies (e.g., missing GPS, invalid timestamps). |
| ✅ | 3 | Error Level Analysis (ELA) | Reveals compression artifacts unique to AI-generated images. | Preprocess images via ELA before input to the LLM. Train the model to detect high-error regions indicative of synthetic content. |
| 🔄 | 4 | JPEG Ghost Maps | Identifies compression history anomalies. | Use ghost maps as a separate input channel (e.g., overlay ELA results on the RGB image) to train the LLM on synthetic vs. natural compression traces. |
| 🔄 | 5 | Copy-Move Forgery | AI models often clone/reuse elements. | Train the LLM to detect duplicated regions via frequency analysis or gradient-based saliency maps (e.g., using a Siamese network to compare image segments). |
| ✅ | 6 | Channel Histograms | Skewed color distributions in AI-generated images. | Feed the **histogram plots** as additional input (e.g., as a grayscale image) to highlight unnatural color profiles in the LLM. |
| 🔄 | 7 | Pixel Statistics | Unnatural RGB value deviations in AI-generated images. | Train the LLM on datasets with metadata tags indicating mean/max/min RGB values, using these stats as part of the training signal. |
| 🔄 | 8 | JPEG Quality Estimation | AI-generated content may have atypical JPEG quality settings. | Preprocess the image to expose JPEG quality artifacts (e.g., blockiness) and train the LLM to identify these patterns via loss functions tuned to compression. |
| 🔄 | 9 | Resampling Detection | AI tools may upscale/rotate images, leaving subpixel-level artifacts. | Use **frequency analysis** modules in the LLM (e.g., Fourier-transformed images) to detect Moiré patterns or grid distortions from resampling. |
| ✅ | 10 | PCA Projection | Highlights synthetic color distributions. | Apply PCA to reduce color dimensions and input the 2D/3D projection to the LLM as a simplified feature space. |
| ✅ | 11 | Bit Planes Values | Detect synthetic noise patterns absent in natural images. | Analyze individual bit planes (e.g., bit planes 1-8) and feed the binary images to the LLM to train on AI-specific bit-plane anomalies. |
| 🔄 | 12 | Median Filtering Traces | AI pre/post-processing steps mimic median filtering. | Train the LLM on synthetically filtered images to recognize AI-applied diffusion artifacts. |
| ✅ | 13 | Wavelet Threshold | Identifies AI-generated texture inconsistencies. | Use wavelet-decomposed images as input channels to the LLM to isolate synthetic textures vs. natural textures. |
| ✅ | 14 | Frequency Split | AI may generate unnatural gradients or sharpness. | Separate high/low frequencies and train the LLM to detect missing high-frequency content in AI-generated regions (e.g., over-smoothed edges). |
| 🔄 | 15 | PRNU Identification | Absence of sensor-specific noise in AI-generated images. | Train the LLM on PRNU-noise databases to detect the absence or mismatch of sensor-specific noise in unlabeled images. |
| 🔄 | 16 | EXIF Tampering Detection | AI may falsify metadata. | Flag images with inconsistent Exif hashes (e.g., mismatched EXIF/visual content) and use metadata tags as training labels. |
| 🔄 | 17 | Composite Splicing | AI-generated images often stitch elements with inconsistencies. | Use **edge-aware models** (e.g., CRFL-like architectures) to detect lighting/shadow mismatches in spliced regions. |
| 🔄 | 18 | RGB/HSV Plots | AI-generated images have unnatural color distributions. | Input RGB/HSV channel plots as 1D signals to the LLM's classifier head, along with the original image. |
| 🔄 | 19 | Dead/Hot Pixel Analysis | Absence of sensor-level imperfections in AI-generated images. | Use pre-trained sensor noise databases to train the LLM to flag images missing dead/hot pixels. |
| 🔄 | 20 | File Digest (Hashing) | Compare to known AI-generated image hashes for rapid detection. | Use hash values as binary tags in a training dataset (e.g., "hash matches known AI model" → label as synthetic). |

### Legend
- **Priority**: High (Critical), Medium (Important), Low (Nice to have)
- **Status**: 🔄 In-Progress, ✅ Completed, 🚫 Blocked

---

### **Hybrid Input Table for AI Content Detection (Planned)**

| **Strategy #** | **Description** | **Input Components** | **Agent Guidance / Instructions** |
|----------------|-----------------|----------------------|-----------------------------------|
| 1 | Combine ELA (Error Level Analysis) with RGB images for texture discrimination. | ELA-processed image + original RGB image (stacked as 4D tensor). | Use a **multi-input CNN** to process ELA maps and RGB images in parallel, or concatenate them into a 6-channel input (3 RGB + 3 ELA). |
| 2 | Use metadata (Exif) and visual content as a **multimodal pair**. | Visual image + Exif metadata (as text caption). | Feed the image and metadata text into a **multimodal LLM** (e.g., CLIP or MMBT). Use a cross-attention module to align metadata with visual features. |
| 3 | Add **histogram plots** as a 1D auxiliary input for color distribution analysis. | Image (3D input) + histogram plots (1D vector or 2D grayscale image). | Train a **dual-stream model** (CNN for image + LSTM/Transformer for histogram data) to learn the relationship between visual and statistical features. |
| 4 | Combine **frequency split images** (high/low) with RGB for texture detection. | High-frequency image + low-frequency image + RGB image (as 3+3+3 input channels). | Use a **frequency-aware CNN** to process each frequency band with separate filters, then merge features for classification. |
| 5 | Train a model on **bit planes values** alongside the original image. | Bit plane images (binary black-and-white layers) + original RGB image. | Stack or concatenate bit plane images with RGB channels before inputting to the LLM. For example, combine 3 bit planes with 3 RGB channels. |
| 6 | Use **PRNU noise maps** and visual features to detect synthetic content. | PRNU-noise map (grayscale) + RGB image (3D input). | Train a **Siamese network** to compare PRNU maps with real-world noise databases. If PRNU is absent or mismatched, flag the image as synthetic. |
| 7 | Stack **hex-editor-derived metadata** (e.g., file header signatures) as a channel. | Hex-derived binary patterns (encoded as 1D or 2D data) + RGB image. | Use a **transformer with 1D hex embeddings** as a metadata input, cross-attending with a ViT (Vision Transformer) for RGB analysis. |
| 8 | Add **dead/hot pixel detection maps** as a mask to highlight sensor artifacts. | Dead/hot pixel mask (binary 2D map) + RGB image. | Concatenate the mask with the RGB image as a 4th channel. Train a U-Net-style model to detect synthetic regions where the mask lacks sensor patterns. |
| 9 | Use **PCA-reduced color projections** as a simplified input for LLMs. | PCA-transformed color embeddings (2D/3D projection) + original image. | Train a **transformer** to learn how PCA-projected color distributions differ between natural and synthetic images. |
| 10 | Integrate **wavelet-decomposed subbands** with RGB for texture discrimination. | Wavelet subbands (LL, LH, HL, HH) + RGB image (stacked as 7D input). | Design a **wavelet-aware CNN** to process each subband separately before global pooling and classification. |

---

### **Key Integration Tips for Hybrid Inputs**
1. **Multimodal Models**
   - Use models like **CLIP**, **BLIP**, or **MBT** to align metadata (text) with visual features (images).
   - For example: combine a **ViT** (for image processing) with a **Transformer** (for Exif metadata or histograms).

2. **Feature Fusion Techniques**
   - **Early fusion**: Concatenate inputs (e.g., ELA + RGB) before the first layer (see the sketch after this list).
   - **Late fusion**: Process inputs separately and merge features before final classification.
   - **Cross-modal attention**: Use cross-attention to align metadata with visual features (e.g., Exif text and PRNU noise maps).

3. **Preprocessing for Hybrid Inputs**
   - Normalize metadata and image data to the same scale (e.g., 0-1).
   - Convert 1D histogram data into 2D images (e.g., heatmap-like plots) for consistent input formats.

4. **Loss Functions for Hybrid Tasks**
   - Use **multi-task loss** (e.g., classification + regression) if metadata is involved.
   - For consistency checks (e.g., metadata vs. visual content), use **triplet loss** or **contrastive loss**.
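
As a concrete illustration of early fusion, here is a sketch with PyTorch; the channel counts and tiny network are illustrative only, not the project's actual architecture:

```python
# Sketch: early fusion of an ELA map with the RGB image as a 6-channel input.
import torch
import torch.nn as nn

class EarlyFusionClassifier(nn.Module):
    def __init__(self, num_classes: int = 2):
        super().__init__()
        self.features = nn.Sequential(
            nn.Conv2d(6, 32, kernel_size=3, padding=1),  # 3 RGB + 3 ELA channels
            nn.ReLU(),
            nn.AdaptiveAvgPool2d(1),
        )
        self.head = nn.Linear(32, num_classes)

    def forward(self, rgb: torch.Tensor, ela: torch.Tensor) -> torch.Tensor:
        x = torch.cat([rgb, ela], dim=1)  # concatenate along the channel axis
        return self.head(self.features(x).flatten(1))

# Usage: logits = EarlyFusionClassifier()(rgb_batch, ela_batch), both shaped (N, 3, H, W).
```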

---
### **Overview of Multi-Model Consensus Methods in ML**
| **Method** | **Category** | **Description** | **Key Advantages** | **Key Limitations** | **Weaknesses** | **Strengths** |
|------------|--------------|-----------------|--------------------|---------------------|----------------|---------------|
| **Bagging (e.g., Random Forest)** | **Traditional Ensembles** | Trains multiple models on bootstrapped data subsets, aggregating predictions | Reduces overfitting (~variance reduction) | Computationally costly for large datasets; models can be correlated | Not robust to adversarial attacks | Simple to implement; robust to noisy data; handles high-dimensional data well |
| **Boosting (e.g., XGBoost, LightGBM)** | **Traditional Ensembles** | Iteratively corrects errors using weighted models | High accuracy on structured/tabular data | Risk of overfitting; sensitive to noisy data | Computationally intensive | Dominates in competitions (e.g., Kaggle); scalable for medium datasets |
| **Stacking** | **Traditional Ensembles** | Combines predictions via a meta-learner | Can outperform individual models; flexible | Increased complexity and data leakage risk | Requires careful hyperparameter tuning | Excels in combining diverse models (e.g., trees + SVMs + linear models) |
| **Deep Ensembles** | **Deep Learning Ensembles** | Multiple independently trained neural networks | Uncertainty estimation; robust to data shifts | High computational cost; memory-heavy | Model coordination challenges | State-of-the-art in safety-critical domains (e.g., medical imaging, autonomous vehicles) |
| **Snapshot Ensembles** | **Deep Learning Ensembles** | Saves models at different optimization stages | Efficient (only one training run) | Limited diversity (same architecture/init) | Requires careful checkpoint selection | Lightweight for tasks like on-device deployment |
| **Monte Carlo Dropout** | **Approximate Ensembles** | Applies dropout at inference to simulate many models | Free ensemble (during testing) | Approximates uncertainty poorly compared to deep ensembles | Limited diversity | Cheap and simple; useful for quick uncertainty estimates |
| **Mixture of Experts (MoE)** | **Scalable Ensembles** | Specialized sub-models (experts) with a gating mechanism | Efficient scaling (only activate sub-models) | Training instability; uneven expert utilization | Requires expert/gate orchestration | Dominates large-scale applications like Switch Transformers and Hyper-Cloud systems |
| **Bayesian Neural Networks (BNNs)** | **Probabilistic Ensembles** | Models weights as probability distributions | Built-in uncertainty quantification | Intractable to train exactly; approximations needed | Difficult optimization | Essential for risk-averse applications (robotics, finance) |
| **Ensemble Knowledge Distillation** | **Model Compression** | Trains a single model to mimic an ensemble | Reduces compute/memory demands | Loses some ensemble benefits (diversity, uncertainty) | Relies on a high-quality teacher ensemble | Enables deployment of ensemble-like performance in compact models (edge devices) |
| **Noisy Student Training** | **Semi-Supervised Ensembles** | Iterative self-training with teacher-student loops | Uses unlabeled data effectively; improves robustness | Needs large unlabeled data and computational resources | Vulnerable to error propagation | State-of-the-art in semi-supervised settings (e.g., NLP) |
| **Evolutionary Ensembles** | **Dynamic Ensembles** | Uses genetic algorithms to evolve model populations | Adaptive diversity generation | High time/cost for evolution; niche use cases | Hard to interpret | Useful for non-stationary environments/datasets with drift |
| **Consensus Networks** | **NLP/Serverless Ensembles** | Distributes models across clients/aggregates votes | Decentralized privacy-preserving predictions | Communication overhead; non-i.i.d. data conflicts | Requires synchronized coordination | Fed into federated learning systems (e.g., healthcare, finance) |
| **Hybrid Systems** | **Cross-Architecture Ensembles** | Combines models (e.g., CNNs, GNNs, transformers) | Captures multi-modal or heterogeneous patterns | Integration complexity; delayed inference | Model conflicts | Dominates in tasks requiring domain-specific reasoning (e.g., drug discovery) |
| **Self-Supervised Ensembles** | **Vision/NLP** | Uses contrastive learning with multiple models (e.g., MoCo, SimCLR) | Data-efficient; strong performance on downstream tasks | Training is resource-heavy; requires pre-training at scale | Low interpretability | Foundations for modern vision/NLP architectures (e.g., resists data scarcity) |
---
agents/ensemble_team.py
ADDED
@@ -0,0 +1,131 @@
import logging
import time
import torch
import psutil  # Ensure psutil is imported here as well
import GPUtil
from datetime import datetime, timedelta
import gc  # Import garbage collector

logger = logging.getLogger(__name__)

class EnsembleMonitorAgent:
    def __init__(self):
        logger.info("Initializing EnsembleMonitorAgent.")
        self.performance_metrics = {}
        self.alerts = []

    def monitor_prediction(self, model_id, prediction_label, confidence_score, inference_time):
        logger.info(f"Monitoring prediction for model '{model_id}'. Label: {prediction_label}, Confidence: {confidence_score:.2f}, Time: {inference_time:.4f}s")
        if model_id not in self.performance_metrics:
            self.performance_metrics[model_id] = {
                "total_predictions": 0,
                "correct_predictions": 0,  # This would require ground truth, which we don't have here.
                "total_confidence": 0.0,
                "total_inference_time": 0.0
            }

        metrics = self.performance_metrics[model_id]
        metrics["total_predictions"] += 1
        metrics["total_confidence"] += confidence_score
        metrics["total_inference_time"] += inference_time

        # Example alert: model taking too long
        if inference_time > 5.0:  # Threshold for slow inference
            alert_msg = f"ALERT: Model '{model_id}' inference time exceeded 5.0s: {inference_time:.4f}s"
            self.alerts.append(alert_msg)
            logger.warning(alert_msg)

        # Example alert: low confidence
        if confidence_score < 0.5:  # Threshold for low confidence
            alert_msg = f"ALERT: Model '{model_id}' returned low confidence: {confidence_score:.2f}"
            self.alerts.append(alert_msg)
            logger.warning(alert_msg)

        logger.info(f"Updated metrics for '{model_id}': {metrics}")

    def get_performance_summary(self):
        logger.info("Generating performance summary for all models.")
        summary = {}
        for model_id, metrics in self.performance_metrics.items():
            avg_confidence = metrics["total_confidence"] / metrics["total_predictions"] if metrics["total_predictions"] > 0 else 0
            avg_inference_time = metrics["total_inference_time"] / metrics["total_predictions"] if metrics["total_predictions"] > 0 else 0
            summary[model_id] = {
                "avg_confidence": avg_confidence,
                "avg_inference_time": avg_inference_time,
                "total_predictions": metrics["total_predictions"]
            }
        logger.info(f"Performance summary: {summary}")
        return summary

class WeightOptimizationAgent:
    def __init__(self, weight_manager):
        logger.info("Initializing WeightOptimizationAgent.")
        self.weight_manager = weight_manager
        self.prediction_history = []
        self.performance_window = timedelta(hours=24)  # Evaluate performance over the last 24 hours

    def analyze_performance(self, final_prediction, ground_truth=None):
        logger.info(f"Analyzing performance. Final prediction: {final_prediction}, Ground truth: {ground_truth}")
        timestamp = datetime.now()
        self.prediction_history.append({
            "timestamp": timestamp,
            "final_prediction": final_prediction,
            "ground_truth": ground_truth  # Ground truth is often not available in real-time
        })

        # Keep history windowed
        self.prediction_history = [p for p in self.prediction_history if timestamp - p["timestamp"] < self.performance_window]
        logger.info(f"Prediction history length: {len(self.prediction_history)}")

        # In a real scenario, this would involve a more complex optimization logic.
        # For now, it just logs the history length.

class SystemHealthAgent:
    def __init__(self):
        logger.info("Initializing SystemHealthAgent.")
        self.health_metrics = {
            "cpu_percent": 0,
            "memory_usage": {"total": 0, "available": 0, "percent": 0},
            "gpu_utilization": []
        }

    def monitor_system_health(self):
        logger.info("Monitoring system health...")
        self.health_metrics["cpu_percent"] = psutil.cpu_percent(interval=1)
        mem = psutil.virtual_memory()
        self.health_metrics["memory_usage"] = {
            "total": mem.total,
            "available": mem.available,
            "percent": mem.percent
        }

        # Holy moly, been at 99% for hours whoops
        if mem.percent > 90:
            logger.warning(f"CRITICAL: System memory usage is at {mem.percent}%. Attempting to clear memory cache...")
            gc.collect()
            logger.info("Garbage collection triggered. Re-checking memory usage...")
            mem_after_gc = psutil.virtual_memory()
            self.health_metrics["memory_usage_after_gc"] = {
                "total": mem_after_gc.total,
                "available": mem_after_gc.available,
                "percent": mem_after_gc.percent
            }
            logger.info(f"Memory usage after GC: {mem_after_gc.percent}%")

        gpu_info = []
        try:
            gpus = GPUtil.getGPUs()
            for gpu in gpus:
                gpu_info.append({
                    "id": gpu.id,
                    "name": gpu.name,
                    "load": gpu.load,
                    "memoryUtil": gpu.memoryUtil,
                    "memoryTotal": gpu.memoryTotal,
                    "memoryUsed": gpu.memoryUsed
                })
        except Exception as e:
            logger.warning(f"Could not retrieve GPU information: {e}")
            gpu_info.append({"error": str(e)})
        self.health_metrics["gpu_utilization"] = gpu_info
        logger.info(f"System health metrics: CPU: {self.health_metrics['cpu_percent']}%, Memory: {self.health_metrics['memory_usage']['percent']}%, GPU: {gpu_info}")
agents/{weight_management.py → ensemble_weights.py}
RENAMED
@@ -1,10 +1,12 @@
|
|
1 |
import logging
|
2 |
import torch
|
|
|
3 |
|
4 |
logger = logging.getLogger(__name__)
|
5 |
|
6 |
class ContextualWeightOverrideAgent:
|
7 |
def __init__(self):
|
|
|
8 |
self.context_overrides = {
|
9 |
# Example: when image is outdoor, model_X is penalized, model_Y is boosted
|
10 |
"outdoor": {
|
@@ -23,7 +25,7 @@ class ContextualWeightOverrideAgent:
|
|
23 |
}
|
24 |
|
25 |
def get_overrides(self, context_tags: list[str]) -> dict:
|
26 |
-
"
|
27 |
combined_overrides = {}
|
28 |
for tag in context_tags:
|
29 |
if tag in self.context_overrides:
|
@@ -31,21 +33,37 @@ class ContextualWeightOverrideAgent:
|
|
31 |
# If a model appears in multiple contexts, we can decide how to combine (e.g., multiply, average, take max)
|
32 |
# For now, let's just take the last one if there are conflicts, or multiply for simple cumulative effect.
|
33 |
combined_overrides[model_id] = combined_overrides.get(model_id, 1.0) * multiplier
|
|
|
34 |
return combined_overrides
|
35 |
|
36 |
|
37 |
class ModelWeightManager:
|
38 |
-
def __init__(self):
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
49 |
self.situation_weights = {
|
50 |
"high_confidence": 1.2, # Boost weights for high confidence predictions
|
51 |
"low_confidence": 0.8, # Reduce weights for low confidence
|
@@ -56,52 +74,80 @@ class ModelWeightManager:
|
|
56 |
|
57 |
def adjust_weights(self, predictions, confidence_scores, context_tags: list[str] = None):
|
58 |
"""Dynamically adjust weights based on prediction patterns and optional context."""
|
|
|
59 |
adjusted_weights = self.base_weights.copy()
|
|
|
60 |
|
61 |
# 1. Apply contextual overrides first
|
62 |
if context_tags:
|
|
|
63 |
overrides = self.context_override_agent.get_overrides(context_tags)
|
64 |
for model_id, multiplier in overrides.items():
|
65 |
adjusted_weights[model_id] = adjusted_weights.get(model_id, 0.0) * multiplier
|
|
|
66 |
|
67 |
# 2. Apply situation-based adjustments (consensus, conflict, confidence)
|
68 |
# Check for consensus
|
69 |
-
|
|
|
|
|
70 |
for model in adjusted_weights:
|
71 |
adjusted_weights[model] *= self.situation_weights["consensus"]
|
|
|
72 |
|
73 |
# Check for conflicts
|
74 |
-
|
|
|
|
|
75 |
for model in adjusted_weights:
|
76 |
adjusted_weights[model] *= self.situation_weights["conflict"]
|
|
|
77 |
|
78 |
# Adjust based on confidence
|
|
|
79 |
for model, confidence in confidence_scores.items():
|
80 |
if confidence > 0.8:
|
81 |
adjusted_weights[model] *= self.situation_weights["high_confidence"]
|
|
|
82 |
elif confidence < 0.5:
|
83 |
adjusted_weights[model] *= self.situation_weights["low_confidence"]
|
|
|
|
|
84 |
|
85 |
-
|
|
|
|
|
86 |
|
87 |
def _has_consensus(self, predictions):
|
88 |
"""Check if models agree on prediction"""
|
89 |
-
|
90 |
non_none_predictions = [p.get("Label") for p in predictions.values() if p is not None and isinstance(p, dict) and p.get("Label") is not None and p.get("Label") != "Error"]
|
91 |
-
|
|
|
|
|
|
|
92 |
|
93 |
def _has_conflicts(self, predictions):
|
94 |
"""Check if models have conflicting predictions"""
|
95 |
-
|
96 |
non_none_predictions = [p.get("Label") for p in predictions.values() if p is not None and isinstance(p, dict) and p.get("Label") is not None and p.get("Label") != "Error"]
|
97 |
-
|
|
|
|
|
|
|
98 |
|
99 |
def _normalize_weights(self, weights):
|
100 |
"""Normalize weights to sum to 1"""
|
|
|
101 |
total = sum(weights.values())
|
102 |
if total == 0:
|
103 |
-
|
104 |
-
#
|
105 |
-
|
106 |
-
|
107 |
-
|
|
|
|
|
|
|
|
|
|
|
|
 import logging
 import torch
+from utils.registry import MODEL_REGISTRY # Import MODEL_REGISTRY

 logger = logging.getLogger(__name__)

 class ContextualWeightOverrideAgent:
     def __init__(self):
+        logger.info("Initializing ContextualWeightOverrideAgent.")
         self.context_overrides = {
             # Example: when image is outdoor, model_X is penalized, model_Y is boosted
             "outdoor": {
             ...
         }

     def get_overrides(self, context_tags: list[str]) -> dict:
+        logger.info(f"Getting weight overrides for context tags: {context_tags}")
         combined_overrides = {}
         for tag in context_tags:
             if tag in self.context_overrides:
                 ...
                 # If a model appears in multiple contexts, we can decide how to combine (e.g., multiply, average, take max)
                 # For now, let's just take the last one if there are conflicts, or multiply for simple cumulative effect.
                 combined_overrides[model_id] = combined_overrides.get(model_id, 1.0) * multiplier
+        logger.info(f"Combined context overrides: {combined_overrides}")
         return combined_overrides


 class ModelWeightManager:
+    def __init__(self, strongest_model_id: str = None):
+        logger.info(f"Initializing ModelWeightManager with strongest_model_id: {strongest_model_id}")
+        # Dynamically initialize base_weights from MODEL_REGISTRY
+        num_models = len(MODEL_REGISTRY)
+        if num_models > 0:
+            if strongest_model_id and strongest_model_id in MODEL_REGISTRY:
+                logger.info(f"Designating '{strongest_model_id}' as the strongest model.")
+                # Assign a high weight to the strongest model (e.g., 50%)
+                strongest_weight_share = 0.5
+                self.base_weights = {strongest_model_id: strongest_weight_share}
+                remaining_models = [mid for mid in MODEL_REGISTRY.keys() if mid != strongest_model_id]
+                if remaining_models:
+                    other_models_weight_share = (1.0 - strongest_weight_share) / len(remaining_models)
+                    for model_id in remaining_models:
+                        self.base_weights[model_id] = other_models_weight_share
+                else: # Only one model, which is the strongest
+                    self.base_weights[strongest_model_id] = 1.0
+            else:
+                if strongest_model_id and strongest_model_id not in MODEL_REGISTRY:
+                    logger.warning(f"Strongest model ID '{strongest_model_id}' not found in MODEL_REGISTRY. Distributing weights equally.")
+                initial_weight = 1.0 / num_models
+                self.base_weights = {model_id: initial_weight for model_id in MODEL_REGISTRY.keys()}
+        else:
+            self.base_weights = {} # Handle case with no registered models
+        logger.info(f"Base weights initialized: {self.base_weights}")
+
         self.situation_weights = {
             "high_confidence": 1.2, # Boost weights for high confidence predictions
             "low_confidence": 0.8, # Reduce weights for low confidence
             ...

     def adjust_weights(self, predictions, confidence_scores, context_tags: list[str] = None):
         """Dynamically adjust weights based on prediction patterns and optional context."""
+        logger.info("Adjusting model weights.")
         adjusted_weights = self.base_weights.copy()
+        logger.info(f"Initial adjusted weights (copy of base): {adjusted_weights}")

         # 1. Apply contextual overrides first
         if context_tags:
+            logger.info(f"Applying contextual overrides for tags: {context_tags}")
             overrides = self.context_override_agent.get_overrides(context_tags)
             for model_id, multiplier in overrides.items():
                 adjusted_weights[model_id] = adjusted_weights.get(model_id, 0.0) * multiplier
+            logger.info(f"Adjusted weights after context overrides: {adjusted_weights}")

         # 2. Apply situation-based adjustments (consensus, conflict, confidence)
         # Check for consensus
+        has_consensus = self._has_consensus(predictions)
+        if has_consensus:
+            logger.info("Consensus detected. Boosting weights for consensus.")
             for model in adjusted_weights:
                 adjusted_weights[model] *= self.situation_weights["consensus"]
+            logger.info(f"Adjusted weights after consensus boost: {adjusted_weights}")

         # Check for conflicts
+        has_conflicts = self._has_conflicts(predictions)
+        if has_conflicts:
+            logger.info("Conflicts detected. Reducing weights for conflict.")
             for model in adjusted_weights:
                 adjusted_weights[model] *= self.situation_weights["conflict"]
+            logger.info(f"Adjusted weights after conflict reduction: {adjusted_weights}")

         # Adjust based on confidence
+        logger.info("Adjusting weights based on model confidence scores.")
         for model, confidence in confidence_scores.items():
             if confidence > 0.8:
                 adjusted_weights[model] *= self.situation_weights["high_confidence"]
+                logger.info(f"Model '{model}' has high confidence ({confidence:.2f}). Weight boosted.")
             elif confidence < 0.5:
                 adjusted_weights[model] *= self.situation_weights["low_confidence"]
+                logger.info(f"Model '{model}' has low confidence ({confidence:.2f}). Weight reduced.")
+        logger.info(f"Adjusted weights before normalization: {adjusted_weights}")

+        normalized_weights = self._normalize_weights(adjusted_weights)
+        logger.info(f"Final normalized adjusted weights: {normalized_weights}")
+        return normalized_weights

     def _has_consensus(self, predictions):
         """Check if models agree on prediction"""
+        logger.info("Checking for consensus among model predictions.")
         non_none_predictions = [p.get("Label") for p in predictions.values() if p is not None and isinstance(p, dict) and p.get("Label") is not None and p.get("Label") != "Error"]
+        logger.debug(f"Non-none predictions for consensus check: {non_none_predictions}")
+        result = len(non_none_predictions) > 0 and len(set(non_none_predictions)) == 1
+        logger.info(f"Consensus detected: {result}")
+        return result

     def _has_conflicts(self, predictions):
         """Check if models have conflicting predictions"""
+        logger.info("Checking for conflicts among model predictions.")
         non_none_predictions = [p.get("Label") for p in predictions.values() if p is not None and isinstance(p, dict) and p.get("Label") is not None and p.get("Label") != "Error"]
+        logger.debug(f"Non-none predictions for conflict check: {non_none_predictions}")
+        result = len(non_none_predictions) > 1 and len(set(non_none_predictions)) > 1
+        logger.info(f"Conflicts detected: {result}")
+        return result

     def _normalize_weights(self, weights):
         """Normalize weights to sum to 1"""
+        logger.info("Normalizing weights.")
         total = sum(weights.values())
         if total == 0:
+            logger.warning("All weights became zero after adjustments. Reverting to equal base weights for registered models.")
+            # Revert to equal weights for all *registered* models if total becomes zero
+            num_registered_models = len(MODEL_REGISTRY)
+            if num_registered_models > 0:
+                return {k: 1.0/num_registered_models for k in MODEL_REGISTRY.keys()}
+            else:
+                return {} # No models registered
+        normalized = {k: v/total for k, v in weights.items()}
+        logger.info(f"Weights normalized. Total sum: {sum(normalized.values()):.2f}")
+        return normalized
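A minimal usage sketch of the weight manager above (illustrative only, not part of the committed file; the model ids, labels, and scores below are made up):

# Illustrative sketch, assuming "model_2" and "model_3" are already in MODEL_REGISTRY.
from agents.ensemble_weights import ModelWeightManager

weight_manager = ModelWeightManager(strongest_model_id="model_2")
predictions = {
    "model_2": {"Label": "AI", "AI Score": 0.91, "Real Score": 0.09},
    "model_3": {"Label": "AI", "AI Score": 0.64, "Real Score": 0.36},
}
confidence_scores = {"model_2": 0.91, "model_3": 0.64}
weights = weight_manager.adjust_weights(predictions, confidence_scores, context_tags=["outdoor"])
print(weights)  # normalized per-model weights summing to 1.0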
agents/monitoring_agents.py
DELETED
@@ -1,135 +0,0 @@
import logging
import time
import torch
import psutil # Ensure psutil is imported here as well

logger = logging.getLogger(__name__)

class EnsembleMonitorAgent:
    def __init__(self):
        self.performance_metrics = {
            "model_accuracy": {},
            "response_times": {},
            "confidence_distribution": {},
            "consensus_rate": 0.0
        }
        self.alerts = []

    def monitor_prediction(self, model_id, prediction, confidence, response_time):
        """Monitor individual model performance"""
        if model_id not in self.performance_metrics["model_accuracy"]:
            self.performance_metrics["model_accuracy"][model_id] = []
            self.performance_metrics["response_times"][model_id] = []
            self.performance_metrics["confidence_distribution"][model_id] = []

        self.performance_metrics["response_times"][model_id].append(response_time)
        self.performance_metrics["confidence_distribution"][model_id].append(confidence)

        # Check for performance issues
        self._check_performance_issues(model_id)

    def _check_performance_issues(self, model_id):
        """Check for any performance anomalies"""
        response_times = self.performance_metrics["response_times"][model_id]
        if len(response_times) > 10:
            avg_time = sum(response_times[-10:]) / 10
            if avg_time > 2.0: # More than 2 seconds
                self.alerts.append(f"High latency detected for {model_id}: {avg_time:.2f}s")

class WeightOptimizationAgent:
    def __init__(self, weight_manager):
        self.weight_manager = weight_manager
        self.prediction_history = [] # Stores (ensemble_prediction_label, assumed_actual_label)
        self.optimization_threshold = 0.05 # 5% change in accuracy triggers optimization
        self.min_history_for_optimization = 20 # Minimum samples before optimizing

    def analyze_performance(self, ensemble_prediction_label, actual_label=None):
        """Analyze ensemble performance and record for optimization"""
        # If actual_label is not provided, assume ensemble is correct if not UNCERTAIN
        assumed_actual_label = actual_label
        if assumed_actual_label is None and ensemble_prediction_label != "UNCERTAIN":
            assumed_actual_label = ensemble_prediction_label

        self.prediction_history.append((ensemble_prediction_label, assumed_actual_label))

        if len(self.prediction_history) >= self.min_history_for_optimization and self._should_optimize():
            self._optimize_weights()

    def _calculate_accuracy(self, history_subset):
        """Calculates accuracy based on history where actual_label is known."""
        correct_predictions = 0
        total_known = 0
        for ensemble_pred, actual_label in history_subset:
            if actual_label is not None:
                total_known += 1
                if ensemble_pred == actual_label:
                    correct_predictions += 1
        return correct_predictions / total_known if total_known > 0 else 0.0

    def _should_optimize(self):
        """Determine if weights should be optimized based on recent performance change."""
        if len(self.prediction_history) < self.min_history_for_optimization * 2: # Need enough history for comparison
            return False

        # Compare accuracy of recent batch with previous batch
        recent_batch = self.prediction_history[-self.min_history_for_optimization:]
        previous_batch = self.prediction_history[-self.min_history_for_optimization*2:-self.min_history_for_optimization]

        recent_accuracy = self._calculate_accuracy(recent_batch)
        previous_accuracy = self._calculate_accuracy(previous_batch)

        # Trigger optimization if there's a significant drop in accuracy
        if previous_accuracy > 0 and (previous_accuracy - recent_accuracy) / previous_accuracy > self.optimization_threshold:
            logger.warning(f"Performance degradation detected (from {previous_accuracy:.2f} to {recent_accuracy:.2f}). Triggering weight optimization.")
            return True
        return False

    def _optimize_weights(self):
        """Optimize model weights based on performance."""
        logger.info("Optimizing model weights based on recent performance.")
        # Placeholder for sophisticated optimization logic.
        # This is where you would adjust self.weight_manager.base_weights
        # based on which models contributed more to correct predictions or errors.
        # For now, it's just a log message.


class SystemHealthAgent:
    def __init__(self):
        self.health_metrics = {
            "memory_usage": [],
            "gpu_utilization": [],
            "model_load_times": {},
            "error_rates": {}
        }

    def monitor_system_health(self):
        """Monitor overall system health"""
        self._check_memory_usage()
        self._check_gpu_utilization()
        # You might add _check_model_health() here later

    def _check_memory_usage(self):
        """Monitor memory usage"""
        try:
            import psutil
            memory = psutil.virtual_memory()
            self.health_metrics["memory_usage"].append(memory.percent)

            if memory.percent > 90:
                logger.warning(f"High memory usage detected: {memory.percent}%")
        except ImportError:
            logger.warning("psutil not installed. Cannot monitor memory usage.")

    def _check_gpu_utilization(self):
        """Monitor GPU utilization if available"""
        if torch.cuda.is_available():
            try:
                gpu_util = torch.cuda.memory_allocated() / torch.cuda.max_memory_allocated()
                self.health_metrics["gpu_utilization"].append(gpu_util)

                if gpu_util > 0.9:
                    logger.warning(f"High GPU utilization detected: {gpu_util*100:.2f}%")
            except Exception as e:
                logger.warning(f"Error monitoring GPU utilization: {e}")
        else:
            logger.info("CUDA not available. Skipping GPU utilization monitoring.")
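These classes are removed here and are imported from agents/ensemble_team.py by the new app.py below. A minimal usage sketch, assuming the same interface as the deleted file (timings and ids are made up; illustrative only):

# Illustrative sketch, not part of the commit.
from agents.ensemble_team import EnsembleMonitorAgent, SystemHealthAgent

monitor = EnsembleMonitorAgent()
health = SystemHealthAgent()
health.monitor_system_health()                            # records memory / GPU metrics
monitor.monitor_prediction("model_2", "AI", 0.91, 0.42)   # model id, label, confidence, seconds
print(monitor.alerts)                                     # high-latency alerts, if any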
agents/smart_agents.py
CHANGED
@@ -1,54 +1,126 @@
 import logging
-

 logger = logging.getLogger(__name__)

 class ContextualIntelligenceAgent:
     def __init__(self):
-
-

-    def infer_context_tags(self,
-        "
-
-        #
-
-            context_tags.append("potentially_natural_scene")

-        # Mock external detection (e.g., from a simpler scene classification model or EXIF data)
-        # For demonstration, we'll hardcode some possible tags here.
-        # In a real system, you'd feed actual image features or metadata to an LLM.
-        mock_tags = ["outdoor", "sunny"] # These could be returned by an actual LLM based on input
-        for tag in mock_tags:
-            if tag not in context_tags:
-                context_tags.append(tag)
-
-        return context_tags

 class ForensicAnomalyDetectionAgent:
     def __init__(self):
-
-

     def analyze_forensic_outputs(self, forensic_output_descriptions: list[str]) -> dict:
-        "
-
-        for
-
-
-        if len(
-

-
 import logging
+import torch
+import numpy as np
+from PIL import Image # For image processing context
+# import smolagents # Removed unused import

 logger = logging.getLogger(__name__)

 class ContextualIntelligenceAgent:
     def __init__(self):
+        logger.info("Initializing ContextualIntelligenceAgent.")
+        # This would be a more sophisticated model in a real scenario
+        self.context_rules = {
+            "high_resolution": {"min_width": 1920, "min_height": 1080, "tag": "high_resolution_image"},
+            "low_resolution": {"max_width": 640, "max_height": 480, "tag": "low_resolution_image"},
+            "grayscale": {"mode": "L", "tag": "grayscale_image"},
+            "potentially_natural_scene": {"keywords": ["Real"], "threshold": 0.7, "tag": "potentially_natural_scene"},
+            "potentially_ai_generated": {"keywords": ["AI", "Fake", "Deepfake"], "threshold": 0.7, "tag": "potentially_ai_generated"},
+            "outdoor": {"model_tags": ["sunny", "sky", "trees"], "tag": "outdoor"},
+            "indoor": {"model_tags": ["room", "furniture"], "tag": "indoor"},
+            "sunny": {"rgb_avg_min": [200, 200, 100], "tag": "sunny"},
+            "dark": {"rgb_avg_max": [50, 50, 50], "tag": "dark"},
+        }

+    def infer_context_tags(self, image_metadata: dict, model_predictions: dict) -> list[str]:
+        logger.info("Inferring context tags from image metadata and model predictions.")
+        detected_tags = []
+
+        # Analyze image metadata
+        width = image_metadata.get("width", 0)
+        height = image_metadata.get("height", 0)
+        mode = image_metadata.get("mode", "RGB")
+
+        if width >= self.context_rules["high_resolution"]["min_width"] and \
+           height >= self.context_rules["high_resolution"]["min_height"]:
+            detected_tags.append(self.context_rules["high_resolution"]["tag"])
+            logger.debug(f"Detected tag: {self.context_rules['high_resolution']['tag']}")
+
+        if width <= self.context_rules["low_resolution"]["max_width"] and \
+           height <= self.context_rules["low_resolution"]["max_height"]:
+            detected_tags.append(self.context_rules["low_resolution"]["tag"])
+            logger.debug(f"Detected tag: {self.context_rules['low_resolution']['tag']}")
+
+        if mode == self.context_rules["grayscale"]["mode"]:
+            detected_tags.append(self.context_rules["grayscale"]["tag"])
+            logger.debug(f"Detected tag: {self.context_rules['grayscale']['tag']}")
+
+        # Analyze model predictions for general context
+        for model_id, prediction in model_predictions.items():
+            label = prediction.get("Label")
+            ai_score = prediction.get("AI Score", 0.0)
+            real_score = prediction.get("Real Score", 0.0)
+
+            if label and "potentially_natural_scene" not in detected_tags:
+                for keyword in self.context_rules["potentially_natural_scene"]["keywords"]:
+                    if keyword in label and real_score >= self.context_rules["potentially_natural_scene"]["threshold"]:
+                        detected_tags.append(self.context_rules["potentially_natural_scene"]["tag"])
+                        logger.debug(f"Detected tag: {self.context_rules['potentially_natural_scene']['tag']}")
+                        break # Only add once
+
+            if label and "potentially_ai_generated" not in detected_tags:
+                for keyword in self.context_rules["potentially_ai_generated"]["keywords"]:
+                    if keyword in label and ai_score >= self.context_rules["potentially_ai_generated"]["threshold"]:
+                        detected_tags.append(self.context_rules["potentially_ai_generated"]["tag"])
+                        logger.debug(f"Detected tag: {self.context_rules['potentially_ai_generated']['tag']}")
+                        break # Only add once

+        # Simulate simple scene detection based on general consensus if available
+        # This is a very basic simulation; a real system would use a separate scene classification model
+        if "potentially_natural_scene" in detected_tags and "potentially_ai_generated" not in detected_tags:
+            # Simulate outdoor/sunny detection based on presence of a real image tag
+            # In a real scenario, this would involve analyzing image features
+            if real_score > 0.8: # Placeholder for actual image feature analysis
+                detected_tags.append(self.context_rules["outdoor"]["tag"])
+                detected_tags.append(self.context_rules["sunny"]["tag"])
+                logger.debug(f"Simulated tags: {self.context_rules['outdoor']['tag']},{self.context_rules['sunny']['tag']}")

+        logger.info(f"Inferred context tags: {detected_tags}")
+        return detected_tags

 class ForensicAnomalyDetectionAgent:
     def __init__(self):
+        logger.info("Initializing ForensicAnomalyDetectionAgent.")
+        self.anomaly_thresholds = {
+            "ELA": {"min_anomalies": 3, "max_error_std": 20}, # Example thresholds
+            "gradient": {"min_sharp_edges": 500},
+            "minmax": {"min_local_deviation": 0.1}
+        }

     def analyze_forensic_outputs(self, forensic_output_descriptions: list[str]) -> dict:
+        logger.info("Analyzing forensic outputs for anomalies.")
+        anomalies_detected = []
+        summary_message = "No significant anomalies detected."
+
+        # Example: Check for ELA anomalies (simplified)
+        ela_anomalies = [desc for desc in forensic_output_descriptions if "ELA analysis" in desc and "enhanced contrast" in desc]
+        if len(ela_anomalies) > self.anomaly_thresholds["ELA"]["min_anomalies"]:
+            anomalies_detected.append("Multiple ELA passes indicate potential inconsistencies.")
+            logger.warning("Detected multiple ELA passes indicating potential inconsistencies.")
+
+        # Example: Check for gradient anomalies (simplified)
+        gradient_anomalies = [desc for desc in forensic_output_descriptions if "Gradient processing" in desc]
+        if len(gradient_anomalies) > 1 and "Highlights edges and transitions" in gradient_anomalies[0]:
+            # This is a placeholder for actual image analysis, e.g., checking standard deviation of gradients
+            anomalies_detected.append("Gradient analysis shows unusual edge patterns.")
+            logger.warning("Detected unusual edge patterns from gradient analysis.")
+
+        # Example: Check for MinMax anomalies (simplified)
+        minmax_anomalies = [desc for desc in forensic_output_descriptions if "MinMax processing" in desc]
+        if len(minmax_anomalies) > 1 and "Deviations in local pixel values" in minmax_anomalies[0]:
+            # Placeholder for actual analysis of minmax output, e.g., deviation variance
+            anomalies_detected.append("MinMax processing reveals subtle pixel deviations.")
+            logger.warning("Detected subtle pixel deviations from MinMax processing.")

+        if "Bit Plane extractor" in str(forensic_output_descriptions):
+            anomalies_detected.append("Bit Plane extraction performed.")
+            logger.info("Bit Plane extraction performed.")
+
+        if anomalies_detected:
+            summary_message = "Potential anomalies detected: " + "; ".join(anomalies_detected)
+            logger.warning(f"Forensic anomaly detection summary: {summary_message}")
+        else:
+            logger.info(f"Forensic anomaly detection summary: {summary_message}")
+
+        return {"anomalies": anomalies_detected, "summary": summary_message}
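A minimal usage sketch of the two agents above (illustrative only; the metadata and prediction values are made up):

# Illustrative sketch, not part of the committed file.
from agents.smart_agents import ContextualIntelligenceAgent, ForensicAnomalyDetectionAgent

context_agent = ContextualIntelligenceAgent()
tags = context_agent.infer_context_tags(
    {"width": 2048, "height": 1365, "mode": "RGB"},
    {"model_3": {"Label": "Real", "AI Score": 0.1, "Real Score": 0.9}},
)
# e.g. ["high_resolution_image", "potentially_natural_scene", "outdoor", "sunny"]

anomaly_agent = ForensicAnomalyDetectionAgent()
report = anomaly_agent.analyze_forensic_outputs(["ELA analysis (Pass 1): enhanced contrast."])
print(report["summary"])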
app.backup.py
CHANGED
@@ -13,10 +13,10 @@ import numpy as np
 import io
 import logging
 from utils.utils import softmax, augment_image, convert_pil_to_bytes
-from
-from
-from
-from
+from forensics.gradient import gradient_processing
+from forensics.minmax import minmax_process
+from forensics.ela import ELA
+from forensics.wavelet import wavelet_blocking_noise_estimation

 # Configure logging
 logging.basicConfig(level=logging.INFO)
@@ -276,7 +276,7 @@ def predict_image_with_html(img, confidence_threshold, augment_methods, rotate_d
 img_np_og = np.array(img) # Convert PIL Image to NumPy array

 gradient_image = gradient_processing(img_np) # Added gradient processing
-minmax_image =
+minmax_image = minmax_process(img_np) # Added MinMax processing

 # First pass - standard analysis
 ela1 = ELA(img_np_og, quality=75, scale=50, contrast=20, linear=False, grayscale=True)
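The imports above move the pixel-level tools into the forensics package. A direct-call sketch (illustrative only; "sample.jpg" is a placeholder, and the parameter values mirror the calls shown in this commit):

# Illustrative sketch, not part of the commit.
import numpy as np
from PIL import Image
from forensics.ela import ELA
from forensics.minmax import minmax_process
from forensics.gradient import gradient_processing

img_np = np.array(Image.open("sample.jpg").convert("RGB"))
ela_map = ELA(img_np, quality=75, scale=50, contrast=20, linear=False, grayscale=True)
minmax_map = minmax_process(img_np)          # default channel / radius
gradient_map = gradient_processing(img_np)   # default intensity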
app.py
ADDED
@@ -0,0 +1,728 @@
import gradio as gr
from gradio_client import Client, handle_file
from PIL import Image, ImageFilter
import numpy as np
import os
import time
import logging
import io
import collections
import onnxruntime

from utils.utils import softmax, augment_image
from forensics.gradient import gradient_processing
from forensics.minmax import minmax_process
from forensics.ela import ELA
from forensics.wavelet import noise_estimation
from forensics.bitplane import bit_plane_extractor
from utils.hf_logger import log_inference_data
from utils.load import load_image
from agents.ensemble_team import EnsembleMonitorAgent, WeightOptimizationAgent, SystemHealthAgent
from agents.smart_agents import ContextualIntelligenceAgent, ForensicAnomalyDetectionAgent
from utils.registry import register_model, MODEL_REGISTRY, ModelEntry
from agents.ensemble_weights import ModelWeightManager
from transformers import pipeline, AutoImageProcessor, SwinForImageClassification, Swinv2ForImageClassification, AutoFeatureExtractor, AutoModelForImageClassification
from torchvision import transforms
import torch
import json
from huggingface_hub import CommitScheduler
from dotenv import load_dotenv

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
os.environ['HF_HUB_CACHE'] = './models'

# --- Gradio Log Handler ---
class GradioLogHandler(logging.Handler):
    def __init__(self, log_queue):
        super().__init__()
        self.log_queue = log_queue
        self.setFormatter(logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s'))

    def emit(self, record):
        self.log_queue.append(self.format(record))

log_queue = collections.deque(maxlen=1000) # Store last 1000 log messages
gradio_handler = GradioLogHandler(log_queue)

# Set root logger level to INFO so messages from the agents reach the Gradio handler
logging.getLogger().setLevel(logging.INFO)
logging.getLogger().addHandler(gradio_handler)
# --- End Gradio Log Handler ---

LOCAL_LOG_DIR = "./hf_inference_logs"
HF_DATASET_NAME = "aiwithoutborders-xyz/degentic_rd0"
load_dotenv()

# Custom JSON Encoder to handle numpy types
class NumpyEncoder(json.JSONEncoder):
    def default(self, obj):
        if isinstance(obj, np.float32):
            return float(obj)
        return json.JSONEncoder.default(self, obj)
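# Illustrative sketch (not part of the committed file): NumpyEncoder lets json.dumps
# serialize the numpy.float32 scores the models return; without cls=NumpyEncoder the
# dump would raise TypeError.
# scores = {"AI Score": np.float32(0.91), "Real Score": np.float32(0.09)}
# print(json.dumps(scores, cls=NumpyEncoder))  # {"AI Score": 0.91..., "Real Score": 0.09...}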

# Ensure using GPU if available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Model paths and class names (copied from app_mcp.py)
MODEL_PATHS = {
    "model_1": "haywoodsloan/ai-image-detector-deploy",
    "model_2": "Heem2/AI-vs-Real-Image-Detection",
    "model_3": "Organika/sdxl-detector",
    "model_4": "cmckinle/sdxl-flux-detector_v1.1",
    "model_5": "prithivMLmods/Deep-Fake-Detector-v2-Model",
    "model_6": "ideepankarsharma2003/AI_ImageClassification_MidjourneyV6_SDXL",
    "model_7": "date3k2/vit-real-fake-classification-v4"
}

CLASS_NAMES = {
    "model_1": ['artificial', 'real'],
    "model_2": ['AI Image', 'Real Image'],
    "model_3": ['AI', 'Real'],
    "model_4": ['AI', 'Real'],
    "model_5": ['Realism', 'Deepfake'],
    "model_6": ['ai_gen', 'human'],
    "model_7": ['Fake', 'Real'],
}

def preprocess_resize_256(image):
    if image.mode != 'RGB':
        image = image.convert('RGB')
    return transforms.Resize((256, 256))(image)

def preprocess_resize_224(image):
    if image.mode != 'RGB':
        image = image.convert('RGB')
    return transforms.Resize((224, 224))(image)

def postprocess_pipeline(prediction, class_names):
    # Assumes HuggingFace pipeline output
    return {pred['label']: pred['score'] for pred in prediction}

def postprocess_logits(outputs, class_names):
    # Assumes model output with logits
    logits = outputs.logits.cpu().numpy()[0]
    probabilities = softmax(logits)
    return {class_names[i]: probabilities[i] for i in range(len(class_names))}

def register_model_with_metadata(model_id, model, preprocess, postprocess, class_names, display_name, contributor, model_path, architecture=None, dataset=None):
    entry = ModelEntry(model, preprocess, postprocess, class_names, display_name=display_name, contributor=contributor, model_path=model_path, architecture=architecture, dataset=dataset)
    MODEL_REGISTRY[model_id] = entry
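# Illustrative sketch (not part of the committed file): register_model_with_metadata is the
# single entry point that puts a detector into MODEL_REGISTRY; the inference loop and the
# weight manager simply iterate that registry. The model id and path below are hypothetical.
# clf_8 = pipeline("image-classification", model="example-org/example-detector", device=device)
# register_model_with_metadata(
#     "model_8", clf_8, preprocess_resize_224, postprocess_pipeline, ['AI', 'Real'],
#     display_name="EXAMPLE8", contributor="example-org", model_path="example-org/example-detector",
#     architecture="ViT", dataset="TBA"
# )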
# Load and register models (copied from app_mcp.py)
# image_processor_1 = AutoImageProcessor.from_pretrained(MODEL_PATHS["model_1"], use_fast=True)
# model_1 = Swinv2ForImageClassification.from_pretrained(MODEL_PATHS["model_1"]).to(device)
# clf_1 = pipeline(model=model_1, task="image-classification", image_processor=image_processor_1, device=device)
# register_model_with_metadata(
#     "model_1", clf_1, preprocess_resize_256, postprocess_pipeline, CLASS_NAMES["model_1"],
#     display_name="SWIN1", contributor="haywoodsloan", model_path=MODEL_PATHS["model_1"],
#     architecture="SwinV2", dataset="TBA"
# )

# --- ONNX Quantized Model Example ---
ONNX_QUANTIZED_MODEL_PATH = "./models/model_1_quantized.onnx"

def preprocess_onnx_input(image: Image.Image):
    # Preprocess image for ONNX model (e.g., for SwinV2, usually 256x256, normalized)
    if image.mode != 'RGB':
        image = image.convert('RGB')

    transform = transforms.Compose([
        transforms.Resize((256, 256)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), # ImageNet normalization
    ])
    input_tensor = transform(image)
    # ONNX expects numpy array with batch dimension (1, C, H, W)
    return input_tensor.unsqueeze(0).cpu().numpy()

def infer_onnx_model(preprocessed_image_np):
    try:
        # Ensure the ONNX model exists before trying to load it
        if not os.path.exists(ONNX_QUANTIZED_MODEL_PATH):
            logger.error(f"ONNX quantized model not found at: {ONNX_QUANTIZED_MODEL_PATH}")
            raise FileNotFoundError(f"ONNX quantized model not found at: {ONNX_QUANTIZED_MODEL_PATH}")

        ort_session = onnxruntime.InferenceSession(ONNX_QUANTIZED_MODEL_PATH)
        ort_inputs = {ort_session.get_inputs()[0].name: preprocessed_image_np}
        ort_outputs = ort_session.run(None, ort_inputs)

        # Assuming the output is logits, apply softmax to get probabilities
        logits = ort_outputs[0]
        probabilities = softmax(logits[0]) # Remove batch dim, apply softmax
        return {"logits": logits, "probabilities": probabilities}

    except Exception as e:
        logger.error(f"Error during ONNX inference: {e}")
        # Return a structure consistent with other model errors
        return {"logits": np.array([]), "probabilities": np.array([])}

def postprocess_onnx_output(onnx_output, class_names):
    probabilities = onnx_output.get("probabilities")
    if probabilities is not None and len(probabilities) == len(class_names):
        return {class_names[i]: probabilities[i] for i in range(len(class_names))}
    else:
        logger.warning("ONNX post-processing failed or class names mismatch.")
        return {name: 0.0 for name in class_names}

# Register the ONNX quantized model
register_model_with_metadata(
    "model_1_onnx_quantized",
    infer_onnx_model,
    preprocess_onnx_input,
    postprocess_onnx_output,
    CLASS_NAMES["model_1"], # Assuming it uses the same class names as model_1
    display_name="SWIN1",
    contributor="haywoodsloan",
    model_path=ONNX_QUANTIZED_MODEL_PATH,
    architecture="SwinV2",
    dataset="TBA"
)
# --- End ONNX Quantized Model Example ---
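# Illustrative sketch (not part of the commit): the file expects the quantized model to
# already exist at ./models/model_1_quantized.onnx but does not show how it was produced.
# One plausible way to generate it; the export shape and quantization settings below are
# assumptions, not taken from this repository.
# import torch
# from transformers import Swinv2ForImageClassification
# from onnxruntime.quantization import quantize_dynamic, QuantType
#
# model = Swinv2ForImageClassification.from_pretrained(MODEL_PATHS["model_1"]).eval()
# dummy = torch.randn(1, 3, 256, 256)  # assumed input size, matching preprocess_onnx_input
# torch.onnx.export(model, dummy, "./models/model_1.onnx", input_names=["pixel_values"], output_names=["logits"])
# quantize_dynamic("./models/model_1.onnx", ONNX_QUANTIZED_MODEL_PATH, weight_type=QuantType.QUInt8)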
clf_2 = pipeline("image-classification", model=MODEL_PATHS["model_2"], device=device)
register_model_with_metadata(
    "model_2", clf_2, preprocess_resize_224, postprocess_pipeline, CLASS_NAMES["model_2"],
    display_name="VIT2", contributor="Heem2", model_path=MODEL_PATHS["model_2"],
    architecture="ViT", dataset="TBA"
)

feature_extractor_3 = AutoFeatureExtractor.from_pretrained(MODEL_PATHS["model_3"], device=device)
model_3 = AutoModelForImageClassification.from_pretrained(MODEL_PATHS["model_3"]).to(device)
def preprocess_256(image):
    if image.mode != 'RGB':
        image = image.convert('RGB')
    return transforms.Resize((256, 256))(image)
def postprocess_logits_model3(outputs, class_names):
    logits = outputs.logits.cpu().numpy()[0]
    probabilities = softmax(logits)
    return {class_names[i]: probabilities[i] for i in range(len(class_names))}
def model3_infer(image):
    inputs = feature_extractor_3(image, return_tensors="pt").to(device)
    with torch.no_grad():
        outputs = model_3(**inputs)
    return outputs
register_model_with_metadata(
    "model_3", model3_infer, preprocess_256, postprocess_logits_model3, CLASS_NAMES["model_3"],
    display_name="SDXL3", contributor="Organika", model_path=MODEL_PATHS["model_3"],
    architecture="VIT", dataset="SDXL"
)

feature_extractor_4 = AutoFeatureExtractor.from_pretrained(MODEL_PATHS["model_4"], device=device)
model_4 = AutoModelForImageClassification.from_pretrained(MODEL_PATHS["model_4"]).to(device)
def model4_infer(image):
    inputs = feature_extractor_4(image, return_tensors="pt").to(device)
    with torch.no_grad():
        outputs = model_4(**inputs)
    return outputs
def postprocess_logits_model4(outputs, class_names):
    logits = outputs.logits.cpu().numpy()[0]
    probabilities = softmax(logits)
    return {class_names[i]: probabilities[i] for i in range(len(class_names))}
register_model_with_metadata(
    "model_4", model4_infer, preprocess_256, postprocess_logits_model4, CLASS_NAMES["model_4"],
    display_name="XLFLUX4", contributor="cmckinle", model_path=MODEL_PATHS["model_4"],
    architecture="VIT", dataset="SDXL, FLUX"
)

clf_5 = pipeline("image-classification", model=MODEL_PATHS["model_5"], device=device)
register_model_with_metadata(
    "model_5", clf_5, preprocess_resize_224, postprocess_pipeline, CLASS_NAMES["model_5"],
    display_name="VIT5", contributor="prithivMLmods", model_path=MODEL_PATHS["model_5"],
    architecture="VIT", dataset="TBA"
)

image_processor_6 = AutoImageProcessor.from_pretrained(MODEL_PATHS["model_6"], use_fast=True)
model_6 = SwinForImageClassification.from_pretrained(MODEL_PATHS["model_6"]).to(device)
clf_6 = pipeline(model=model_6, task="image-classification", image_processor=image_processor_6, device=device)
register_model_with_metadata(
    "model_6", clf_6, preprocess_resize_224, postprocess_pipeline, CLASS_NAMES["model_6"],
    display_name="SWIN6", contributor="ideepankarsharma2003", model_path=MODEL_PATHS["model_6"],
    architecture="SWINv1", dataset="SDXL, Midjourney"
)

image_processor_7 = AutoImageProcessor.from_pretrained(MODEL_PATHS["model_7"], use_fast=True)
model_7 = AutoModelForImageClassification.from_pretrained(MODEL_PATHS["model_7"]).to(device)
clf_7 = pipeline(model=model_7, task="image-classification", image_processor=image_processor_7, device=device)
register_model_with_metadata(
    "model_7", clf_7, preprocess_resize_224, postprocess_pipeline, CLASS_NAMES["model_7"],
    display_name="VIT7", contributor="date3k2", model_path=MODEL_PATHS["model_7"],
    architecture="VIT", dataset="TBA"
)

# def postprocess_simple_prediction(result, class_names):
#     scores = {name: 0.0 for name in class_names}
#     fake_prob = result.get("Fake Probability")
#     if fake_prob is not None:
#         # Assume class_names = ["AI", "REAL"]
#         scores["AI"] = float(fake_prob)
#         scores["REAL"] = 1.0 - float(fake_prob)
#     return scores

# def simple_prediction(img):
#     client = Client("aiwithoutborders-xyz/OpenSight-Community-Forensics-Preview")
#     client.view_api()
#     print(type(img))
#     result = client.predict(
#         handle_file(img),
#         api_name="simple_predict"
#     )
#     return result


# register_model_with_metadata(
#     model_id="simple_prediction",
#     model=simple_prediction,
#     preprocess=None,
#     postprocess=postprocess_simple_prediction,
#     class_names=["AI", "REAL"],
#     display_name="Community Forensics",
#     contributor="Jeongsoo Park",
#     model_path="aiwithoutborders-xyz/CommunityForensics-DeepfakeDet-ViT",
#     architecture="ViT", dataset="GOAT"
# )
def infer(image: Image.Image, model_id: str, confidence_threshold: float = 0.75) -> dict:
    """Predict using a specific model.

    Args:
        image (Image.Image): The input image to classify.
        model_id (str): The ID of the model to use for classification.
        confidence_threshold (float, optional): The confidence threshold for classification. Defaults to 0.75.

    Returns:
        dict: A dictionary containing the model details, classification scores, and label.
    """
    entry = MODEL_REGISTRY[model_id]
    img = entry.preprocess(image) if entry.preprocess else image
    try:
        result = entry.model(img)
        scores = entry.postprocess(result, entry.class_names)
        ai_score = float(scores.get(entry.class_names[0], 0.0))
        real_score = float(scores.get(entry.class_names[1], 0.0))
        label = "AI" if ai_score >= confidence_threshold else ("REAL" if real_score >= confidence_threshold else "UNCERTAIN")
        return {
            "Model": entry.display_name,
            "Contributor": entry.contributor,
            "HF Model Path": entry.model_path,
            "AI Score": ai_score,
            "Real Score": real_score,
            "Label": label
        }
    except Exception as e:
        return {
            "Model": entry.display_name,
            "Contributor": entry.contributor,
            "HF Model Path": entry.model_path,
            "AI Score": 0.0,
            "Real Score": 0.0,
            "Label": f"Error: {str(e)}"
        }
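# Illustrative sketch (not part of the committed file): a one-model smoke test of infer();
# the file path is a placeholder.
# test_img = Image.open("sample.jpg")
# print(infer(test_img, "model_3", confidence_threshold=0.75))
# -> {"Model": "SDXL3", "Contributor": "Organika", ..., "Label": "AI" | "REAL" | "UNCERTAIN"}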
def full_prediction(img, confidence_threshold, rotate_degrees, noise_level, sharpen_strength):
    """Full prediction run, with a team of ensembles and agents.

    Args:
        img (url: str, Image.Image, np.ndarray): The input image to classify.
        confidence_threshold (float, optional): The confidence threshold for classification. Defaults to 0.75.
        rotate_degrees (int, optional): The degrees to rotate the image.
        noise_level (int, optional): The noise level to use.
        sharpen_strength (int, optional): The sharpen strength to use.

    Returns:
        dict: A dictionary containing the model details, classification scores, and label.
    """
    # Ensure img is a PIL Image object
    if img is None:
        raise gr.Error("No image provided. Please upload an image to analyze.")
    # Handle filepath conversion if needed
    if isinstance(img, str):
        try:
            img = load_image(img)
        except Exception as e:
            logger.error(f"Error loading image from path: {e}")
            raise gr.Error(f"Could not load image from the provided path. Error: {str(e)}")

    if not isinstance(img, Image.Image):
        try:
            img = Image.fromarray(img)
        except Exception as e:
            logger.error(f"Error converting input image to PIL: {e}")
            raise gr.Error("Input image could not be converted to a valid image format. Please try another image.")

    # Ensure image is in RGB format for consistent processing
    if img.mode != 'RGB':
        img = img.convert('RGB')

    monitor_agent = EnsembleMonitorAgent()
    weight_manager = ModelWeightManager(strongest_model_id="simple_prediction")
    optimization_agent = WeightOptimizationAgent(weight_manager)
    health_agent = SystemHealthAgent()
    context_agent = ContextualIntelligenceAgent()
    anomaly_agent = ForensicAnomalyDetectionAgent()
    health_agent.monitor_system_health()
    if rotate_degrees or noise_level or sharpen_strength:
        img_pil, _ = augment_image(img, ["rotate", "add_noise", "sharpen"], rotate_degrees, noise_level, sharpen_strength)
    else:
        img_pil = img
    img_np_og = np.array(img)

    model_predictions_raw = {}
    confidence_scores = {}
    results = []
    table_rows = []

    # Stream results as each model finishes
    for model_id in MODEL_REGISTRY:
        model_start = time.time()
        result = infer(img_pil, model_id, confidence_threshold)
        model_end = time.time()
        monitor_agent.monitor_prediction(
            model_id,
            result["Label"],
            max(result.get("AI Score", 0.0), result.get("Real Score", 0.0)),
            model_end - model_start
        )
        model_predictions_raw[model_id] = result
        confidence_scores[model_id] = max(result.get("AI Score", 0.0), result.get("Real Score", 0.0))
        results.append(result)
        table_rows.append([
            result.get("Model", ""),
            result.get("Contributor", ""),
            round(result.get("AI Score", 0.0), 3) if result.get("AI Score") is not None else 0.0,
            round(result.get("Real Score", 0.0), 3) if result.get("Real Score") is not None else 0.0,
            result.get("Label", "Error")
        ])
        # Yield partial results: only update the table, others are None
        yield None, None, table_rows, None, None

    # After all models, compute the rest as before
    image_data_for_context = {
        "width": img.width,
        "height": img.height,
        "mode": img.mode,
    }
    detected_context_tags = context_agent.infer_context_tags(image_data_for_context, model_predictions_raw)
    logger.info(f"Detected context tags: {detected_context_tags}")
    adjusted_weights = weight_manager.adjust_weights(model_predictions_raw, confidence_scores, context_tags=detected_context_tags)
    weighted_predictions = {"AI": 0.0, "REAL": 0.0, "UNCERTAIN": 0.0}
    for model_id, prediction in model_predictions_raw.items():
        prediction_label = prediction.get("Label")
        if prediction_label in weighted_predictions:
            weighted_predictions[prediction_label] += adjusted_weights[model_id]
        else:
            logger.warning(f"Unexpected prediction label '{prediction_label}' from model '{model_id}'. Skipping its weight in consensus.")
    final_prediction_label = "UNCERTAIN"
    if weighted_predictions["AI"] > weighted_predictions["REAL"] and weighted_predictions["AI"] > weighted_predictions["UNCERTAIN"]:
        final_prediction_label = "AI"
    elif weighted_predictions["REAL"] > weighted_predictions["AI"] and weighted_predictions["REAL"] > weighted_predictions["UNCERTAIN"]:
        final_prediction_label = "REAL"
    optimization_agent.analyze_performance(final_prediction_label, None)
    gradient_image = gradient_processing(img_np_og)
    gradient_image2 = gradient_processing(img_np_og, intensity=45, equalize=True)
    minmax_image = minmax_process(img_np_og)
    minmax_image2 = minmax_process(img_np_og, radius=6)
    # bitplane_image = bit_plane_extractor(img_pil)
    ela1 = ELA(img_np_og, quality=75, scale=50, contrast=20, linear=False, grayscale=True)
    ela2 = ELA(img_np_og, quality=75, scale=75, contrast=25, linear=False, grayscale=True)
    ela3 = ELA(img_np_og, quality=75, scale=75, contrast=25, linear=False, grayscale=False)
    forensics_images = [img_pil, ela1, ela2, ela3, gradient_image, gradient_image2, minmax_image, minmax_image2]
    forensic_output_descriptions = [
        f"Original augmented image (PIL): {img_pil.width}x{img_pil.height}",
        "ELA analysis (Pass 1): Grayscale error map, quality 75.",
        "ELA analysis (Pass 2): Grayscale error map, quality 75, enhanced contrast.",
        "ELA analysis (Pass 3): Color error map, quality 75, enhanced contrast.",
        "Gradient processing: Highlights edges and transitions.",
        "Gradient processing: Int=45, Equalize=True",
        "MinMax processing: Deviations in local pixel values.",
        "MinMax processing (Radius=6): Deviations in local pixel values.",
        # "Bit Plane extractor: Visualization of individual bit planes from different color channels."
    ]
    anomaly_detection_results = anomaly_agent.analyze_forensic_outputs(forensic_output_descriptions)
    logger.info(f"Forensic anomaly detection: {anomaly_detection_results['summary']}")
    consensus_html = f"<div style='font-size: 2.2em; font-weight: bold;padding: 10px;'>Consensus: <span style='color:{'red' if final_prediction_label == 'AI' else ('green' if final_prediction_label == 'REAL' else 'orange')}'>{final_prediction_label}</span></div>"
    inference_params = {
        "confidence_threshold": confidence_threshold,
        "rotate_degrees": rotate_degrees,
        "noise_level": noise_level,
        "sharpen_strength": sharpen_strength,
        "detected_context_tags": detected_context_tags
    }
    ensemble_output_data = {
        "final_prediction_label": final_prediction_label,
        "weighted_predictions": weighted_predictions,
        "adjusted_weights": adjusted_weights
    }
    agent_monitoring_data_log = {
        "ensemble_monitor": {
            "alerts": monitor_agent.alerts,
            "performance_metrics": monitor_agent.performance_metrics
        },
        "weight_optimization": {
            "prediction_history_length": len(optimization_agent.prediction_history),
        },
        "system_health": {
            "memory_usage": health_agent.health_metrics["memory_usage"],
            "gpu_utilization": health_agent.health_metrics["gpu_utilization"]
        },
        "context_intelligence": {
            "detected_context_tags": detected_context_tags
        },
        "forensic_anomaly_detection": anomaly_detection_results
    }
    log_inference_data(
        original_image=img,
        inference_params=inference_params,
        model_predictions=results,
        ensemble_output=ensemble_output_data,
        forensic_images=forensics_images,
        agent_monitoring_data=agent_monitoring_data_log,
        human_feedback=None
    )
    cleaned_forensics_images = []
    for f_img in forensics_images:
        if isinstance(f_img, Image.Image):
            cleaned_forensics_images.append(f_img)
        elif isinstance(f_img, np.ndarray):
            try:
                cleaned_forensics_images.append(Image.fromarray(f_img))
            except Exception as e:
                logger.warning(f"Could not convert numpy array to PIL Image for gallery: {e}")
        else:
            logger.warning(f"Unexpected type in forensic_images: {type(f_img)}. Skipping.")
    logger.info(f"Cleaned forensic images types: {[type(img) for img in cleaned_forensics_images]}")
    for i, res_dict in enumerate(results):
        for key in ["AI Score", "Real Score"]:
            value = res_dict.get(key)
            if isinstance(value, np.float32):
                res_dict[key] = float(value)
                logger.info(f"Converted {key} for result {i} from numpy.float32 to float.")
    json_results = json.dumps(results, cls=NumpyEncoder)
    yield img_pil, cleaned_forensics_images, table_rows, json_results, consensus_html
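# Illustrative sketch (not part of the committed file): full_prediction is a generator.
# It yields a partial table after each model finishes, then one final tuple carrying the
# gallery, raw JSON, and consensus HTML. The file path below is a placeholder.
# for processed, gallery, table, raw_json, consensus in full_prediction("sample.jpg", 0.7, 0, 0, 0):
#     if consensus is not None:      # final yield carries the ensemble verdict
#         print(consensus)
#     else:                          # intermediate yields only update the table
#         print(f"{len(table)} model(s) finished")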
detection_model_eval_playground = gr.Interface(
    fn=full_prediction,
    inputs=[
        gr.Image(label="Upload Image to Analyze", sources=['upload', 'webcam'], type='filepath'),
        gr.Slider(0.0, 1.0, value=0.7, step=0.05, label="Confidence Threshold"),
        gr.Slider(0, 45, value=0, step=1, label="Rotate Degrees", visible=False),
        gr.Slider(0, 50, value=0, step=1, label="Noise Level", visible=False),
        gr.Slider(0, 50, value=0, step=1, label="Sharpen Strength", visible=False)
    ],
    outputs=[
        gr.Image(label="Processed Image", visible=False),
        gr.Gallery(label="Post Processed Images", visible=True, columns=[4], rows=[2], container=False, height="auto", object_fit="contain", elem_id="post-gallery"),
        gr.Dataframe(
            label="Model Predictions",
            headers=["Arch / Dataset", "By", "AI", "Real", "Label"],
            datatype=["str", "str", "number", "number", "str"]
        ),
        gr.JSON(label="Raw Model Results", visible=False),
        gr.Markdown(label="Consensus", value="")
    ],
    title="Multi-Model Ensemble + Agentic Coordinated Deepfake Detection (Paper in Progress)",
    description="The detection of AI-generated images has entered a critical inflection point. While existing solutions struggle with outdated datasets and inflated claims, our approach prioritizes agility, community collaboration, and an offensive approach to deepfake detection.",
    api_name="predict",
    live=True # Enable streaming
)
# def echo_headers(x, request: gr.Request):
#     print(dict(request.headers))
#     return str(dict(request.headers))


def predict(img):
    """
    Predicts whether an image is AI-generated or real using the SOTA Community Forensics model.

    Args:
        img (str): Path to the input image file to analyze.

    Returns:
        dict: A dictionary containing:
            - 'Fake Probability' (float): Probability score between 0 and 1 indicating likelihood of being AI-generated
            - 'Result Description' (str): Human-readable description of the prediction result

    Example:
        >>> result = predict("path/to/image.jpg")
        >>> print(result)
        {'Fake Probability': 0.002, 'Result Description': 'The image is likely real.'}
    """
    client = Client("aiwithoutborders-xyz/OpenSight-Community-Forensics-Preview")
    client.view_api()
    result = client.predict(
        handle_file(img),
        api_name="/simple_predict"
    )
    return str(result)
community_forensics_preview = gr.Interface(
    fn=predict,
    inputs=gr.Image(type="filepath"),
    outputs=gr.HTML(), # or gr.Markdown() if it's just text
    title="Quick and simple prediction by our strongest model.",
    description="No ensemble, no context, no agents, just a quick and simple prediction by our strongest model.",
    api_name="predict"
)

# leaderboard = gr.Interface(
#     fn=lambda: "# AI Generated / Deepfake Detection Models Leaderboard: Soon™",
#     inputs=None,
#     outputs=gr.Markdown(),
#     title="Leaderboard",
#     api_name="leaderboard"
# )
def simple_prediction(img):
    """
    Quick and simple deepfake or real image prediction by the strongest open-source model on the hub.

    Args:
        img (str): The input image to analyze, provided as a file path.

    Returns:
        str: The prediction result stringified from dict. Example: `{'Fake Probability': 0.002, 'Result Description': 'The image is likely real.'}`
    """
    client = Client("aiwithoutborders-xyz/OpenSight-Community-Forensics-Preview")
    client.view_api()
    client.predict(
        handle_file(img),
        api_name="simple_predict"
    )
simple_predict_interface = gr.Interface(
    fn=simple_prediction,
    inputs=gr.Image(type="filepath"),
    outputs=gr.Text(),
    title="Quick and simple prediction by our strongest model.",
    description="No ensemble, no context, no agents, just a quick and simple prediction by our strongest model.",
    api_name="simple_predict"
)

noise_estimation_interface = gr.Interface(
    fn=noise_estimation,
    inputs=[gr.Image(type="pil"), gr.Slider(1, 32, value=8, step=1, label="Block Size")],
    outputs=gr.Image(type="pil"),
    title="Wavelet-Based Noise Analysis",
    description="Analyzes image noise patterns using wavelet decomposition. This tool helps detect compression artifacts and artificial noise patterns that may indicate image manipulation. Higher noise levels in specific regions can reveal areas of potential tampering.",
    api_name="tool_waveletnoise"
)

bit_plane_interface = gr.Interface(
    fn=bit_plane_extractor,
    inputs=[
        gr.Image(type="pil"),
        gr.Dropdown(["Luminance", "Red", "Green", "Blue", "RGB Norm"], label="Channel", value="Luminance"),
        gr.Slider(0, 7, value=0, step=1, label="Bit Plane"),
        gr.Dropdown(["Disabled", "Median", "Gaussian"], label="Filter", value="Disabled")
    ],
    outputs=gr.Image(type="pil"),
    title="Bit Plane Analysis",
    description="Extracts and visualizes individual bit planes from different color channels. This forensic tool helps identify hidden patterns and artifacts in image data that may indicate manipulation. Different bit planes can reveal inconsistencies in image processing or editing.",
    api_name="tool_bitplane"
)

ela_interface = gr.Interface(
    fn=ELA,
    inputs=[
        gr.Image(type="pil", label="Input Image"),
        gr.Slider(1, 100, value=75, step=1, label="JPEG Quality"),
        gr.Slider(1, 100, value=50, step=1, label="Output Scale (Multiplicative Gain)"),
        gr.Slider(0, 100, value=20, step=1, label="Output Contrast (Tonality Compression)"),
        gr.Checkbox(value=False, label="Use Linear Difference"),
        gr.Checkbox(value=False, label="Grayscale Output")
    ],
    outputs=gr.Image(type="pil"),
    title="Error Level Analysis (ELA)",
    description="Performs Error Level Analysis to detect re-saved JPEG images, which can indicate tampering. ELA highlights areas of an image that have different compression levels.",
    api_name="tool_ela"
)

gradient_processing_interface = gr.Interface(
    fn=gradient_processing,
    inputs=[
        gr.Image(type="pil", label="Input Image"),
        gr.Slider(0, 100, value=90, step=1, label="Intensity"),
        gr.Dropdown(["Abs", "None", "Flat", "Norm"], label="Blue Mode", value="Abs"),
        gr.Checkbox(value=False, label="Invert Gradients"),
        gr.Checkbox(value=False, label="Equalize Histogram")
    ],
    outputs=gr.Image(type="pil"),
    title="Gradient Processing",
    description="Applies gradient filters to an image to enhance edges and transitions, which can reveal inconsistencies due to manipulation.",
    api_name="tool_gradient_processing"
)

minmax_processing_interface = gr.Interface(
    fn=minmax_process,
    inputs=[
        gr.Image(type="pil", label="Input Image"),
        gr.Radio([0, 1, 2, 3, 4], label="Channel (0:Grayscale, 1:Blue, 2:Green, 3:Red, 4:RGB Norm)", value=4),
        gr.Slider(0, 10, value=2, step=1, label="Radius")
    ],
    outputs=gr.Image(type="pil"),
    title="MinMax Processing",
    description="Analyzes local pixel value deviations to detect subtle changes in image data, often indicative of digital forgeries.",
    api_name="tool_minmax_processing"
)

# augmentation_tool_interface = gr.Interface(
+
# fn=augment_image,
|
666 |
+
# inputs=[
|
667 |
+
# gr.Image(label="Upload Image to Augment", sources=['upload', 'webcam'], type='pil'),
|
668 |
+
# gr.CheckboxGroup(["rotate", "add_noise", "sharpen"], label="Augmentation Methods"),
|
669 |
+
# gr.Slider(0, 360, value=0, step=1, label="Rotate Degrees", visible=True),
|
670 |
+
# gr.Slider(0, 100, value=0, step=1, label="Noise Level", visible=True),
|
671 |
+
# gr.Slider(0, 200, value=1, step=1, label="Sharpen Strength", visible=True)
|
672 |
+
# ],
|
673 |
+
# outputs=gr.Image(label="Augmented Image", type='pil'),
|
674 |
+
# title="Image Augmentation Tool",
|
675 |
+
# description="Apply various augmentation techniques to your image.",
|
676 |
+
# api_name="augment_image"
|
677 |
+
# )
|
678 |
+
|
679 |
+
# def get_captured_logs():
|
680 |
+
# # Retrieve all logs from the queue and clear it
|
681 |
+
# logs = list(log_queue)
|
682 |
+
# log_queue.clear() # Clear the queue after retrieving
|
683 |
+
# return "\n".join(logs)
|
684 |
+
|
685 |
+
|
686 |
+
demo = gr.TabbedInterface(
|
687 |
+
[
|
688 |
+
detection_model_eval_playground,
|
689 |
+
community_forensics_preview,
|
690 |
+
noise_estimation_interface,
|
691 |
+
bit_plane_interface,
|
692 |
+
ela_interface,
|
693 |
+
gradient_processing_interface,
|
694 |
+
minmax_processing_interface,
|
695 |
+
# gr.Textbox(label="Agent Logs", interactive=False, lines=5, max_lines=20, autoscroll=True) # New textbox for logs
|
696 |
+
],
|
697 |
+
[
|
698 |
+
"Run Ensemble Prediction",
|
699 |
+
"Open-Source SOTA Model",
|
700 |
+
"Wavelet Blocking Noise Estimation",
|
701 |
+
"Bit Plane Values",
|
702 |
+
"Error Level Analysis (ELA)",
|
703 |
+
"Gradient Processing",
|
704 |
+
"MinMax Processing",
|
705 |
+
# "Agent Logs" # New tab title
|
706 |
+
],
|
707 |
+
title="Deepfake Detection & Forensics Tools",
|
708 |
+
theme=None,
|
709 |
+
|
710 |
+
)
|
711 |
+
footerMD = """
|
712 |
+
### ⚠️ ENSEMBLE TEAM IN TRAINING ⚠️ \n\n
|
713 |
+
|
714 |
+
1. **DISCLAIMER: METADATA AS WELL AS MEDIA SUBMITTED TO THIS SPACE MAY BE VIEWED AND SELECTED FOR FUTURE DATASETS, PLEASE DO NOT SUBMIT PERSONAL CONTENT. FOR UNTRACKED, PRIVATE USE OF THE MODELS YOU MAY STILL USE [THE ORIGINAL SPACE HERE](https://huggingface.co/spaces/aiwithoutborders-xyz/OpenSight-Deepfake-Detection-Models-Playground), SOTA MODEL INCLUDED.**
|
715 |
+
2. **UPDATE 6-13-25**: APOLOGIES FOR THE CONFUSION, WE ARE WORKING TO REVERT THE ORIGINAL REPO BACK TO ITS NON-DATA COLLECTION STATE -- ONLY THE "SIMPLE PREDICTION" ENDPOINT IS CURRENTLY 100% PRIVATE. PLEASE STAY TUNED AS WE FIGURE OUT A SOLUTION FOR THE ENSEMBLE + AGENT TEAM ENDPOINT. IT CAN GET RESOURCE INTENSIVE TO RUN A FULL PREDICTION. ALTERNATIVELY, WE **ENCOURAGE** ANYONE TO FORK AND CONTRIBUTE TO THE PROJECT.
|
716 |
+
3. **UPDATE 6-13-25 (cont.)**: WHILE WE HAVE NOT TAKEN A STANCE ON NSFW AND EXPLICIT CONTENT, PLEASE REFRAIN FROM ... YOUR HUMAN DESIRES UNTIL WE GET THIS PRIVACY SITUATION SORTED OUT. DO NOT BE RECKLESS PLEASE. OUR PAPER WILL BE OUT SOON ON ARXIV WHICH WILL EXPLAIN EVERYTHING WITH DATA-BACKED RESEARCH ON WHY THIS PROJECT IS NEEDED, BUT WE CANNOT DO IT WITHOUT THE HELP OF THE COMMUNITY.
|
717 |
+
|
718 |
+
TO SUMMARIZE: DATASET COLLECTION WILL CONTINUE FOR OUR NOVEL ENSEMBLE-TEAM PREDICTION PIPELINE UNTIL WE CAN GET THINGS SORTED OUT. FOR THOSE THAT WISH TO OPT-OUT, WE OFFER THE SIMPLE, BUT [MOST POWERFUL DETECTION MODEL HERE.](https://huggingface.co/spaces/aiwithoutborders-xyz/OpenSight-Community-Forensics-Preview)
|
719 |
+
|
720 |
+
"""
|
721 |
+
footer = gr.Markdown("", elem_classes="footer")
|
722 |
+
|
723 |
+
with gr.Blocks() as app:
|
724 |
+
demo.render()
|
725 |
+
footer.render()
|
726 |
+
|
727 |
+
|
728 |
+
app.queue(max_size=10, default_concurrency_limit=2).launch(mcp_server=True)
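For reference, a minimal client-side sketch (not part of this commit) of calling the simple prediction endpoint defined above via gradio_client; the Space id and `/simple_predict` api_name are taken from the code, while the image path is a placeholder:

```python
# Minimal sketch: query the Community Forensics preview Space from Python.
# Space id and api_name mirror the predict() helper above; the image path is a placeholder.
from gradio_client import Client, handle_file

client = Client("aiwithoutborders-xyz/OpenSight-Community-Forensics-Preview")
result = client.predict(
    handle_file("path/to/image.jpg"),  # local file or URL of the image to analyze
    api_name="/simple_predict",
)
print(result)  # e.g. {'Fake Probability': 0.002, 'Result Description': 'The image is likely real.'}
```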
|
app_mcp.py → app_mcp.old.py
RENAMED
@@ -3,9 +3,8 @@ import time
|
|
3 |
from typing import Literal
|
4 |
import spaces
|
5 |
import gradio as gr
|
6 |
-
import
|
7 |
-
|
8 |
-
import modelscope_studio.components.base as ms
|
9 |
from transformers import pipeline, AutoImageProcessor, SwinForImageClassification, Swinv2ForImageClassification, AutoFeatureExtractor, AutoModelForImageClassification
|
10 |
from torchvision import transforms
|
11 |
import torch
|
@@ -14,18 +13,18 @@ import numpy as np
|
|
14 |
import io
|
15 |
import logging
|
16 |
from utils.utils import softmax, augment_image, convert_pil_to_bytes
|
17 |
-
from
|
18 |
-
from
|
19 |
-
from
|
20 |
-
from
|
21 |
-
from
|
22 |
from utils.hf_logger import log_inference_data
|
23 |
from utils.text_content import QUICK_INTRO, IMPLEMENTATION
|
24 |
-
from agents.
|
25 |
from agents.smart_agents import ContextualIntelligenceAgent, ForensicAnomalyDetectionAgent
|
26 |
|
27 |
-
from
|
28 |
-
from agents.
|
29 |
from dotenv import load_dotenv
|
30 |
import json
|
31 |
from huggingface_hub import CommitScheduler
|
@@ -93,7 +92,6 @@ MODEL_PATHS = {
|
|
93 |
"model_3": "Organika/sdxl-detector",
|
94 |
"model_4": "cmckinle/sdxl-flux-detector_v1.1",
|
95 |
"model_5": "prithivMLmods/Deep-Fake-Detector-v2-Model",
|
96 |
-
"model_5b": "prithivMLmods/Deepfake-Detection-Exp-02-22",
|
97 |
"model_6": "ideepankarsharma2003/AI_ImageClassification_MidjourneyV6_SDXL",
|
98 |
"model_7": "date3k2/vit-real-fake-classification-v4"
|
99 |
}
|
@@ -104,22 +102,19 @@ CLASS_NAMES = {
|
|
104 |
"model_3": ['AI', 'Real'],
|
105 |
"model_4": ['AI', 'Real'],
|
106 |
"model_5": ['Realism', 'Deepfake'],
|
107 |
-
"model_5b": ['Real', 'Deepfake'],
|
108 |
"model_6": ['ai_gen', 'human'],
|
109 |
"model_7": ['Fake', 'Real'],
|
110 |
|
111 |
}
|
112 |
|
113 |
-
def preprocess_resize_256(image):
|
114 |
-
if image.mode != 'RGB':
|
115 |
-
image = image.convert('RGB')
|
116 |
-
return transforms.Resize((256, 256))(image)
|
117 |
-
|
118 |
def preprocess_resize_224(image):
|
119 |
if image.mode != 'RGB':
|
120 |
image = image.convert('RGB')
|
121 |
return transforms.Resize((224, 224))(image)
|
122 |
-
|
|
|
|
|
|
|
123 |
def postprocess_pipeline(prediction, class_names):
|
124 |
# Assumes HuggingFace pipeline output
|
125 |
return {pred['label']: pred['score'] for pred in prediction}
|
@@ -146,7 +141,7 @@ image_processor_1 = AutoImageProcessor.from_pretrained(MODEL_PATHS["model_1"], u
|
|
146 |
model_1 = Swinv2ForImageClassification.from_pretrained(MODEL_PATHS["model_1"]).to(device)
|
147 |
clf_1 = pipeline(model=model_1, task="image-classification", image_processor=image_processor_1, device=device)
|
148 |
register_model_with_metadata(
|
149 |
-
"model_1", clf_1,
|
150 |
display_name="SwinV2 Based", contributor="haywoodsloan", model_path=MODEL_PATHS["model_1"]
|
151 |
)
|
152 |
|
@@ -159,10 +154,7 @@ register_model_with_metadata(
|
|
159 |
# Register remaining models
|
160 |
feature_extractor_3 = AutoFeatureExtractor.from_pretrained(MODEL_PATHS["model_3"], device=device)
|
161 |
model_3 = AutoModelForImageClassification.from_pretrained(MODEL_PATHS["model_3"]).to(device)
|
162 |
-
|
163 |
-
if image.mode != 'RGB':
|
164 |
-
image = image.convert('RGB')
|
165 |
-
return transforms.Resize((256, 256))(image)
|
166 |
def postprocess_logits_model3(outputs, class_names):
|
167 |
logits = outputs.logits.cpu().numpy()[0]
|
168 |
probabilities = softmax(logits)
|
@@ -199,11 +191,6 @@ register_model_with_metadata(
|
|
199 |
display_name="Vit Based", contributor="prithivMLmods", model_path=MODEL_PATHS["model_5"]
|
200 |
)
|
201 |
|
202 |
-
clf_5b = pipeline("image-classification", model=MODEL_PATHS["model_5b"], device=device)
|
203 |
-
register_model_with_metadata(
|
204 |
-
"model_5b", clf_5b, preprocess_resize_224, postprocess_pipeline, CLASS_NAMES["model_5b"],
|
205 |
-
display_name="Vit Based, Newer Dataset", contributor="prithivMLmods", model_path=MODEL_PATHS["model_5b"]
|
206 |
-
)
|
207 |
|
208 |
image_processor_6 = AutoImageProcessor.from_pretrained(MODEL_PATHS["model_6"], use_fast=True)
|
209 |
model_6 = SwinForImageClassification.from_pretrained(MODEL_PATHS["model_6"]).to(device)
|
@@ -255,7 +242,7 @@ def infer(image: Image.Image, model_id: str, confidence_threshold: float = 0.75)
|
|
255 |
|
256 |
def predict_image(img, confidence_threshold):
|
257 |
model_ids = [
|
258 |
-
"model_1", "model_2", "model_3", "model_4", "model_5", "
|
259 |
]
|
260 |
results = [infer(img, model_id, confidence_threshold) for model_id in model_ids]
|
261 |
return img, results
|
@@ -268,9 +255,9 @@ def get_consensus_label(results):
|
|
268 |
color = {"AI": "red", "REAL": "green", "UNCERTAIN": "orange"}.get(consensus, "gray")
|
269 |
return f"<b><span style='color:{color}'>{consensus}</span></b>"
|
270 |
|
271 |
-
# Update
|
272 |
|
273 |
-
def
|
274 |
# Ensure img is a PIL Image (if it's not already)
|
275 |
if not isinstance(img, Image.Image):
|
276 |
try:
|
@@ -364,7 +351,12 @@ def predict_image_with_json(img, confidence_threshold, augment_methods, rotate_d
|
|
364 |
|
365 |
# 6. Perform forensic processing
|
366 |
gradient_image = gradient_processing(img_np_og) # Added gradient processing
|
367 |
-
|
|
|
|
|
|
|
|
|
|
|
368 |
|
369 |
# First pass - standard analysis
|
370 |
ela1 = ELA(img_np_og, quality=75, scale=50, contrast=20, linear=False, grayscale=True)
|
@@ -373,7 +365,7 @@ def predict_image_with_json(img, confidence_threshold, augment_methods, rotate_d
|
|
373 |
ela2 = ELA(img_np_og, quality=75, scale=75, contrast=25, linear=False, grayscale=True)
|
374 |
ela3 = ELA(img_np_og, quality=75, scale=75, contrast=25, linear=False, grayscale=False)
|
375 |
|
376 |
-
forensics_images = [img_pil, ela1, ela2, ela3, gradient_image, minmax_image]
|
377 |
|
378 |
# 7. Generate boilerplate descriptions for forensic outputs for anomaly agent
|
379 |
forensic_output_descriptions = [
|
@@ -382,7 +374,9 @@ def predict_image_with_json(img, confidence_threshold, augment_methods, rotate_d
|
|
382 |
"ELA analysis (Pass 2): Grayscale error map, quality 75, enhanced contrast.",
|
383 |
"ELA analysis (Pass 3): Color error map, quality 75, enhanced contrast.",
|
384 |
"Gradient processing: Highlights edges and transitions.",
|
385 |
-
"
|
|
|
|
|
386 |
]
|
387 |
# You could also add descriptions for Wavelet and Bit Plane if they were dynamic outputs
|
388 |
# For instance, if wavelet_blocking_noise_estimation had parameters that changed and you wanted to describe them.
|
@@ -406,7 +400,7 @@ def predict_image_with_json(img, confidence_threshold, augment_methods, rotate_d
|
|
406 |
logger.info(f"Row {i} types: {[type(item) for item in row]}")
|
407 |
|
408 |
# The get_consensus_label function is now replaced by final_prediction_label from weighted consensus
|
409 |
-
consensus_html = f"<b
|
410 |
|
411 |
# Prepare data for logging to Hugging Face dataset
|
412 |
inference_params = {
|
@@ -484,6 +478,13 @@ def predict_image_with_json(img, confidence_threshold, augment_methods, rotate_d
|
|
484 |
|
485 |
return img_pil, cleaned_forensics_images, table_rows, json_results, consensus_html
|
486 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
487 |
with gr.Blocks(css="#post-gallery { overflow: hidden !important;} .grid-wrap{ overflow-y: hidden !important;} .ms-gr-ant-welcome-icon{ height:unset !important;} .tabs{margin-top:10px;}") as demo:
|
488 |
with ms.Application() as app:
|
489 |
with antd.ConfigProvider():
|
@@ -567,38 +568,151 @@ with gr.Blocks(css="#post-gallery { overflow: hidden !important;} .grid-wrap{ ov
|
|
567 |
api_name="/tool_waveletnoise"
|
568 |
)
|
569 |
|
|
|
570 |
|
571 |
-
|
572 |
-
|
573 |
-
|
574 |
-
|
575 |
-
|
576 |
-
|
577 |
-
|
578 |
-
|
579 |
-
|
580 |
-
|
581 |
-
|
582 |
-
|
583 |
-
|
584 |
-
|
585 |
-
|
586 |
-
|
587 |
-
|
588 |
-
|
589 |
-
|
590 |
-
|
591 |
-
|
592 |
-
|
|
|
|
|
593 |
)
|
594 |
-
|
595 |
-
|
596 |
-
|
597 |
-
|
598 |
-
|
599 |
-
|
600 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
601 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
602 |
# --- MCP-Ready Launch ---
|
603 |
if __name__ == "__main__":
|
604 |
# Initialize CommitScheduler
|
|
|
3 |
from typing import Literal
|
4 |
import spaces
|
5 |
import gradio as gr
|
6 |
+
from gradio_client import Client, handle_file
|
7 |
+
|
|
|
8 |
from transformers import pipeline, AutoImageProcessor, SwinForImageClassification, Swinv2ForImageClassification, AutoFeatureExtractor, AutoModelForImageClassification
|
9 |
from torchvision import transforms
|
10 |
import torch
|
|
|
13 |
import io
|
14 |
import logging
|
15 |
from utils.utils import softmax, augment_image, convert_pil_to_bytes
|
16 |
+
from forensics.gradient import gradient_processing
|
17 |
+
from forensics.minmax import minmax_process
|
18 |
+
from forensics.ela import ELA
|
19 |
+
from forensics.wavelet import wavelet_blocking_noise_estimation
|
20 |
+
from forensics.bitplane import bit_plane_extractor
|
21 |
from utils.hf_logger import log_inference_data
|
22 |
from utils.text_content import QUICK_INTRO, IMPLEMENTATION
|
23 |
+
from agents.ensemble_team import EnsembleMonitorAgent, WeightOptimizationAgent, SystemHealthAgent
|
24 |
from agents.smart_agents import ContextualIntelligenceAgent, ForensicAnomalyDetectionAgent
|
25 |
|
26 |
+
from utils.registry import register_model, MODEL_REGISTRY, ModelEntry
|
27 |
+
from agents.ensemble_weights import ModelWeightManager
|
28 |
from dotenv import load_dotenv
|
29 |
import json
|
30 |
from huggingface_hub import CommitScheduler
|
|
|
92 |
"model_3": "Organika/sdxl-detector",
|
93 |
"model_4": "cmckinle/sdxl-flux-detector_v1.1",
|
94 |
"model_5": "prithivMLmods/Deep-Fake-Detector-v2-Model",
|
|
|
95 |
"model_6": "ideepankarsharma2003/AI_ImageClassification_MidjourneyV6_SDXL",
|
96 |
"model_7": "date3k2/vit-real-fake-classification-v4"
|
97 |
}
|
|
|
102 |
"model_3": ['AI', 'Real'],
|
103 |
"model_4": ['AI', 'Real'],
|
104 |
"model_5": ['Realism', 'Deepfake'],
|
|
|
105 |
"model_6": ['ai_gen', 'human'],
|
106 |
"model_7": ['Fake', 'Real'],
|
107 |
|
108 |
}
|
109 |
|
|
|
|
|
|
|
|
|
|
|
110 |
def preprocess_resize_224(image):
|
111 |
if image.mode != 'RGB':
|
112 |
image = image.convert('RGB')
|
113 |
return transforms.Resize((224, 224))(image)
|
114 |
+
def preprocess_256(image):
|
115 |
+
if image.mode != 'RGB':
|
116 |
+
image = image.convert('RGB')
|
117 |
+
return transforms.Resize((256, 256))(image)
|
118 |
def postprocess_pipeline(prediction, class_names):
|
119 |
# Assumes HuggingFace pipeline output
|
120 |
return {pred['label']: pred['score'] for pred in prediction}
|
|
|
141 |
model_1 = Swinv2ForImageClassification.from_pretrained(MODEL_PATHS["model_1"]).to(device)
|
142 |
clf_1 = pipeline(model=model_1, task="image-classification", image_processor=image_processor_1, device=device)
|
143 |
register_model_with_metadata(
|
144 |
+
"model_1", clf_1, preprocess_256, postprocess_pipeline, CLASS_NAMES["model_1"],
|
145 |
display_name="SwinV2 Based", contributor="haywoodsloan", model_path=MODEL_PATHS["model_1"]
|
146 |
)
|
147 |
|
|
|
154 |
# Register remaining models
|
155 |
feature_extractor_3 = AutoFeatureExtractor.from_pretrained(MODEL_PATHS["model_3"], device=device)
|
156 |
model_3 = AutoModelForImageClassification.from_pretrained(MODEL_PATHS["model_3"]).to(device)
|
157 |
+
|
|
|
|
|
|
|
158 |
def postprocess_logits_model3(outputs, class_names):
|
159 |
logits = outputs.logits.cpu().numpy()[0]
|
160 |
probabilities = softmax(logits)
|
|
|
191 |
display_name="Vit Based", contributor="prithivMLmods", model_path=MODEL_PATHS["model_5"]
|
192 |
)
|
193 |
|
|
|
|
|
|
|
|
|
|
|
194 |
|
195 |
image_processor_6 = AutoImageProcessor.from_pretrained(MODEL_PATHS["model_6"], use_fast=True)
|
196 |
model_6 = SwinForImageClassification.from_pretrained(MODEL_PATHS["model_6"]).to(device)
|
|
|
242 |
|
243 |
def predict_image(img, confidence_threshold):
|
244 |
model_ids = [
|
245 |
+
"model_1", "model_2", "model_3", "model_4", "model_5", "model_6", "model_7"
|
246 |
]
|
247 |
results = [infer(img, model_id, confidence_threshold) for model_id in model_ids]
|
248 |
return img, results
|
|
|
255 |
color = {"AI": "red", "REAL": "green", "UNCERTAIN": "orange"}.get(consensus, "gray")
|
256 |
return f"<b><span style='color:{color}'>{consensus}</span></b>"
|
257 |
|
258 |
+
# Update predict_with_ensemble to return consensus label
|
259 |
|
260 |
+
def predict_with_ensemble(img, confidence_threshold, augment_methods, rotate_degrees, noise_level, sharpen_strength):
|
261 |
# Ensure img is a PIL Image (if it's not already)
|
262 |
if not isinstance(img, Image.Image):
|
263 |
try:
|
|
|
351 |
|
352 |
# 6. Perform forensic processing
|
353 |
gradient_image = gradient_processing(img_np_og) # Added gradient processing
|
354 |
+
gradient_image2 = gradient_processing(img_np_og, intensity=45, equalize=True) # Added gradient processing
|
355 |
+
|
356 |
+
minmax_image = minmax_process(img_np_og) # Added MinMax processing
|
357 |
+
minmax_image2 = minmax_process(img_np_og, radius=6) # Added MinMax processing
|
358 |
+
# bitplane_image = bit_plane_extractor(img_pil)
|
359 |
+
|
360 |
|
361 |
# First pass - standard analysis
|
362 |
ela1 = ELA(img_np_og, quality=75, scale=50, contrast=20, linear=False, grayscale=True)
|
|
|
365 |
ela2 = ELA(img_np_og, quality=75, scale=75, contrast=25, linear=False, grayscale=True)
|
366 |
ela3 = ELA(img_np_og, quality=75, scale=75, contrast=25, linear=False, grayscale=False)
|
367 |
|
368 |
+
forensics_images = [img_pil, ela1, ela2, ela3, gradient_image, gradient_image2, minmax_image, minmax_image2]
|
369 |
|
370 |
# 7. Generate boilerplate descriptions for forensic outputs for anomaly agent
|
371 |
forensic_output_descriptions = [
|
|
|
374 |
"ELA analysis (Pass 2): Grayscale error map, quality 75, enhanced contrast.",
|
375 |
"ELA analysis (Pass 3): Color error map, quality 75, enhanced contrast.",
|
376 |
"Gradient processing: Highlights edges and transitions.",
|
377 |
+
"Gradient processing: Int=45, Equalize=True",
|
378 |
+
"MinMax processing: Deviations in local pixel values.",
|
379 |
+
"MinMax processing (Radius=6): Deviations in local pixel values.",
|
380 |
]
|
381 |
# You could also add descriptions for Wavelet and Bit Plane if they were dynamic outputs
|
382 |
# For instance, if wavelet_blocking_noise_estimation had parameters that changed and you wanted to describe them.
|
|
|
400 |
logger.info(f"Row {i} types: {[type(item) for item in row]}")
|
401 |
|
402 |
# The get_consensus_label function is now replaced by final_prediction_label from weighted consensus
|
403 |
+
consensus_html = f"<div style='display: flex; justify-content: space-between;'><div style='flex: 1;'><b>THIS IMAGE IS LIKELY <span style='color:{'red' if final_prediction_label == 'AI' else ('green' if final_prediction_label == 'REAL' else 'orange')}'>{final_prediction_label}</span></b></div><div style='flex: 1;'><b>CONSENSUS REACHED BY {len(results)} MODELS</b></div></div>"
|
404 |
|
405 |
# Prepare data for logging to Hugging Face dataset
|
406 |
inference_params = {
|
|
|
478 |
|
479 |
return img_pil, cleaned_forensics_images, table_rows, json_results, consensus_html
|
480 |
|
481 |
+
def simple_prediction(img):
|
482 |
+
client = Client("aiwithoutborders-xyz/OpenSight-Community-Forensics-Preview")
|
483 |
+
result = client.predict(
|
484 |
+
input_image=handle_file(img),
|
485 |
+
api_name="/simple_predict"
|
486 |
+
)
|
487 |
+
return result
|
488 |
with gr.Blocks(css="#post-gallery { overflow: hidden !important;} .grid-wrap{ overflow-y: hidden !important;} .ms-gr-ant-welcome-icon{ height:unset !important;} .tabs{margin-top:10px;}") as demo:
|
489 |
with ms.Application() as app:
|
490 |
with antd.ConfigProvider():
|
|
|
568 |
api_name="/tool_waveletnoise"
|
569 |
)
|
570 |
|
571 |
+
with gr.Blocks(css="#post-gallery { overflow: hidden !important;} .grid-wrap{ overflow-y: hidden !important;} .ms-gr-ant-welcome-icon{ height:unset !important;} .tabs{margin-top:10px;}") as demo:
|
572 |
|
573 |
+
with gr.Tab("π Detection Models Eval / Playground"):
|
574 |
+
gr.Markdown("# Open Source Detection Models Found on the Hub\n\n - **Space will be upgraded shortly;** inference on all 6 models should take about 1.2~ seconds once we're back on CUDA.\n - The **Community Forensics** mother of all detection models is now available for inference, head to the middle tab above this.\n - Lots of exciting things coming up, stay tuned!")
|
575 |
+
|
576 |
+
with gr.Row():
|
577 |
+
with gr.Column(scale=1):
|
578 |
+
image_input = gr.Image(label="Upload Image to Analyze", sources=['upload', 'webcam'], type='pil')
|
579 |
+
with gr.Accordion("Settings (Optional)", open=False, elem_id="settings_accordion"):
|
580 |
+
augment_checkboxgroup = gr.CheckboxGroup(["rotate", "add_noise", "sharpen"], label="Augmentation Methods")
|
581 |
+
rotate_slider = gr.Slider(0, 45, value=0, step=1, label="Rotate Degrees", visible=False)
|
582 |
+
noise_slider = gr.Slider(0, 50, value=0, step=1, label="Noise Level", visible=False)
|
583 |
+
sharpen_slider = gr.Slider(0, 50, value=0, step=1, label="Sharpen Strength", visible=False)
|
584 |
+
confidence_slider = gr.Slider(0.0, 1.0, value=0.7, step=0.05, label="Confidence Threshold")
|
585 |
+
inputs = [image_input, confidence_slider, augment_checkboxgroup, rotate_slider, noise_slider, sharpen_slider]
|
586 |
+
predict_button = gr.Button("Predict")
|
587 |
+
augment_button = gr.Button("Augment & Predict")
|
588 |
+
image_output = gr.Image(label="Processed Image", visible=False)
|
589 |
+
|
590 |
+
|
591 |
+
with gr.Column(scale=2):
|
592 |
+
# Use Gradio-native Dataframe to display results with headers
|
593 |
+
results_table = gr.Dataframe(
|
594 |
+
label="Model Predictions",
|
595 |
+
headers=["Model", "Contributor", "AI Score", "Real Score", "Label"],
|
596 |
+
datatype=["str", "str", "number", "number", "str"]
|
597 |
)
|
598 |
+
forensics_gallery = gr.Gallery(label="Post Processed Images", visible=True, columns=[4], rows=[2], container=False, height="auto", object_fit="contain", elem_id="post-gallery")
|
599 |
+
with gr.Accordion("Debug Output (Raw JSON)", open=False):
|
600 |
+
debug_json = gr.JSON(label="Raw Model Results")
|
601 |
+
consensus_md = gr.Markdown(label="Consensus", value="")
|
602 |
+
|
603 |
+
outputs = [image_output, forensics_gallery, results_table, debug_json, consensus_md]
|
604 |
+
|
605 |
+
predict_button.click(
|
606 |
+
fn=predict_with_ensemble,
|
607 |
+
inputs=inputs,
|
608 |
+
outputs=outputs,
|
609 |
+
api_name="predict"
|
610 |
+
)
|
611 |
+
augment_button.click( # Connect Augment button to the function
|
612 |
+
fn=predict_with_ensemble,
|
613 |
+
inputs=[
|
614 |
+
image_input,
|
615 |
+
confidence_slider,
|
616 |
+
gr.CheckboxGroup(["rotate", "add_noise", "sharpen"], value=["rotate", "add_noise", "sharpen"], visible=False), # Default values
|
617 |
+
rotate_slider,
|
618 |
+
noise_slider,
|
619 |
+
sharpen_slider
|
620 |
+
],
|
621 |
+
outputs=outputs,
|
622 |
+
api_name="augment_then_predict"
|
623 |
+
)
|
624 |
+
with gr.Tab("π Project Introduction"):
|
625 |
+
gr.Markdown(QUICK_INTRO)
|
626 |
+
|
627 |
+
with gr.Tab("π Community Forensics Preview"):
|
628 |
+
gr.load("aiwithoutborders-xyz/OpenSight-Community-Forensics-Preview", src="spaces")
|
629 |
+
with gr.Tab("π₯ Leaderboard"):
|
630 |
+
gr.Markdown("# AI Generated / Deepfake Detection Models Leaderboard: Soonβ’")
|
631 |
+
with gr.Tab("Simple Predict", visible=False):
|
632 |
+
gr.Interface(
|
633 |
+
fn=simple_prediction,
|
634 |
+
inputs=gr.Image(type="filepath"),
|
635 |
+
outputs=gr.Text(),
|
636 |
+
title="Simple and Fast Prediction",
|
637 |
+
description=""
|
638 |
+
)
|
639 |
+
with gr.Tab("Wavelet Blocking Noise Estimation", visible=False):
|
640 |
+
gr.Interface(
|
641 |
+
fn=wavelet_blocking_noise_estimation,
|
642 |
+
inputs=[gr.Image(type="pil"), gr.Slider(1, 32, value=8, step=1, label="Block Size")],
|
643 |
+
outputs=gr.Image(type="pil"),
|
644 |
+
title="Wavelet-Based Noise Analysis",
|
645 |
+
description="Analyzes image noise patterns using wavelet decomposition. This tool helps detect compression artifacts and artificial noise patterns that may indicate image manipulation. Higher noise levels in specific regions can reveal areas of potential tampering.",
|
646 |
+
api_name="tool_waveletnoise"
|
647 |
+
)
|
648 |
|
649 |
+
"""Forensics Tool: Bit Plane Extractor
|
650 |
+
|
651 |
+
Args:
|
652 |
+
image: PIL Image to analyze
|
653 |
+
channel: Color channel to extract bit plane from ("Luminance", "Red", "Green", "Blue", "RGB Norm")
|
654 |
+
bit_plane: Bit plane index to extract (0-7)
|
655 |
+
filter_type: Filter to apply ("Disabled", "Median", "Gaussian")
|
656 |
+
"""
|
657 |
+
with gr.Tab("Bit Plane Values", visible=False):
|
658 |
+
gr.Interface(
|
659 |
+
|
660 |
+
fn=bit_plane_extractor,
|
661 |
+
inputs=[
|
662 |
+
gr.Image(type="pil"),
|
663 |
+
gr.Dropdown(["Luminance", "Red", "Green", "Blue", "RGB Norm"], label="Channel", value="Luminance"),
|
664 |
+
gr.Slider(0, 7, value=0, step=1, label="Bit Plane"),
|
665 |
+
gr.Dropdown(["Disabled", "Median", "Gaussian"], label="Filter", value="Disabled")
|
666 |
+
],
|
667 |
+
outputs=gr.Image(type="pil"),
|
668 |
+
title="Bit Plane Analysis",
|
669 |
+
description="Extracts and visualizes individual bit planes from different color channels. This forensic tool helps identify hidden patterns and artifacts in image data that may indicate manipulation. Different bit planes can reveal inconsistencies in image processing or editing.",
|
670 |
+
api_name="tool_bitplane"
|
671 |
+
)
|
672 |
+
with gr.Tab("Error Level Analysis (ELA)", visible=False):
|
673 |
+
gr.Interface(
|
674 |
+
fn=ELA,
|
675 |
+
inputs=[
|
676 |
+
gr.Image(type="pil", label="Input Image"),
|
677 |
+
gr.Slider(1, 100, value=75, step=1, label="JPEG Quality"),
|
678 |
+
gr.Slider(1, 100, value=50, step=1, label="Output Scale (Multiplicative Gain)"),
|
679 |
+
gr.Slider(0, 100, value=20, step=1, label="Output Contrast (Tonality Compression)"),
|
680 |
+
gr.Checkbox(value=False, label="Use Linear Difference"),
|
681 |
+
gr.Checkbox(value=False, label="Grayscale Output")
|
682 |
+
],
|
683 |
+
outputs=gr.Image(type="pil"),
|
684 |
+
title="Error Level Analysis (ELA)",
|
685 |
+
description="Performs Error Level Analysis to detect re-saved JPEG images, which can indicate tampering. ELA highlights areas of an image that have different compression levels.",
|
686 |
+
api_name="tool_ela"
|
687 |
+
)
|
688 |
+
with gr.Tab("Gradient Processing", visible=False):
|
689 |
+
gr.Interface(
|
690 |
+
fn=gradient_processing,
|
691 |
+
inputs=[
|
692 |
+
gr.Image(type="pil", label="Input Image"),
|
693 |
+
gr.Slider(0, 100, value=90, step=1, label="Intensity"),
|
694 |
+
gr.Dropdown(["Abs", "None", "Flat", "Norm"], label="Blue Mode", value="Abs"),
|
695 |
+
gr.Checkbox(value=False, label="Invert Gradients"),
|
696 |
+
gr.Checkbox(value=False, label="Equalize Histogram")
|
697 |
+
],
|
698 |
+
outputs=gr.Image(type="pil"),
|
699 |
+
title="Gradient Processing",
|
700 |
+
description="Applies gradient filters to an image to enhance edges and transitions, which can reveal inconsistencies due to manipulation.",
|
701 |
+
api_name="tool_gradient_processing"
|
702 |
+
)
|
703 |
+
with gr.Tab("MinMax Processing", visible=False):
|
704 |
+
gr.Interface(
|
705 |
+
fn=minmax_process,
|
706 |
+
inputs=[
|
707 |
+
gr.Image(type="pil", label="Input Image"),
|
708 |
+
gr.Radio([0, 1, 2, 3, 4], label="Channel (0:Grayscale, 1:Blue, 2:Green, 3:Red, 4:RGB Norm)", value=4),
|
709 |
+
gr.Slider(0, 10, value=2, step=1, label="Radius")
|
710 |
+
],
|
711 |
+
outputs=gr.Image(type="pil"),
|
712 |
+
title="MinMax Processing",
|
713 |
+
description="Analyzes local pixel value deviations to detect subtle changes in image data, often indicative of digital forgeries.",
|
714 |
+
api_name="tool_minmax_processing"
|
715 |
+
)
|
716 |
# --- MCP-Ready Launch ---
|
717 |
if __name__ == "__main__":
|
718 |
# Initialize CommitScheduler
|
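For context on the `# Initialize CommitScheduler` step above, a hedged sketch of the scheduler wiring; the repo id and folder reuse the HF_DATASET_NAME / LOCAL_LOG_DIR constants that appear in app_optimized.py below, while `path_in_repo` and the commit interval are assumptions, not values from this commit:

```python
# Hedged sketch of the CommitScheduler setup; values marked "assumed" are not from this commit.
from huggingface_hub import CommitScheduler

scheduler = CommitScheduler(
    repo_id="aiwithoutborders-xyz/degentic_rd0",  # HF_DATASET_NAME from app_optimized.py
    repo_type="dataset",
    folder_path="./hf_inference_logs",            # LOCAL_LOG_DIR from app_optimized.py
    path_in_repo="logs",                          # assumed subfolder
    every=5,                                      # assumed: background commit every 5 minutes
)
```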
app_optimized.py
ADDED
@@ -0,0 +1,987 @@
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
from gradio_client import Client, handle_file
|
3 |
+
from PIL import Image, ImageFilter
|
4 |
+
import numpy as np
|
5 |
+
import os
|
6 |
+
import time
|
7 |
+
import logging
|
8 |
+
import io
|
9 |
+
import collections
|
10 |
+
import onnxruntime
|
11 |
+
import json
|
12 |
+
from huggingface_hub import CommitScheduler, hf_hub_download, snapshot_download
|
13 |
+
from dotenv import load_dotenv
|
14 |
+
import concurrent.futures
|
15 |
+
import ast
|
16 |
+
import torch
|
17 |
+
|
18 |
+
from utils.utils import softmax, augment_image
|
19 |
+
from forensics.gradient import gradient_processing
|
20 |
+
from forensics.minmax import minmax_process
|
21 |
+
from forensics.ela import ELA
|
22 |
+
from forensics.wavelet import noise_estimation
|
23 |
+
from forensics.bitplane import bit_plane_extractor
|
24 |
+
from utils.hf_logger import log_inference_data
|
25 |
+
from utils.load import load_image
|
26 |
+
from agents.ensemble_team import EnsembleMonitorAgent, WeightOptimizationAgent, SystemHealthAgent
|
27 |
+
from agents.smart_agents import ContextualIntelligenceAgent, ForensicAnomalyDetectionAgent
|
28 |
+
from utils.registry import register_model, MODEL_REGISTRY, ModelEntry
|
29 |
+
from agents.ensemble_weights import ModelWeightManager
|
30 |
+
from transformers import pipeline, AutoImageProcessor, SwinForImageClassification, Swinv2ForImageClassification, AutoFeatureExtractor, AutoModelForImageClassification
|
31 |
+
from torchvision import transforms
|
32 |
+
|
33 |
+
logging.basicConfig(level=logging.INFO)
|
34 |
+
logger = logging.getLogger(__name__)
|
35 |
+
os.environ['HF_HUB_CACHE'] = './models'
|
36 |
+
|
37 |
+
# --- Gradio Log Handler ---
|
38 |
+
class GradioLogHandler(logging.Handler):
|
39 |
+
def __init__(self, log_queue):
|
40 |
+
super().__init__()
|
41 |
+
self.log_queue = log_queue
|
42 |
+
self.setFormatter(logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s'))
|
43 |
+
|
44 |
+
def emit(self, record):
|
45 |
+
self.log_queue.append(self.format(record))
|
46 |
+
|
47 |
+
log_queue = collections.deque(maxlen=1000) # Store last 1000 log messages
|
48 |
+
gradio_handler = GradioLogHandler(log_queue)
|
49 |
+
|
50 |
+
# Set root logger level to DEBUG to capture all messages from agents
|
51 |
+
logging.getLogger().setLevel(logging.INFO)
|
52 |
+
logging.getLogger().addHandler(gradio_handler)
|
53 |
+
# --- End Gradio Log Handler ---
|
54 |
+
|
55 |
+
LOCAL_LOG_DIR = "./hf_inference_logs"
|
56 |
+
HF_DATASET_NAME="aiwithoutborders-xyz/degentic_rd0"
|
57 |
+
load_dotenv()
|
58 |
+
|
59 |
+
# Custom JSON Encoder to handle numpy types
|
60 |
+
class NumpyEncoder(json.JSONEncoder):
|
61 |
+
def default(self, obj):
|
62 |
+
if isinstance(obj, np.float32):
|
63 |
+
return float(obj)
|
64 |
+
return json.JSONEncoder.default(self, obj)
|
65 |
+
|
66 |
+
# Ensure using GPU if available
|
67 |
+
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
68 |
+
|
69 |
+
# Model paths and class names (copied from app_mcp.py)
|
70 |
+
MODEL_PATHS = {
|
71 |
+
"model_1": "LPX55/detection-model-1-ONNX",
|
72 |
+
"model_2": "LPX55/detection-model-2-ONNX",
|
73 |
+
"model_3": "LPX55/detection-model-3-ONNX",
|
74 |
+
"model_4": "cmckinle/sdxl-flux-detector_v1.1",
|
75 |
+
"model_5": "LPX55/detection-model-5-ONNX",
|
76 |
+
"model_6": "LPX55/detection-model-6-ONNX",
|
77 |
+
"model_7": "LPX55/detection-model-7-ONNX",
|
78 |
+
"model_8": "aiwithoutborders-xyz/CommunityForensics-DeepfakeDet-ViT"
|
79 |
+
}
|
80 |
+
|
81 |
+
CLASS_NAMES = {
|
82 |
+
"model_1": ['artificial', 'real'],
|
83 |
+
"model_2": ['AI Image', 'Real Image'],
|
84 |
+
"model_3": ['artificial', 'human'],
|
85 |
+
"model_4": ['AI', 'Real'],
|
86 |
+
"model_5": ['Realism', 'Deepfake'],
|
87 |
+
"model_6": ['ai_gen', 'human'],
|
88 |
+
"model_7": ['Fake', 'Real'],
|
89 |
+
"model_8": ['Fake', 'Real'],
|
90 |
+
}
|
91 |
+
|
92 |
+
def preprocess_resize_256(image):
|
93 |
+
if image.mode != 'RGB':
|
94 |
+
image = image.convert('RGB')
|
95 |
+
return transforms.Resize((256, 256))(image)
|
96 |
+
|
97 |
+
def preprocess_resize_224(image):
|
98 |
+
if image.mode != 'RGB':
|
99 |
+
image = image.convert('RGB')
|
100 |
+
return transforms.Resize((224, 224))(image)
|
101 |
+
|
102 |
+
def postprocess_pipeline(prediction, class_names):
|
103 |
+
# Assumes HuggingFace pipeline output
|
104 |
+
return {pred['label']: float(pred['score']) for pred in prediction}
|
105 |
+
|
106 |
+
def postprocess_logits(outputs, class_names):
|
107 |
+
# Assumes model output with logits
|
108 |
+
logits = outputs.logits.cpu().numpy()[0]
|
109 |
+
probabilities = softmax(logits)
|
110 |
+
return {class_names[i]: probabilities[i] for i in range(len(class_names))}
|
111 |
+
|
112 |
+
def postprocess_binary_output(output, class_names):
|
113 |
+
# output can be a dictionary {"probabilities": numpy_array} or directly a numpy_array
|
114 |
+
probabilities_array = None
|
115 |
+
if isinstance(output, dict) and "probabilities" in output:
|
116 |
+
probabilities_array = output["probabilities"]
|
117 |
+
elif isinstance(output, np.ndarray):
|
118 |
+
probabilities_array = output
|
119 |
+
else:
|
120 |
+
logger.warning(f"Unexpected output type for binary post-processing: {type(output)}. Expected dict with 'probabilities' or numpy.ndarray.")
|
121 |
+
return {class_names[0]: 0.0, class_names[1]: 1.0}
|
122 |
+
|
123 |
+
logger.info(f"Debug: Probabilities array entering postprocess_binary_output: {probabilities_array}, type: {type(probabilities_array)}, shape: {probabilities_array.shape}")
|
124 |
+
|
125 |
+
if probabilities_array is None:
|
126 |
+
logger.warning("Probabilities array is None after extracting from output. Returning default scores.")
|
127 |
+
return {class_names[0]: 0.0, class_names[1]: 1.0}
|
128 |
+
|
129 |
+
if probabilities_array.size == 1:
|
130 |
+
fake_prob = float(probabilities_array.item())
|
131 |
+
elif probabilities_array.size == 2:
|
132 |
+
fake_prob = float(probabilities_array[0])
|
133 |
+
else:
|
134 |
+
logger.warning(f"Unexpected probabilities array shape for binary post-processing: {probabilities_array.shape}. Expected size 1 or 2.")
|
135 |
+
return {class_names[0]: 0.0, class_names[1]: 1.0}
|
136 |
+
|
137 |
+
real_prob = 1.0 - fake_prob # Ensure Fake and Real sum to 1
|
138 |
+
return {class_names[0]: fake_prob, class_names[1]: real_prob}
|
139 |
+
|
140 |
+
def infer_gradio_api(image_path):
|
141 |
+
client = Client("aiwithoutborders-xyz/OpenSight-Community-Forensics-Preview")
|
142 |
+
result_dict = client.predict(
|
143 |
+
input_image=handle_file(image_path),
|
144 |
+
api_name="/simple_predict"
|
145 |
+
)
|
146 |
+
logger.info(f"Debug: Raw result_dict from Gradio API (model_8): {result_dict}, type: {type(result_dict)}")
|
147 |
+
# result_dict is already a dictionary, no need for ast.literal_eval
|
148 |
+
fake_probability = result_dict.get('Fake Probability', 0.0)
|
149 |
+
logger.info(f"Debug: Parsed result_dict: {result_dict}, Extracted fake_probability: {fake_probability}")
|
150 |
+
return {"probabilities": np.array([fake_probability])} # Return as a numpy array with one element
|
151 |
+
|
152 |
+
# New preprocess function for Gradio API
|
153 |
+
def preprocess_gradio_api(image: Image.Image):
|
154 |
+
# The Gradio API expects a file path, so we need to save the PIL Image to a temporary file.
|
155 |
+
temp_file_path = "./temp_gradio_input.png"
|
156 |
+
image.save(temp_file_path)
|
157 |
+
return temp_file_path
|
158 |
+
|
159 |
+
# New postprocess function for Gradio API (adapting postprocess_binary_output)
|
160 |
+
def postprocess_gradio_api(gradio_output, class_names):
|
161 |
+
# gradio_output is expected to be a dictionary like {"probabilities": np.array([fake_prob])}
|
162 |
+
probabilities_array = None
|
163 |
+
if isinstance(gradio_output, dict) and "probabilities" in gradio_output:
|
164 |
+
probabilities_array = gradio_output["probabilities"]
|
165 |
+
elif isinstance(gradio_output, np.ndarray):
|
166 |
+
probabilities_array = gradio_output
|
167 |
+
else:
|
168 |
+
logger.warning(f"Unexpected output type for Gradio API post-processing: {type(gradio_output)}. Expected dict with 'probabilities' or numpy.ndarray.")
|
169 |
+
return {class_names[0]: 0.0, class_names[1]: 1.0}
|
170 |
+
|
171 |
+
logger.info(f"Debug: Probabilities array entering postprocess_gradio_api: {probabilities_array}, type: {type(probabilities_array)}, shape: {probabilities_array.shape}")
|
172 |
+
|
173 |
+
if probabilities_array is None or probabilities_array.size == 0:
|
174 |
+
logger.warning("Probabilities array is None or empty after extracting from Gradio API output. Returning default scores.")
|
175 |
+
return {class_names[0]: 0.0, class_names[1]: 1.0}
|
176 |
+
|
177 |
+
# It should always be a single element array for fake probability
|
178 |
+
fake_prob = float(probabilities_array.item())
|
179 |
+
real_prob = 1.0 - fake_prob
|
180 |
+
|
181 |
+
return {class_names[0]: fake_prob, class_names[1]: real_prob}
|
182 |
+
|
183 |
+
def register_model_with_metadata(model_id, model, preprocess, postprocess, class_names, display_name, contributor, model_path, architecture=None, dataset=None):
|
184 |
+
entry = ModelEntry(model, preprocess, postprocess, class_names, display_name=display_name, contributor=contributor, model_path=model_path, architecture=architecture, dataset=dataset)
|
185 |
+
MODEL_REGISTRY[model_id] = entry
|
186 |
+
|
187 |
+
|
188 |
+
def load_onnx_model_and_preprocessor(hf_model_id):
|
189 |
+
# model_dir = snapshot_download(repo_id=hf_model_id, local_dir_use_symlinks=False)
|
190 |
+
|
191 |
+
# Create a unique local directory for each ONNX model
|
192 |
+
model_specific_dir = os.path.join("./models", hf_model_id.replace('/', '_'))
|
193 |
+
os.makedirs(model_specific_dir, exist_ok=True)
|
194 |
+
|
195 |
+
# Use hf_hub_download to get specific files into the model-specific directory
|
196 |
+
onnx_model_path = hf_hub_download(repo_id=hf_model_id, filename="model_quantized.onnx", subfolder="onnx", local_dir=model_specific_dir, local_dir_use_symlinks=False)
|
197 |
+
|
198 |
+
# Load preprocessor config
|
199 |
+
preprocessor_config = {}
|
200 |
+
try:
|
201 |
+
preprocessor_config_path = hf_hub_download(repo_id=hf_model_id, filename="preprocessor_config.json", local_dir=model_specific_dir, local_dir_use_symlinks=False)
|
202 |
+
with open(preprocessor_config_path, 'r') as f:
|
203 |
+
preprocessor_config = json.load(f)
|
204 |
+
except Exception as e:
|
205 |
+
logger.warning(f"Could not download or load preprocessor_config.json for {hf_model_id}: {e}")
|
206 |
+
|
207 |
+
# Load model config for class names if available
|
208 |
+
model_config = {}
|
209 |
+
try:
|
210 |
+
model_config_path = hf_hub_download(repo_id=hf_model_id, filename="config.json", local_dir=model_specific_dir, local_dir_use_symlinks=False)
|
211 |
+
with open(model_config_path, 'r') as f:
|
212 |
+
model_config = json.load(f)
|
213 |
+
except Exception as e:
|
214 |
+
logger.warning(f"Could not download or load config.json for {hf_model_id}: {e}")
|
215 |
+
|
216 |
+
return onnxruntime.InferenceSession(onnx_model_path), preprocessor_config, model_config
|
217 |
+
|
218 |
+
|
219 |
+
# Cache for ONNX sessions and preprocessors
|
220 |
+
_onnx_model_cache = {}
|
221 |
+
|
222 |
+
def get_onnx_model_from_cache(hf_model_id):
|
223 |
+
if hf_model_id not in _onnx_model_cache:
|
224 |
+
logger.info(f"Loading ONNX model and preprocessor for {hf_model_id}...")
|
225 |
+
_onnx_model_cache[hf_model_id] = load_onnx_model_and_preprocessor(hf_model_id)
|
226 |
+
return _onnx_model_cache[hf_model_id]
|
227 |
+
|
228 |
+
def preprocess_onnx_input(image: Image.Image, preprocessor_config: dict):
|
229 |
+
# Preprocess image for ONNX model based on preprocessor_config
|
230 |
+
if image.mode != 'RGB':
|
231 |
+
image = image.convert('RGB')
|
232 |
+
|
233 |
+
# Get image size and normalization values from preprocessor_config or use defaults
|
234 |
+
# Use 'size' for initial resize and 'crop_size' for center cropping
|
235 |
+
initial_resize_size = preprocessor_config.get('size', {'height': 224, 'width': 224})
|
236 |
+
crop_size = preprocessor_config.get('crop_size', initial_resize_size['height'])
|
237 |
+
mean = preprocessor_config.get('image_mean', [0.485, 0.456, 0.406])
|
238 |
+
std = preprocessor_config.get('image_std', [0.229, 0.224, 0.225])
|
239 |
+
|
240 |
+
transform = transforms.Compose([
|
241 |
+
transforms.Resize((initial_resize_size['height'], initial_resize_size['width'])),
|
242 |
+
transforms.CenterCrop(crop_size), # Apply center crop
|
243 |
+
transforms.ToTensor(),
|
244 |
+
transforms.Normalize(mean=mean, std=std),
|
245 |
+
])
|
246 |
+
input_tensor = transform(image)
|
247 |
+
# ONNX expects numpy array with batch dimension (1, C, H, W)
|
248 |
+
return input_tensor.unsqueeze(0).cpu().numpy()
|
249 |
+
|
250 |
+
def infer_onnx_model(hf_model_id, preprocessed_image_np, model_config: dict):
|
251 |
+
try:
|
252 |
+
ort_session, _, _ = get_onnx_model_from_cache(hf_model_id)
|
253 |
+
|
254 |
+
# Debug: Print expected input shape from ONNX model
|
255 |
+
for input_meta in ort_session.get_inputs():
|
256 |
+
logger.info(f"Debug: ONNX model expected input name: {input_meta.name}, shape: {input_meta.shape}, type: {input_meta.type}")
|
257 |
+
|
258 |
+
logger.info(f"Debug: preprocessed_image_np shape: {preprocessed_image_np.shape}")
|
259 |
+
ort_inputs = {ort_session.get_inputs()[0].name: preprocessed_image_np}
|
260 |
+
ort_outputs = ort_session.run(None, ort_inputs)
|
261 |
+
|
262 |
+
logits = ort_outputs[0]
|
263 |
+
logger.info(f"Debug: logits type: {type(logits)}, shape: {logits.shape}")
|
264 |
+
# If the model outputs a single logit (e.g., shape (1,)), use .item() to convert to scalar
|
265 |
+
# Otherwise, assume it's a batch of logits (e.g., shape (1, num_classes)) and take the first element (batch dim)
|
266 |
+
# The num_classes in config.json can be misleading; rely on actual output shape.
|
267 |
+
|
268 |
+
# Apply softmax to the logits to get probabilities for the classes
|
269 |
+
# The softmax function in utils/utils.py now ensures a list of floats
|
270 |
+
probabilities = softmax(logits[0]) # Assuming logits[0] is the relevant output for a single prediction
|
271 |
+
|
272 |
+
return {"logits": logits, "probabilities": probabilities}
|
273 |
+
|
274 |
+
except Exception as e:
|
275 |
+
logger.error(f"Error during ONNX inference for {hf_model_id}: {e}")
|
276 |
+
# Return a structure consistent with other model errors
|
277 |
+
return {"logits": np.array([]), "probabilities": np.array([])}
|
278 |
+
|
279 |
+
def postprocess_onnx_output(onnx_output, model_config):
|
280 |
+
# Get class names from model_config
|
281 |
+
# Prioritize id2label, then check num_classes, otherwise default
|
282 |
+
class_names_map = model_config.get('id2label')
|
283 |
+
if class_names_map:
|
284 |
+
class_names = [class_names_map[k] for k in sorted(class_names_map.keys())]
|
285 |
+
elif model_config.get('num_classes') == 1: # Handle models that output a single value (e.g., probability of 'Fake')
|
286 |
+
class_names = ['Fake', 'Real'] # Assume first class is 'Fake' and second 'Real'
|
287 |
+
else:
|
288 |
+
class_names = {0: 'Fake', 1: 'Real'} # Default to Fake/Real if not found or not 1 class
|
289 |
+
class_names = [class_names[i] for i in sorted(class_names.keys())]
|
290 |
+
|
291 |
+
probabilities = onnx_output.get("probabilities")
|
292 |
+
|
293 |
+
if probabilities is not None:
|
294 |
+
if model_config.get('num_classes') == 1 and len(probabilities) == 2: # Special handling for single output models
|
295 |
+
# The single output is the probability of the 'Fake' class
|
296 |
+
fake_prob = float(probabilities[0])
|
297 |
+
real_prob = float(probabilities[1])
|
298 |
+
return {class_names[0]: fake_prob, class_names[1]: real_prob}
|
299 |
+
elif len(probabilities) == len(class_names):
|
300 |
+
return {class_names[i]: float(probabilities[i]) for i in range(len(class_names))}
|
301 |
+
else:
|
302 |
+
logger.warning("ONNX post-processing: Probabilities length mismatch with class names.")
|
303 |
+
return {name: 0.0 for name in class_names}
|
304 |
+
else:
|
305 |
+
logger.warning("ONNX post-processing failed: 'probabilities' key not found in output.")
|
306 |
+
return {name: 0.0 for name in class_names}
|
307 |
+
|
308 |
+
# Register the ONNX quantized model
|
309 |
+
# Dummy entry for ONNX model to be loaded dynamically
|
310 |
+
# We will now register a 'wrapper' that handles dynamic loading
|
311 |
+
|
312 |
+
class ONNXModelWrapper:
|
313 |
+
def __init__(self, hf_model_id):
|
314 |
+
self.hf_model_id = hf_model_id
|
315 |
+
self._session = None
|
316 |
+
self._preprocessor_config = None
|
317 |
+
self._model_config = None
|
318 |
+
|
319 |
+
def load(self):
|
320 |
+
if self._session is None:
|
321 |
+
self._session, self._preprocessor_config, self._model_config = get_onnx_model_from_cache(self.hf_model_id)
|
322 |
+
logger.info(f"ONNX model {self.hf_model_id} loaded into wrapper.")
|
323 |
+
|
324 |
+
def __call__(self, image_np):
|
325 |
+
self.load() # Ensure model is loaded on first call
|
326 |
+
# Pass model_config to infer_onnx_model
|
327 |
+
return infer_onnx_model(self.hf_model_id, image_np, self._model_config)
|
328 |
+
|
329 |
+
def preprocess(self, image: Image.Image):
|
330 |
+
self.load()
|
331 |
+
return preprocess_onnx_input(image, self._preprocessor_config)
|
332 |
+
|
333 |
+
def postprocess(self, onnx_output: dict, class_names_from_registry: list): # class_names_from_registry is ignored
|
334 |
+
self.load()
|
335 |
+
return postprocess_onnx_output(onnx_output, self._model_config)
|
336 |
+
|
337 |
+
# Consolidate all model loading and registration
for model_key, hf_model_path in MODEL_PATHS.items():
    logger.debug(f"Attempting to register model: {model_key} with path: {hf_model_path}")
    model_num = model_key.replace("model_", "").upper()
    contributor = "Unknown"
    architecture = "Unknown"
    dataset = "TBA"

    current_class_names = CLASS_NAMES.get(model_key, [])

    # Logic for ONNX models (1, 2, 3, 5, 6, 7)
    if "ONNX" in hf_model_path:
        logger.debug(f"Model {model_key} identified as ONNX.")
        logger.info(f"Registering ONNX model: {model_key} from {hf_model_path}")
        onnx_wrapper_instance = ONNXModelWrapper(hf_model_path)

        # Attempt to derive contributor, architecture, dataset based on model_key
        if model_key == "model_1":
            contributor = "haywoodsloan"
            architecture = "SwinV2"
            dataset = "DeepFakeDetection"
        elif model_key == "model_2":
            contributor = "Heem2"
            architecture = "ViT"
            dataset = "DeepFakeDetection"
        elif model_key == "model_3":
            contributor = "Organika"
            architecture = "VIT"
            dataset = "SDXL"
        elif model_key == "model_5":
            contributor = "prithivMLmods"
            architecture = "VIT"
        elif model_key == "model_6":
            contributor = "ideepankarsharma2003"
            architecture = "SWINv1"
            dataset = "SDXL, Midjourney"
        elif model_key == "model_7":
            contributor = "date3k2"
            architecture = "VIT"

        display_name_parts = [model_num]
        if architecture and architecture not in ["Unknown"]:
            display_name_parts.append(architecture)
        if dataset and dataset not in ["TBA"]:
            display_name_parts.append(dataset)
        display_name = "-".join(display_name_parts)
        display_name += "_ONNX"  # Always append _ONNX for ONNX models

        register_model_with_metadata(
            model_id=model_key,
            model=onnx_wrapper_instance,  # The callable wrapper for the ONNX model
            preprocess=onnx_wrapper_instance.preprocess,
            postprocess=onnx_wrapper_instance.postprocess,
            class_names=current_class_names,  # Initial class names; will be overridden by model_config if available
            display_name=display_name,
            contributor=contributor,
            model_path=hf_model_path,
            architecture=architecture,
            dataset=dataset
        )
    # Logic for Gradio API model (model_8)
    elif model_key == "model_8":
        logger.debug(f"Model {model_key} identified as Gradio API.")
        logger.info(f"Registering Gradio API model: {model_key} from {hf_model_path}")
        contributor = "aiwithoutborders-xyz"
        architecture = "ViT"
        dataset = "DeepfakeDetection"

        display_name_parts = [model_num]
        if architecture and architecture not in ["Unknown"]:
            display_name_parts.append(architecture)
        if dataset and dataset not in ["TBA"]:
            display_name_parts.append(dataset)
        display_name = "-".join(display_name_parts)

        register_model_with_metadata(
            model_id=model_key,
            model=infer_gradio_api,
            preprocess=preprocess_gradio_api,
            postprocess=postprocess_gradio_api,
            class_names=current_class_names,
            display_name=display_name,
            contributor=contributor,
            model_path=hf_model_path,
            architecture=architecture,
            dataset=dataset
        )
    # Logic for PyTorch/Hugging Face pipeline models (currently only model_4)
    elif model_key == "model_4":  # Explicitly handle model_4
        logger.debug(f"Model {model_key} identified as PyTorch/HuggingFace pipeline.")
        logger.info(f"Registering HuggingFace pipeline/AutoModel: {model_key} from {hf_model_path}")
        contributor = "cmckinle"
        architecture = "VIT"
        dataset = "SDXL, FLUX"

        display_name_parts = [model_num]
        if architecture and architecture not in ["Unknown"]:
            display_name_parts.append(architecture)
        if dataset and dataset not in ["TBA"]:
            display_name_parts.append(dataset)
        display_name = "-".join(display_name_parts)

        current_processor = AutoFeatureExtractor.from_pretrained(hf_model_path, device=device)
        model_instance = AutoModelForImageClassification.from_pretrained(hf_model_path).to(device)

        preprocess_func = preprocess_resize_256
        postprocess_func = postprocess_logits

        def custom_infer(image, processor_local=current_processor, model_local=model_instance):
            inputs = processor_local(image, return_tensors="pt").to(device)
            with torch.no_grad():
                outputs = model_local(**inputs)
            return outputs
        model_instance = custom_infer

        register_model_with_metadata(
            model_id=model_key,
            model=model_instance,
            preprocess=preprocess_func,
            postprocess=postprocess_func,
            class_names=current_class_names,
            display_name=display_name,
            contributor=contributor,
            model_path=hf_model_path,
            architecture=architecture,
            dataset=dataset
        )
    else:  # Fallback for any unhandled models (shouldn't happen if MODEL_PATHS is fully covered)
        logger.warning(f"Could not automatically load and register model: {model_key} from {hf_model_path}. No matching registration logic found.")

def infer(image: Image.Image, model_id: str, confidence_threshold: float = 0.75) -> dict:
    """Predict using a specific model.

    Args:
        image (Image.Image): The input image to classify.
        model_id (str): The ID of the model to use for classification.
        confidence_threshold (float, optional): The confidence threshold for classification. Defaults to 0.75.

    Returns:
        dict: A dictionary containing the model details, classification scores, and label.
    """
    entry = MODEL_REGISTRY[model_id]
    img = entry.preprocess(image) if entry.preprocess else image
    try:
        result = entry.model(img)
        scores = entry.postprocess(result, entry.class_names)

        def _to_float_scalar(value):
            if isinstance(value, np.ndarray):
                return float(value.item())  # Convert numpy array scalar to Python float
            return float(value)  # Already a Python scalar or convertible type

        ai_score = _to_float_scalar(scores.get(entry.class_names[0], 0.0))
        real_score = _to_float_scalar(scores.get(entry.class_names[1], 0.0))
        label = "AI" if ai_score >= confidence_threshold else ("REAL" if real_score >= confidence_threshold else "UNCERTAIN")
        return {
            "Model": entry.display_name,
            "Contributor": entry.contributor,
            "HF Model Path": entry.model_path,
            "AI Score": ai_score,
            "Real Score": real_score,
            "Label": label
        }
    except Exception as e:
        return {
            "Model": entry.display_name,
            "Contributor": entry.contributor,
            "HF Model Path": entry.model_path,
            "AI Score": 0.0,
            "Real Score": 0.0,
            "Label": f"Error: {str(e)}"
        }

def full_prediction(img, confidence_threshold, rotate_degrees, noise_level, sharpen_strength):
    """Full prediction run, with a team of ensembles and agents.

    Args:
        img (url: str, Image.Image, np.ndarray): The input image to classify.
        confidence_threshold (float, optional): The confidence threshold for classification. Defaults to 0.75.
        rotate_degrees (int, optional): The degrees to rotate the image.
        noise_level (int, optional): The noise level to use.
        sharpen_strength (int, optional): The sharpen strength to use.

    Returns:
        dict: A dictionary containing the model details, classification scores, and label.
    """
    # Ensure img is a PIL Image object
    if img is None:
        raise gr.Error("No image provided. Please upload an image to analyze.")
    # Handle filepath conversion if needed
    if isinstance(img, str):
        try:
            img = load_image(img)
        except Exception as e:
            logger.error(f"Error loading image from path: {e}")
            raise gr.Error(f"Could not load image from the provided path. Error: {str(e)}")

    if not isinstance(img, Image.Image):
        try:
            img = Image.fromarray(img)
        except Exception as e:
            logger.error(f"Error converting input image to PIL: {e}")
            raise gr.Error("Input image could not be converted to a valid image format. Please try another image.")

    # Ensure image is in RGB format for consistent processing
    if img.mode != 'RGB':
        img = img.convert('RGB')

    monitor_agent = EnsembleMonitorAgent()
    weight_manager = ModelWeightManager(strongest_model_id="simple_prediction")
    optimization_agent = WeightOptimizationAgent(weight_manager)
    health_agent = SystemHealthAgent()
    context_agent = ContextualIntelligenceAgent()
    anomaly_agent = ForensicAnomalyDetectionAgent()
    health_agent.monitor_system_health()
    if rotate_degrees or noise_level or sharpen_strength:
        img_pil, _ = augment_image(img, ["rotate", "add_noise", "sharpen"], rotate_degrees, noise_level, sharpen_strength)
    else:
        img_pil = img
    img_np_og = np.array(img)

    model_predictions_raw = {}
    confidence_scores = {}
    results = []
    table_rows = []

    # Initialize lists for forensic outputs, starting with the original augmented image
    cleaned_forensics_images = []
    forensic_output_descriptions = []

    # Always add the original augmented image first for forensic display
    if isinstance(img_pil, Image.Image):
        cleaned_forensics_images.append(img_pil)
        forensic_output_descriptions.append(f"Original augmented image (PIL): {img_pil.width}x{img_pil.height}")
    elif isinstance(img_pil, np.ndarray):
        try:
            pil_img_from_np = Image.fromarray(img_pil)
            cleaned_forensics_images.append(pil_img_from_np)
            forensic_output_descriptions.append(f"Original augmented image (numpy converted to PIL): {pil_img_from_np.width}x{pil_img_from_np.height}")
        except Exception as e:
            logger.warning(f"Could not convert original numpy image to PIL for gallery: {e}")

    # Yield initial state with augmented image and empty model predictions
    yield img_pil, cleaned_forensics_images, table_rows, "[]", "<div style='font-size: 2.2em; font-weight: bold;padding: 10px;'>Consensus: <span style='color:orange'>UNCERTAIN</span></div>"

    # Stream results as each model finishes
    for model_id in MODEL_REGISTRY:
        model_start = time.time()
        result = infer(img_pil, model_id, confidence_threshold)
        model_end = time.time()

        # Helper to ensure values are Python floats, handling numpy scalars
        def _ensure_float_scalar(value):
            if isinstance(value, np.ndarray):
                return float(value.item())  # Convert numpy array scalar to Python float
            return float(value)  # Already a Python scalar or convertible type

        ai_score_val = _ensure_float_scalar(result.get("AI Score", 0.0))
        real_score_val = _ensure_float_scalar(result.get("Real Score", 0.0))

        monitor_agent.monitor_prediction(
            model_id,
            result["Label"],
            max(ai_score_val, real_score_val),
            model_end - model_start
        )
        model_predictions_raw[model_id] = result
        confidence_scores[model_id] = max(ai_score_val, real_score_val)
        results.append(result)
        table_rows.append([
            result.get("Model", ""),
            result.get("Contributor", ""),
            round(ai_score_val, 5),
            round(real_score_val, 5),
            result.get("Label", "Error")
        ])
        # Yield partial results: only update the table, others are None
        yield None, cleaned_forensics_images, table_rows, None, None  # Keep cleaned_forensics_images as is (only augmented image for now)

    # Multi-threaded forensic processing
    def _run_forensic_task(task_func, img_input, description, **kwargs):
        try:
            result_img = task_func(img_input, **kwargs)
            return result_img, description
        except Exception as e:
            logger.error(f"Error processing forensic task {task_func.__name__}: {e}")
            return None, f"Error processing {description}: {str(e)}"

    with concurrent.futures.ThreadPoolExecutor() as executor:
        future_ela1 = executor.submit(_run_forensic_task, ELA, img_np_og, "ELA analysis (Pass 1): Grayscale error map, quality 75.", quality=75, scale=50, contrast=20, linear=False, grayscale=True)
        future_ela2 = executor.submit(_run_forensic_task, ELA, img_np_og, "ELA analysis (Pass 2): Grayscale error map, quality 75, enhanced contrast.", quality=75, scale=75, contrast=25, linear=False, grayscale=True)
        future_ela3 = executor.submit(_run_forensic_task, ELA, img_np_og, "ELA analysis (Pass 3): Color error map, quality 75, enhanced contrast.", quality=75, scale=75, contrast=25, linear=False, grayscale=False)
        future_gradient1 = executor.submit(_run_forensic_task, gradient_processing, img_np_og, "Gradient processing: Highlights edges and transitions.")
        future_gradient2 = executor.submit(_run_forensic_task, gradient_processing, img_np_og, "Gradient processing: Int=45, Equalize=True", intensity=45, equalize=True)
        future_minmax1 = executor.submit(_run_forensic_task, minmax_process, img_np_og, "MinMax processing: Deviations in local pixel values.")
        future_minmax2 = executor.submit(_run_forensic_task, minmax_process, img_np_og, "MinMax processing (Radius=6): Deviations in local pixel values.", radius=6)

        forensic_futures = [future_ela1, future_ela2, future_ela3, future_gradient1, future_gradient2, future_minmax1, future_minmax2]

        for future in concurrent.futures.as_completed(forensic_futures):
            processed_img, description = future.result()
            if processed_img is not None:
                if isinstance(processed_img, Image.Image):
                    cleaned_forensics_images.append(processed_img)
                elif isinstance(processed_img, np.ndarray):
                    try:
                        cleaned_forensics_images.append(Image.fromarray(processed_img))
                    except Exception as e:
                        logger.warning(f"Could not convert numpy array to PIL Image for gallery: {e}")
                else:
                    logger.warning(f"Unexpected type in processed_img from {description}: {type(processed_img)}. Skipping.")

            forensic_output_descriptions.append(description)  # Keep track of descriptions for anomaly agent

            # Yield partial results: update gallery
            yield None, cleaned_forensics_images, table_rows, None, None

    # After all models, compute the rest as before
    image_data_for_context = {
        "width": img.width,
        "height": img.height,
        "mode": img.mode,
    }
    forensic_output_descriptions = [
        f"Original augmented image (PIL): {img_pil.width}x{img_pil.height}",
        "ELA analysis (Pass 1): Grayscale error map, quality 75.",
        "ELA analysis (Pass 2): Grayscale error map, quality 75, enhanced contrast.",
        "ELA analysis (Pass 3): Color error map, quality 75, enhanced contrast.",
        "Gradient processing: Highlights edges and transitions.",
        "Gradient processing: Int=45, Equalize=True",
        "MinMax processing: Deviations in local pixel values.",
        "MinMax processing (Radius=6): Deviations in local pixel values.",
        # "Bit Plane extractor: Visualization of individual bit planes from different color channels."
    ]
    detected_context_tags = context_agent.infer_context_tags(image_data_for_context, model_predictions_raw)
    logger.info(f"Detected context tags: {detected_context_tags}")
    adjusted_weights = weight_manager.adjust_weights(model_predictions_raw, confidence_scores, context_tags=detected_context_tags)
    weighted_predictions = {"AI": 0.0, "REAL": 0.0, "UNCERTAIN": 0.0}
    for model_id, prediction in model_predictions_raw.items():
        prediction_label = prediction.get("Label")
        if prediction_label in weighted_predictions:
            weighted_predictions[prediction_label] += adjusted_weights[model_id]
        else:
            logger.warning(f"Unexpected prediction label '{prediction_label}' from model '{model_id}'. Skipping its weight in consensus.")
    final_prediction_label = "UNCERTAIN"
    if weighted_predictions["AI"] > weighted_predictions["REAL"] and weighted_predictions["AI"] > weighted_predictions["UNCERTAIN"]:
        final_prediction_label = "AI"
    elif weighted_predictions["REAL"] > weighted_predictions["AI"] and weighted_predictions["REAL"] > weighted_predictions["UNCERTAIN"]:
        final_prediction_label = "REAL"
    optimization_agent.analyze_performance(final_prediction_label, None)
    # gradient_image = gradient_processing(img_np_og)
    # gradient_image2 = gradient_processing(img_np_og, intensity=45, equalize=True)
    # minmax_image = minmax_process(img_np_og)
    # minmax_image2 = minmax_process(img_np_og, radius=6)
    # # bitplane_image = bit_plane_extractor(img_pil)
    # ela1 = ELA(img_np_og, quality=75, scale=50, contrast=20, linear=False, grayscale=True)
    # ela2 = ELA(img_np_og, quality=75, scale=75, contrast=25, linear=False, grayscale=True)
    # ela3 = ELA(img_np_og, quality=75, scale=75, contrast=25, linear=False, grayscale=False)
    # forensics_images = [img_pil, ela1, ela2, ela3, gradient_image, gradient_image2, minmax_image, minmax_image2]
    # forensic_output_descriptions = [
    #     f"Original augmented image (PIL): {img_pil.width}x{img_pil.height}",
    #     "ELA analysis (Pass 1): Grayscale error map, quality 75.",
    #     "ELA analysis (Pass 2): Grayscale error map, quality 75, enhanced contrast.",
    #     "ELA analysis (Pass 3): Color error map, quality 75, enhanced contrast.",
    #     "Gradient processing: Highlights edges and transitions.",
    #     "Gradient processing: Int=45, Equalize=True",
    #     "MinMax processing: Deviations in local pixel values.",
    #     "MinMax processing (Radius=6): Deviations in local pixel values.",
    #     # "Bit Plane extractor: Visualization of individual bit planes from different color channels."
    # ]
    anomaly_detection_results = anomaly_agent.analyze_forensic_outputs(forensic_output_descriptions)
    logger.info(f"Forensic anomaly detection: {anomaly_detection_results['summary']}")
    consensus_html = f"<div style='font-size: 2.2em; font-weight: bold;padding: 10px;'>Consensus: <span style='color:{'red' if final_prediction_label == 'AI' else ('green' if final_prediction_label == 'REAL' else 'orange')}'>{final_prediction_label}</span></div>"
    inference_params = {
        "confidence_threshold": confidence_threshold,
        "rotate_degrees": rotate_degrees,
        "noise_level": noise_level,
        "sharpen_strength": sharpen_strength,
        "detected_context_tags": detected_context_tags
    }
    ensemble_output_data = {
        "final_prediction_label": final_prediction_label,
        "weighted_predictions": weighted_predictions,
        "adjusted_weights": adjusted_weights
    }
    agent_monitoring_data_log = {
        "ensemble_monitor": {
            "alerts": monitor_agent.alerts,
            "performance_metrics": monitor_agent.performance_metrics
        },
        "weight_optimization": {
            "prediction_history_length": len(optimization_agent.prediction_history),
        },
        "system_health": {
            "memory_usage": health_agent.health_metrics["memory_usage"],
            "gpu_utilization": health_agent.health_metrics["gpu_utilization"]
        },
        "context_intelligence": {
            "detected_context_tags": detected_context_tags
        },
        "forensic_anomaly_detection": anomaly_detection_results
    }
    log_inference_data(
        original_image=img,
        inference_params=inference_params,
        model_predictions=results,
        ensemble_output=ensemble_output_data,
        forensic_images=cleaned_forensics_images,  # Use the incrementally built list
        agent_monitoring_data=agent_monitoring_data_log,
        human_feedback=None
    )

    logger.info(f"Cleaned forensic images types: {[type(img) for img in cleaned_forensics_images]}")
    for i, res_dict in enumerate(results):
        for key in ["AI Score", "Real Score"]:
            value = res_dict.get(key)
            if isinstance(value, np.float32):
                res_dict[key] = float(value)
                logger.info(f"Converted {key} for result {i} from numpy.float32 to float.")
    json_results = json.dumps(results, cls=NumpyEncoder)
    yield img_pil, cleaned_forensics_images, table_rows, json_results, consensus_html

detection_model_eval_playground = gr.Interface(
    fn=full_prediction,
    inputs=[
        gr.Image(label="Upload Image to Analyze", sources=['upload', 'webcam'], type='filepath'),
        gr.Slider(0.0, 1.0, value=0.7, step=0.05, label="Confidence Threshold"),
        gr.Slider(0, 45, value=0, step=1, label="Rotate Degrees", visible=False),
        gr.Slider(0, 50, value=0, step=1, label="Noise Level", visible=False),
        gr.Slider(0, 50, value=0, step=1, label="Sharpen Strength", visible=False)
    ],
    outputs=[
        gr.Image(label="Processed Image", visible=False),
        gr.Gallery(label="Post Processed Images", visible=True, columns=[4], rows=[2], container=False, height="auto", object_fit="contain", elem_id="post-gallery"),
        gr.Dataframe(
            label="Model Predictions",
            headers=["Arch / Dataset", "By", "AI", "Real", "Label"],
            datatype=["str", "str", "number", "number", "str"]
        ),
        gr.JSON(label="Raw Model Results", visible=False),
        gr.Markdown(label="Consensus", value="")
    ],
    title="Multi-Model Ensemble + Agentic Coordinated Deepfake Detection (Paper in Progress)",
    description="The detection of AI-generated images has entered a critical inflection point. While existing solutions struggle with outdated datasets and inflated claims, our approach prioritizes agility, community collaboration, and an offensive approach to deepfake detection.",
    api_name="predict",
    live=True  # Enable streaming
)
# def echo_headers(x, request: gr.Request):
#     print(dict(request.headers))
#     return str(dict(request.headers))

def predict(img):
    """
    Predicts whether an image is AI-generated or real using the SOTA Community Forensics model.

    Args:
        img (str): Path to the input image file to analyze.

    Returns:
        str: A stringified dictionary containing:
            - 'Fake Probability' (float): Probability score between 0 and 1 indicating likelihood of being AI-generated
            - 'Result Description' (str): Human-readable description of the prediction result

    Example:
        >>> result = predict("path/to/image.jpg")
        >>> print(result)
        {'Fake Probability': 0.002, 'Result Description': 'The image is likely real.'}
    """
    client = Client("aiwithoutborders-xyz/OpenSight-Community-Forensics-Preview")
    client.view_api()
    result = client.predict(
        handle_file(img),
        api_name="/simple_predict"
    )
    return str(result)
community_forensics_preview = gr.Interface(
    fn=predict,
    inputs=gr.Image(type="filepath"),
    outputs=gr.HTML(),  # or gr.Markdown() if it's just text
    title="Quick and simple prediction by our strongest model.",
    description="No ensemble, no context, no agents, just a quick and simple prediction by our strongest model.",
    api_name="predict"
)

# leaderboard = gr.Interface(
#     fn=lambda: "# AI Generated / Deepfake Detection Models Leaderboard: Soon™",
#     inputs=None,
#     outputs=gr.Markdown(),
#     title="Leaderboard",
#     api_name="leaderboard"
# )
def simple_prediction(img):
    """
    Quick and simple deepfake or real image prediction by the strongest open-source model on the hub.

    Args:
        img (str): The input image to analyze, provided as a file path.

    Returns:
        str: The prediction result stringified from dict. Example: `{'Fake Probability': 0.002, 'Result Description': 'The image is likely real.'}`
    """
    client = Client("aiwithoutborders-xyz/OpenSight-Community-Forensics-Preview")
    client.view_api()
    result = client.predict(
        handle_file(img),
        api_name="/simple_predict"
    )
    return str(result)
simple_predict_interface = gr.Interface(
    fn=simple_prediction,
    inputs=gr.Image(type="filepath"),
    outputs=gr.Text(),
    title="Quick and simple prediction by our strongest model.",
    description="No ensemble, no context, no agents, just a quick and simple prediction by our strongest model.",
    api_name="simple_predict"
)

noise_estimation_interface = gr.Interface(
    fn=noise_estimation,
    inputs=[gr.Image(type="pil"), gr.Slider(1, 32, value=8, step=1, label="Block Size")],
    outputs=gr.Image(type="pil"),
    title="Wavelet-Based Noise Analysis",
    description="Analyzes image noise patterns using wavelet decomposition. This tool helps detect compression artifacts and artificial noise patterns that may indicate image manipulation. Higher noise levels in specific regions can reveal areas of potential tampering.",
    api_name="tool_waveletnoise"
)

bit_plane_interface = gr.Interface(
    fn=bit_plane_extractor,
    inputs=[
        gr.Image(type="pil"),
        gr.Dropdown(["Luminance", "Red", "Green", "Blue", "RGB Norm"], label="Channel", value="Luminance"),
        gr.Slider(0, 7, value=0, step=1, label="Bit Plane"),
        gr.Dropdown(["Disabled", "Median", "Gaussian"], label="Filter", value="Disabled")
    ],
    outputs=gr.Image(type="pil"),
    title="Bit Plane Analysis",
    description="Extracts and visualizes individual bit planes from different color channels. This forensic tool helps identify hidden patterns and artifacts in image data that may indicate manipulation. Different bit planes can reveal inconsistencies in image processing or editing.",
    api_name="tool_bitplane"
)

ela_interface = gr.Interface(
    fn=ELA,
    inputs=[
        gr.Image(type="pil", label="Input Image"),
        gr.Slider(1, 100, value=75, step=1, label="JPEG Quality"),
        gr.Slider(1, 100, value=50, step=1, label="Output Scale (Multiplicative Gain)"),
        gr.Slider(0, 100, value=20, step=1, label="Output Contrast (Tonality Compression)"),
        gr.Checkbox(value=False, label="Use Linear Difference"),
        gr.Checkbox(value=False, label="Grayscale Output")
    ],
    outputs=gr.Image(type="pil"),
    title="Error Level Analysis (ELA)",
    description="Performs Error Level Analysis to detect re-saved JPEG images, which can indicate tampering. ELA highlights areas of an image that have different compression levels.",
    api_name="tool_ela"
)

gradient_processing_interface = gr.Interface(
    fn=gradient_processing,
    inputs=[
        gr.Image(type="pil", label="Input Image"),
        gr.Slider(0, 100, value=90, step=1, label="Intensity"),
        gr.Dropdown(["Abs", "None", "Flat", "Norm"], label="Blue Mode", value="Abs"),
        gr.Checkbox(value=False, label="Invert Gradients"),
        gr.Checkbox(value=False, label="Equalize Histogram")
    ],
    outputs=gr.Image(type="pil"),
    title="Gradient Processing",
    description="Applies gradient filters to an image to enhance edges and transitions, which can reveal inconsistencies due to manipulation.",
    api_name="tool_gradient_processing"
)

minmax_processing_interface = gr.Interface(
    fn=minmax_process,
    inputs=[
        gr.Image(type="pil", label="Input Image"),
        gr.Radio([0, 1, 2, 3, 4], label="Channel (0:Grayscale, 1:Blue, 2:Green, 3:Red, 4:RGB Norm)", value=4),
        gr.Slider(0, 10, value=2, step=1, label="Radius")
    ],
    outputs=gr.Image(type="pil"),
    title="MinMax Processing",
    description="Analyzes local pixel value deviations to detect subtle changes in image data, often indicative of digital forgeries.",
    api_name="tool_minmax_processing"
)

# augmentation_tool_interface = gr.Interface(
#     fn=augment_image,
#     inputs=[
#         gr.Image(label="Upload Image to Augment", sources=['upload', 'webcam'], type='pil'),
#         gr.CheckboxGroup(["rotate", "add_noise", "sharpen"], label="Augmentation Methods"),
#         gr.Slider(0, 360, value=0, step=1, label="Rotate Degrees", visible=True),
#         gr.Slider(0, 100, value=0, step=1, label="Noise Level", visible=True),
#         gr.Slider(0, 200, value=1, step=1, label="Sharpen Strength", visible=True)
#     ],
#     outputs=gr.Image(label="Augmented Image", type='pil'),
#     title="Image Augmentation Tool",
#     description="Apply various augmentation techniques to your image.",
#     api_name="augment_image"
# )

# def get_captured_logs():
#     # Retrieve all logs from the queue and clear it
#     logs = list(log_queue)
#     log_queue.clear()  # Clear the queue after retrieving
#     return "\n".join(logs)


demo = gr.TabbedInterface(
    [
        detection_model_eval_playground,
        community_forensics_preview,
        noise_estimation_interface,
        bit_plane_interface,
        ela_interface,
        gradient_processing_interface,
        minmax_processing_interface,
        # gr.Textbox(label="Agent Logs", interactive=False, lines=5, max_lines=20, autoscroll=True)  # New textbox for logs
    ],
    [
        "Run Ensemble Prediction",
        "Open-Source SOTA Model",
        "Wavelet Blocking Noise Estimation",
        "Bit Plane Values",
        "Error Level Analysis (ELA)",
        "Gradient Processing",
        "MinMax Processing",
        # "Agent Logs"  # New tab title
    ],
    title="Deepfake Detection & Forensics Tools",
    theme=None,
)
footerMD = """
## ⚠️ ENSEMBLE TEAM IN TRAINING ⚠️ \n\n

1. **DISCLAIMER: METADATA AS WELL AS MEDIA SUBMITTED TO THIS SPACE MAY BE VIEWED AND SELECTED FOR FUTURE DATASETS, PLEASE DO NOT SUBMIT PERSONAL CONTENT. FOR UNTRACKED, PRIVATE USE OF THE MODELS YOU MAY STILL USE [THE ORIGINAL SPACE HERE](https://huggingface.co/spaces/aiwithoutborders-xyz/OpenSight-Deepfake-Detection-Models-Playground), SOTA MODEL INCLUDED.**
2. **UPDATE 6-13-25**: APOLOGIES FOR THE CONFUSION, WE ARE WORKING TO REVERT THE ORIGINAL REPO BACK TO ITS NON-DATA COLLECTION STATE -- ONLY THE "SIMPLE PREDICTION" ENDPOINT IS CURRENTLY 100% PRIVATE. PLEASE STAY TUNED AS WE FIGURE OUT A SOLUTION FOR THE ENSEMBLE + AGENT TEAM ENDPOINT. IT CAN GET RESOURCE INTENSIVE TO RUN A FULL PREDICTION. ALTERNATIVELY, WE **ENCOURAGE** ANYONE TO FORK AND CONTRIBUTE TO THE PROJECT.
3. **UPDATE 6-13-25 (cont.)**: WHILE WE HAVE NOT TAKEN A STANCE ON NSFW AND EXPLICIT CONTENT, PLEASE REFRAIN FROM ... YOUR HUMAN DESIRES UNTIL WE GET THIS PRIVACY SITUATION SORTED OUT. DO NOT BE RECKLESS PLEASE. OUR PAPER WILL BE OUT SOON ON ARXIV WHICH WILL EXPLAIN EVERYTHING WITH DATA-BACKED RESEARCH ON WHY THIS PROJECT IS NEEDED, BUT WE CANNOT DO IT WITHOUT THE HELP OF THE COMMUNITY.

TO SUMMARIZE: DATASET COLLECTION WILL CONTINUE FOR OUR NOVEL ENSEMBLE-TEAM PREDICTION PIPELINE UNTIL WE CAN GET THINGS SORTED OUT. FOR THOSE THAT WISH TO OPT-OUT, WE OFFER THE SIMPLE, BUT [MOST POWERFUL DETECTION MODEL HERE.](https://huggingface.co/spaces/aiwithoutborders-xyz/OpenSight-Community-Forensics-Preview)

"""
footer = gr.Markdown(footerMD, elem_classes="footer")

with gr.Blocks() as app:
    demo.render()
    footer.render()


app.queue(max_size=10, default_concurrency_limit=2).launch(mcp_server=True)
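A minimal client-side sketch of how the streaming ensemble endpoint defined above (api_name="predict") could be called with gradio_client; the Space id is a placeholder and the positional arguments mirror the inputs of detection_model_eval_playground.

# Illustrative sketch only; "your-username/your-space" is a placeholder Space id.
from gradio_client import Client, handle_file

client = Client("your-username/your-space")
result = client.predict(
    handle_file("path/to/image.jpg"),  # Upload Image to Analyze
    0.7,                               # Confidence Threshold
    0,                                 # Rotate Degrees (hidden slider)
    0,                                 # Noise Level (hidden slider)
    0,                                 # Sharpen Strength (hidden slider)
    api_name="/predict"
)
print(result)  # final (processed image, gallery, table rows, raw JSON, consensus HTML)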
forensics/__init__.py
ADDED
@@ -0,0 +1,15 @@
from .bitplane import bit_plane_extractor
from .ela import ELA
# from .exif import exif_full_dump
from .gradient import gradient_processing
from .minmax import minmax_process
from .wavelet import noise_estimation

__all__ = [
    'bit_plane_extractor',
    'ELA',
    # 'exif_full_dump',
    'gradient_processing',
    'minmax_process',
    'noise_estimation'
]
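A short usage sketch for the helpers re-exported above; the input file name is a placeholder and the calls follow the signatures added in the forensics modules below.

# Illustrative sketch; "sample.jpg" is a placeholder input path.
import numpy as np
from PIL import Image
from forensics import ELA, gradient_processing, minmax_process, noise_estimation, bit_plane_extractor

img = Image.open("sample.jpg").convert("RGB")
img_np = np.array(img)
ela_map = ELA(img_np, quality=75, scale=50, contrast=20, linear=False, grayscale=True)
gradient_map = gradient_processing(img_np)
minmax_map = minmax_process(img_np, channel=4, radius=2)
noise_map = noise_estimation(img, blocksize=8)
bitplane_map = bit_plane_extractor(img, channel="Luminance", bit=0)
ela_map.save("ela.png")  # each helper returns a PIL image that can be saved or displayed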
{utils → forensics}/bitplane.py
RENAMED
@@ -8,7 +8,14 @@ def bit_plane_extractor(
     bit: int = 0,
     filter_type: str = "Disabled"
 ) -> Image.Image:
-    """Extract and visualize a bit plane from a selected channel of the image."""
+    """Extract and visualize a bit plane from a selected channel of the image.
+
+    Args:
+        image (Image.Image, string: filepath): The input image to analyze.
+        channel (str, optional): The channel to extract. Defaults to "Luminance".
+        bit (int, optional): The bit to extract. Defaults to 0.
+        filter_type (str, optional): The type of filter to apply. Defaults to "Disabled".
+    """
     img = np.array(image.convert("RGB"))
     if channel == "Luminance":
         img = cv.cvtColor(img, cv.COLOR_RGB2GRAY)
{utils → forensics}/ela.py
RENAMED
@@ -24,7 +24,7 @@ def elapsed_time(start):
     """Calculate elapsed time since start."""
     return f"{time() - start:.3f}s"
 
-def genELA(img, quality=75, scale=50, contrast=20, linear=False, grayscale=False):
+def ELA(img, quality=75, scale=50, contrast=20, linear=False, grayscale=False):
     """
     Perform Error Level Analysis on an image.
 
@@ -61,4 +61,4 @@ def genELA(img, quality=75, scale=50, contrast=20, linear=False, grayscale=False):
     if grayscale:
         ela = desaturate(ela)
 
-    return Image.fromarray(ela)
+    return Image.fromarray(ela)
forensics/exif.py
ADDED
@@ -0,0 +1,11 @@
# import tempfile
# import exiftool
# from PIL import Image

# def exif_full_dump(image: Image.Image) -> dict:
#     """Extract all EXIF metadata from an image using exiftool."""
#     with tempfile.NamedTemporaryFile(suffix='.jpg', delete=True) as tmp:
#         image.save(tmp.name)
#         with exiftool.ExifTool() as et:
#             metadata = et.get_metadata(tmp.name)
#         return metadata
{utils → forensics}/gradient.py
RENAMED
@@ -19,6 +19,18 @@ def create_lut(intensity, gamma):
     return lut
 
 def gradient_processing(image, intensity=90, blue_mode="Abs", invert=False, equalize=False):
+    """Apply gradient processing to an image.
+
+    Args:
+        image (Image.Image, string: filepath): The input image to analyze.
+        intensity (int, optional): The intensity of the gradient. Defaults to 90.
+        blue_mode (str, optional): The mode to use for the blue channel. Defaults to "Abs".
+        invert (bool, optional): Whether to invert the gradient. Defaults to False.
+        equalize (bool, optional): Whether to equalize the gradient. Defaults to False.
+
+    Returns:
+        Image.Image: A PIL image of the gradient.
+    """
     image = np.array(image)
     dx, dy = cv.spatialGradient(cv.cvtColor(image, cv.COLOR_BGR2GRAY))
     intensity = int(intensity / 100 * 127)
@@ -47,4 +59,4 @@ def gradient_processing(image, intensity=90, blue_mode="Abs", invert=False, equalize=False):
         gradient = equalize_img(gradient)
     elif intensity > 0:
         gradient = cv.LUT(gradient, create_lut(intensity, intensity))
-    return Image.fromarray(gradient)
+    return Image.fromarray(gradient)
{utils → forensics}/minmax.py
RENAMED
@@ -27,7 +27,19 @@ def blk_filter(img, radius):
     )
     return cv.normalize(result, None, 0, 127, cv.NORM_MINMAX, cv.CV_8UC1)
 
-def preprocess(image, channel=4, radius=2):
+def minmax_process(image, channel=4, radius=2):
+    """
+    Analyzes local pixel value deviations in an image to detect potential manipulation by comparing each pixel with its neighbors. This forensic technique helps identify areas where pixel values deviate significantly from their local context, which can indicate digital tampering or editing.
+
+    Args:
+        image: Union[np.ndarray, Image.Image]: Input image to process
+        channel (int): Channel to process (0:Grayscale, 1:Blue, 2:Green, 3:Red, 4:RGB Norm)
+        radius (int): Radius for block filtering
+    Returns:
+        PIL.Image.Image: The processed image showing minmax deviations.
+    Raises:
+        ValueError: If the input image is invalid or channel/radius parameters are out of valid range.
+    """
     if not isinstance(image, np.ndarray):
         image = np.array(image)  # Ensure image is a NumPy array
     if channel == 0:
@@ -79,4 +91,4 @@ def preprocess(image, channel=4, radius=2):
     elif channel == 3:
         minmax[low] = [255, 255, 255]
         minmax[high] = [255, 255, 255]
-    return Image.fromarray(minmax)
+    return Image.fromarray(minmax)
{utils → forensics}/wavelet.py
RENAMED
@@ -3,8 +3,16 @@ import pywt
 import cv2
 from PIL import Image
 
-def
-    """Estimate local noise using wavelet blocking. Returns a PIL image of the noise map.
+def noise_estimation(image: Image.Image, blocksize: int = 8) -> Image.Image:
+    """Estimate local noise using wavelet blocking. Returns a PIL image of the noise map.
+
+    Args:
+        image (Image.Image, string: filepath): The input image to analyze.
+        blocksize (int): The size of the blocks to use for wavelet blocking.
+
+    Returns:
+        Image.Image: A PIL image of the noise map.
+    """
     im = np.array(image.convert('L'))
     y = np.double(im)
     cA1, (cH, cV, cD) = pywt.dwt2(y, 'db8')
graph.svg
ADDED

graph_alt.svg
ADDED

hf_inference_logs/log_20250611031830376635.json
ADDED
The diff for this file is too large to render. See raw diff

preview/.gitkeep
ADDED
File without changes

preview/1.png
ADDED
Git LFS Details

preview/127.0.0.1_7860__.png
ADDED
Git LFS Details

preview/2.png
ADDED
Git LFS Details

preview/3.png
ADDED
Git LFS Details

preview/4.png
ADDED
Git LFS Details

preview/graph.png
ADDED
requirements.txt
CHANGED
@@ -1,18 +1,37 @@
-
-
-transformers
-
+
+# Core ML/AI libraries
+git+https://github.com/huggingface/transformers.git
+torch
 torchvision
-
-
-# pillow
+torchaudio
+# Image processing
 opencv-python
-modelscope_studio
-pydantic==2.10.6
-tf-keras
-PyWavelets
-pyexiftool
-psutil
-datasets
 Pillow
-
+
+# Wavelet processing
+PyWavelets==1.8.0
+
+# System utilities
+psutil
+GPUtil
+python-dotenv
+onnxruntime
+# Gradio and UI
+gradio[mcp]>=5.33.1
+# gradio_leaderboard==0.0.13
+gradio_client==1.10.3
+spaces
+gradio_log
+
+# HuggingFace ecosystem
+huggingface_hub[hf_xet]>=0.31.0
+datasets>=3.5.0
+
+# Data validation and utilities
+pydantic==2.11.5
+
+# AI agents
+smolagents[toolkit,mcp,openai,transformers,vision,gradio]
+
+# Optional: EXIF metadata (if needed)
+pyexiftool
temp_model_config/config.json
ADDED
@@ -0,0 +1,28 @@
{
  "_attn_implementation_autoset": true,
  "_name_or_path": "aiwithoutborders-xyz/CommunityForensics-DeepfakeDet-ViT",
  "architectures": [
    "ViTForImageClassification"
  ],
  "attention_probs_dropout_prob": 0.0,
  "encoder_stride": 16,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.0,
  "hidden_size": 384,
  "image_size": 384,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-06,
  "mlp_ratio": 4,
  "model_type": "vit",
  "num_attention_heads": 12,
  "num_channels": 3,
  "num_classes": 1,
  "num_heads": 6,
  "num_hidden_layers": 12,
  "num_layers": 12,
  "patch_size": 16,
  "qkv_bias": true,
  "torch_dtype": "float32",
  "transformers_version": "4.49.0"
}
temp_original_vit_config/config.json
ADDED
@@ -0,0 +1,26 @@
{
  "architectures": [
    "ViTForImageClassification"
  ],
  "attention_probs_dropout_prob": 0.0,
  "encoder_stride": 16,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.0,
  "hidden_size": 384,
  "image_size": 384,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-06,
  "mlp_ratio": 4,
  "model_type": "vit",
  "num_attention_heads": 12,
  "num_channels": 3,
  "num_classes": 1,
  "num_heads": 6,
  "num_hidden_layers": 12,
  "num_layers": 12,
  "patch_size": 16,
  "qkv_bias": true,
  "torch_dtype": "float32",
  "transformers_version": "4.50.0.dev0"
}
utils/exif.py
DELETED
@@ -1,11 +0,0 @@
-import tempfile
-import exiftool
-from PIL import Image
-
-def exif_full_dump(image: Image.Image) -> dict:
-    """Extract all EXIF metadata from an image using exiftool."""
-    with tempfile.NamedTemporaryFile(suffix='.jpg', delete=True) as tmp:
-        image.save(tmp.name)
-        with exiftool.ExifTool() as et:
-            metadata = et.get_metadata(tmp.name)
-        return metadata
utils/hf_logger.py
CHANGED
@@ -5,13 +5,13 @@ import io
 import datetime
 from PIL import Image
 import logging
-from huggingface_hub import HfApi,
+from huggingface_hub import HfApi, CommitScheduler
 import numpy as np
 
 logger = logging.getLogger(__name__)
 
 HF_DATASET_NAME = "aiwithoutborders-xyz/degentic_rd0"
-LOCAL_LOG_DIR = "./hf_inference_logs"
+LOCAL_LOG_DIR = "./hf_inference_logs"
 
 # Custom JSON Encoder to handle numpy types
 class NumpyEncoder(json.JSONEncoder):
@@ -20,18 +20,21 @@ class NumpyEncoder(json.JSONEncoder):
             return float(obj)
         return json.JSONEncoder.default(self, obj)
 
-def
-    """
-    # Explicitly check if the input is a PIL Image
+def _save_pil_image_to_file(image: Image.Image, directory: str, prefix: str) -> str:
+    """Saves a PIL Image to a file and returns its filename."""
     if not isinstance(image, Image.Image):
         raise TypeError(f"Expected a PIL Image, but received type: {type(image)}")
 
-
-
+    os.makedirs(directory, exist_ok=True)
+    timestamp_str = datetime.datetime.now().strftime("%Y%m%d%H%M%S%f")
+    filename = f"{prefix}_{timestamp_str}.png"
+    file_path = os.path.join(directory, filename)
+
     if image.mode != 'RGB':
         image = image.convert('RGB')
-    image.save(
-
+    image.save(file_path, format="PNG")
+    logger.info(f"Saved image to: {file_path}")
+    return filename
 
 # The initialize_dataset function will change significantly or be removed/simplified
 # as we are no longer appending to a datasets.Dataset object directly in memory
@@ -59,26 +62,26 @@ def log_inference_data(
     try:
         api = initialize_dataset_repo()  # Get or create the repository
 
-
+        original_image_filename = _save_pil_image_to_file(original_image, LOCAL_LOG_DIR, "original")
 
-
+        forensic_images_filenames = []
         for img_item in forensic_images:
             if img_item is not None:
                 if not isinstance(img_item, Image.Image):
                     try:
                         img_item = Image.fromarray(img_item)
                     except Exception as e:
-                        logger.error(f"Error converting forensic image to PIL for
+                        logger.error(f"Error converting forensic image to PIL for saving: {e}")
                         continue
-
+                forensic_images_filenames.append(_save_pil_image_to_file(img_item, LOCAL_LOG_DIR, "forensic"))
 
         new_entry = {
             "timestamp": datetime.datetime.now().isoformat(),
-            "image":
+            "image": original_image_filename,
             "inference_request": inference_params,
             "model_predictions": model_predictions,
             "ensemble_output": ensemble_output,
-            "forensic_outputs":
+            "forensic_outputs": forensic_images_filenames,
             "agent_monitoring_data": agent_monitoring_data,
             "human_feedback": human_feedback if human_feedback is not None else {}
         }
@@ -92,7 +95,18 @@ def log_inference_data(
         with open(log_file_path, 'w', encoding='utf-8') as f:
             json.dump(new_entry, f, cls=NumpyEncoder, indent=2)
 
-
+        # Schedule commit to Hugging Face dataset repository
+        scheduler = CommitScheduler(
+            repo_id=HF_DATASET_NAME,
+            repo_type="dataset",
+            folder_path=LOCAL_LOG_DIR,
+            path_in_repo="logs",
+            token=os.getenv("HF_TOKEN"),
+            every=10  # Commit every 10 minutes
+        )
+
+        with scheduler:
+            logger.info(f"Inference data logged successfully to local file: {log_file_path}")
 
     except Exception as e:
         logger.error(f"Failed to log inference data to local file: {e}")
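A hedged sketch of the more common CommitScheduler pattern from huggingface_hub, where a single scheduler is created up front and pushes the watched folder in the background; note that `every` is measured in minutes. The repo id and folder mirror the constants above.

# Sketch under stated assumptions; not part of the diff above.
import os
from huggingface_hub import CommitScheduler

scheduler = CommitScheduler(
    repo_id="aiwithoutborders-xyz/degentic_rd0",
    repo_type="dataset",
    folder_path="./hf_inference_logs",
    path_in_repo="logs",
    token=os.getenv("HF_TOKEN"),
    every=10,  # minutes between background commits
)
# Any file written under ./hf_inference_logs is then uploaded on the next scheduled commit.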
utils/load.py
ADDED
@@ -0,0 +1,51 @@
import os
import tempfile
from typing import Any, Callable, List, Optional, Tuple, Union
from urllib.parse import unquote, urlparse

import PIL.Image
import PIL.ImageOps
import requests

def load_image(
    image: Union[str, PIL.Image.Image], convert_method: Optional[Callable[[PIL.Image.Image], PIL.Image.Image]] = None
) -> PIL.Image.Image:
    """
    Loads `image` to a PIL Image.

    Args:
        image (`str` or `PIL.Image.Image`):
            The image to convert to the PIL Image format.
        convert_method (Callable[[PIL.Image.Image], PIL.Image.Image], *optional*):
            A conversion method to apply to the image after loading it. When set to `None` the image will be converted
            "RGB".

    Returns:
        `PIL.Image.Image`:
            A PIL Image.
    """
    if isinstance(image, str):
        if image.startswith("http://") or image.startswith("https://"):
            image = PIL.Image.open(requests.get(image, stream=True, timeout=600).raw)
        elif os.path.isfile(image):
            image = PIL.Image.open(image)
        else:
            raise ValueError(
                f"Incorrect path or URL. URLs must start with `http://` or `https://`, and {image} is not a valid path."
            )
    elif isinstance(image, PIL.Image.Image):
        image = image
    else:
        raise ValueError(
            "Incorrect format used for the image. Should be a URL linking to an image, a local path, or a PIL image."
        )

    image = PIL.ImageOps.exif_transpose(image)

    if convert_method is not None:
        image = convert_method(image)
    else:
        image = image.convert("RGB")

    return image
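A brief usage sketch for load_image; both the URL and local path below are placeholders.

# Illustrative only; URL and path are placeholders.
from utils.load import load_image

img_from_url = load_image("https://example.com/image.jpg")
img_from_disk = load_image("local/image.png", convert_method=lambda im: im.convert("L"))
print(img_from_url.size, img_from_disk.mode)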
{forensics → utils}/registry.py
RENAMED
@@ -2,7 +2,8 @@ from typing import Callable, Dict, Any, List, Optional
 
 class ModelEntry:
     def __init__(self, model: Any, preprocess: Callable, postprocess: Callable, class_names: List[str],
-                 display_name: Optional[str] = None, contributor: Optional[str] = None, model_path: Optional[str] = None):
+                 display_name: Optional[str] = None, contributor: Optional[str] = None, model_path: Optional[str] = None,
+                 architecture: Optional[str] = None, dataset: Optional[str] = None):
         self.model = model
         self.preprocess = preprocess
         self.postprocess = postprocess
@@ -10,8 +11,10 @@ class ModelEntry:
         self.display_name = display_name
         self.contributor = contributor
         self.model_path = model_path
+        self.architecture = architecture
+        self.dataset = dataset
 
 MODEL_REGISTRY: Dict[str, ModelEntry] = {}
 
-def register_model(model_id: str, model: Any, preprocess: Callable, postprocess: Callable, class_names: List[str]):
-    MODEL_REGISTRY[model_id] = ModelEntry(model, preprocess, postprocess, class_names)
+def register_model(model_id: str, model: Any, preprocess: Callable, postprocess: Callable, class_names: List[str], architecture: Optional[str] = None, dataset: Optional[str] = None):
+    MODEL_REGISTRY[model_id] = ModelEntry(model, preprocess, postprocess, class_names, architecture=architecture, dataset=dataset)
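A minimal sketch of the extended register_model signature shown above; the callable and postprocess function are stand-in lambdas for illustration, not models from this repository.

# Illustrative stand-ins only.
from utils.registry import MODEL_REGISTRY, register_model

register_model(
    model_id="model_demo",
    model=lambda img: {"AI": 0.9, "REAL": 0.1},      # placeholder callable
    preprocess=None,
    postprocess=lambda output, class_names: output,  # placeholder postprocess
    class_names=["AI", "REAL"],
    architecture="ViT",
    dataset="TBA",
)
entry = MODEL_REGISTRY["model_demo"]
print(entry.architecture, entry.dataset)  # -> ViT TBA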
utils/utils.py
CHANGED
@@ -5,7 +5,8 @@ from torchvision import transforms
 
 def softmax(vector):
     e = np.exp(vector - np.max(vector))  # for numerical stability
-
+    probabilities = e / e.sum()
+    return [float(p.item()) for p in probabilities]  # Convert numpy array elements to Python floats using .item()
 
 def augment_image(img_pil, methods, rotate_degrees=0, noise_level=0, sharpen_strength=1):
     for method in methods:
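A tiny worked example of the fixed softmax, assuming a numpy logits vector.

# Illustrative only.
import numpy as np
from utils.utils import softmax

logits = np.array([2.0, 1.0, 0.1])
probs = softmax(logits)   # approximately [0.659, 0.242, 0.099], returned as plain Python floats
print(sum(probs))         # sums to 1.0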