OpenSight-Deepfake-Detection-Models-Playground

Running

LPX committed on Jun 7

Commit

932e7b4

1 Parent(s): 6efb635

major(huge refactoring)

- Added entries to .gitignore for Python cache files and directories.
- Updated README.md title, description, and SDK version.
- Changed app file reference from app.py to app_mcp.py.

Files changed (4) hide show

.gitignore +3 -1
README.md +5 -5
app_mcp.py +326 -0
forensics/registry.py +13 -0

.gitignore CHANGED Viewed

@@ -2,4 +2,6 @@
 *goat.py
 .vscode
 *onnx.py
-./models/*

 *goat.py
 .vscode
 *onnx.py
+./models/*
+forensics/__pycache__/*
+*.cpython-311.pyc

README.md CHANGED Viewed

@@ -1,11 +1,12 @@
 ---
-title: OpenSight - Deepfake Detection Models Eval
-emoji: 🏆
 colorFrom: yellow
 colorTo: yellow
 sdk: gradio
-sdk_version: 5.25.2
-app_file: app.py
 pinned: true
 models:
 - aiwithoutborders-xyz/OpenSight-CommunityForensics-Deepfake-ViT
@@ -13,7 +14,6 @@ models:
 - haywoodsloan/ai-image-detector-deploy
 - cmckinle/sdxl-flux-detector
 - Organika/sdxl-detector
-- prithivMLmods/Deepfake-Detection-Exp-02-22
 license: mit
 ---

 ---
+title: Deepfake Detection & Forensics Tools
+description: MCP Server for Deepfake Detection & Digital Forensics Tools
+emoji: 🚑
 colorFrom: yellow
 colorTo: yellow
 sdk: gradio
+sdk_version: 5.33.0
+app_file: app_mcp.py
 pinned: true
 models:
 - aiwithoutborders-xyz/OpenSight-CommunityForensics-Deepfake-ViT
 - haywoodsloan/ai-image-detector-deploy
 - cmckinle/sdxl-flux-detector
 - Organika/sdxl-detector
 license: mit
 ---

app_mcp.py ADDED Viewed

	@@ -0,0 +1,326 @@

+import os
+from typing import Literal
+import spaces
+import gradio as gr
+import modelscope_studio.components.antd as antd
+import modelscope_studio.components.antdx as antdx
+import modelscope_studio.components.base as ms
+from transformers import pipeline, AutoImageProcessor, SwinForImageClassification, Swinv2ForImageClassification, AutoFeatureExtractor, AutoModelForImageClassification
+from torchvision import transforms
+import torch
+from PIL import Image
+import numpy as np
+import io
+import logging
+from utils.utils import softmax, augment_image, convert_pil_to_bytes
+from utils.gradient import gradient_processing
+from utils.minmax import preprocess as minmax_preprocess
+from utils.ela import genELA as ELA
+from forensics.registry import register_model, MODEL_REGISTRY
+# Configure root logging once at import time; INFO is the baseline verbosity.
+logging.basicConfig(level=logging.INFO)
+# Module-level logger named after this module.
+logger = logging.getLogger(__name__)
+# Ensure using GPU if available; the models loaded below are moved to this device.
+device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+# Inline style dictionaries (camelCase CSS property names) for page regions.
+# NOTE(review): none of these dicts is referenced by the layout code shown in
+# this diff — confirm whether they are still needed.
+header_style = {
+    "textAlign": 'center',
+    "color": '#fff',
+    "height": 64,
+    "paddingInline": 48,
+    "lineHeight": '64px',
+    "backgroundColor": '#4096ff',
+}
+content_style = {
+    "textAlign": 'center',
+    "minHeight": 120,
+    "lineHeight": '120px',
+    "color": '#fff',
+    "backgroundColor": '#0958d9',
+}
+sider_style = {
+    "textAlign": 'center',
+    "lineHeight": '120px',
+    "color": '#fff',
+    "backgroundColor": '#1677ff',
+}
+footer_style = {
+    "textAlign": 'center',
+    "color": '#fff',
+    "backgroundColor": '#4096ff',
+}
+layout_style = {
+    "borderRadius": 8,
+    "overflow": 'hidden',
+    "width": 'calc(100% - 8px)',
+    "maxWidth": 'calc(100% - 8px)',
+}
+# Model paths and class names
+# MODEL_PATHS maps the internal model id to the repository id passed to
+# `from_pretrained` / `pipeline` below.
+MODEL_PATHS = {
+    "model_1": "haywoodsloan/ai-image-detector-deploy",
+    "model_2": "Heem2/AI-vs-Real-Image-Detection",
+    "model_3": "Organika/sdxl-detector",
+    "model_4": "cmckinle/sdxl-flux-detector_v1.1",
+    "model_5": "prithivMLmods/Deep-Fake-Detector-v2-Model",
+    "model_5b": "prithivMLmods/Deepfake-Detection-Exp-02-22",
+    "model_6": "ideepankarsharma2003/AI_ImageClassification_MidjourneyV6_SDXL",
+    "model_7": "date3k2/vit-real-fake-classification-v4"
+}
+# CLASS_NAMES maps the same ids to label lists. The logits-based
+# post-processors index these by position; `postprocess_pipeline` ignores
+# them and uses the labels returned by the pipeline itself.
+# NOTE(review): label order is assumed to match each model's output index
+# order — verify against the model configs.
+CLASS_NAMES = {
+    "model_1": ['artificial', 'real'],
+    "model_2": ['AI Image', 'Real Image'],
+    "model_3": ['AI', 'Real'],
+    "model_4": ['AI', 'Real'],
+    "model_5": ['Realism', 'Deepfake'],
+    "model_5b": ['Real', 'Deepfake'],
+    "model_6": ['ai_gen', 'human'],
+    "model_7": ['Fake', 'Real'],
+}
+def preprocess_resize_256(image):
+    if image.mode != 'RGB':
+        image = image.convert('RGB')
+    return transforms.Resize((256, 256))(image)
+def preprocess_resize_224(image):
+    if image.mode != 'RGB':
+        image = image.convert('RGB')
+    return transforms.Resize((224, 224))(image)
+def postprocess_pipeline(prediction, class_names):
+    # Assumes HuggingFace pipeline output
+    return {pred['label']: pred['score'] for pred in prediction}
+def postprocess_logits(outputs, class_names):
+    # Assumes model output with logits
+    logits = outputs.logits.cpu().numpy()[0]
+    probabilities = softmax(logits)
+    return {class_names[i]: probabilities[i] for i in range(len(class_names))}
+# Load and register models (example for two models)
+# model_1: explicit Swinv2 model + image processor wrapped in a pipeline.
+image_processor_1 = AutoImageProcessor.from_pretrained(MODEL_PATHS["model_1"], use_fast=True)
+model_1 = Swinv2ForImageClassification.from_pretrained(MODEL_PATHS["model_1"]).to(device)
+clf_1 = pipeline(model=model_1, task="image-classification", image_processor=image_processor_1, device=device)
+register_model(
+    "model_1",
+    clf_1,
+    preprocess_resize_256,
+    postprocess_pipeline,
+    CLASS_NAMES["model_1"]
+)
+# model_2: pipeline built directly from the repository id.
+clf_2 = pipeline("image-classification", model=MODEL_PATHS["model_2"], device=device)
+register_model(
+    "model_2",
+    clf_2,
+    preprocess_resize_224,
+    postprocess_pipeline,
+    CLASS_NAMES["model_2"]
+)
+# Register remaining models
+# model_3 is called directly (see model3_infer) instead of through a pipeline.
+# NOTE(review): `device=` is passed to the feature extractor's from_pretrained;
+# confirm this keyword has the intended effect.
+feature_extractor_3 = AutoFeatureExtractor.from_pretrained(MODEL_PATHS["model_3"], device=device)
+model_3 = AutoModelForImageClassification.from_pretrained(MODEL_PATHS["model_3"]).to(device)
+def preprocess_256(image):
+    if image.mode != 'RGB':
+        image = image.convert('RGB')
+    return transforms.Resize((256, 256))(image)
+def postprocess_logits_model3(outputs, class_names):
+    logits = outputs.logits.cpu().numpy()[0]
+    probabilities = softmax(logits)
+    return {class_names[i]: probabilities[i] for i in range(len(class_names))}
+def model3_infer(image):
+    inputs = feature_extractor_3(image, return_tensors="pt").to(device)
+    with torch.no_grad():
+        outputs = model_3(**inputs)
+    return outputs
+# model_3 is registered with a plain function as its "model" callable.
+register_model(
+    "model_3",
+    model3_infer,
+    preprocess_256,
+    postprocess_logits_model3,
+    CLASS_NAMES["model_3"]
+)
+# model_4 follows the same direct-call pattern as model_3.
+# NOTE(review): `device=` is passed to the feature extractor's from_pretrained;
+# confirm this keyword has the intended effect.
+feature_extractor_4 = AutoFeatureExtractor.from_pretrained(MODEL_PATHS["model_4"], device=device)
+model_4 = AutoModelForImageClassification.from_pretrained(MODEL_PATHS["model_4"]).to(device)
+def model4_infer(image):
+    inputs = feature_extractor_4(image, return_tensors="pt").to(device)
+    with torch.no_grad():
+        outputs = model_4(**inputs)
+    return outputs
+def postprocess_logits_model4(outputs, class_names):
+    logits = outputs.logits.cpu().numpy()[0]
+    probabilities = softmax(logits)
+    return {class_names[i]: probabilities[i] for i in range(len(class_names))}
+# model_4: direct-call model with logits post-processing.
+register_model(
+    "model_4",
+    model4_infer,
+    preprocess_256,
+    postprocess_logits_model4,
+    CLASS_NAMES["model_4"]
+)
+# model_5 / model_5b: pipelines built directly from the repository ids.
+clf_5 = pipeline("image-classification", model=MODEL_PATHS["model_5"], device=device)
+register_model(
+    "model_5",
+    clf_5,
+    preprocess_resize_224,
+    postprocess_pipeline,
+    CLASS_NAMES["model_5"]
+)
+clf_5b = pipeline("image-classification", model=MODEL_PATHS["model_5b"], device=device)
+register_model(
+    "model_5b",
+    clf_5b,
+    preprocess_resize_224,
+    postprocess_pipeline,
+    CLASS_NAMES["model_5b"]
+)
+# model_6: explicit Swin model + image processor wrapped in a pipeline.
+image_processor_6 = AutoImageProcessor.from_pretrained(MODEL_PATHS["model_6"], use_fast=True)
+model_6 = SwinForImageClassification.from_pretrained(MODEL_PATHS["model_6"]).to(device)
+clf_6 = pipeline(model=model_6, task="image-classification", image_processor=image_processor_6, device=device)
+register_model(
+    "model_6",
+    clf_6,
+    preprocess_resize_224,
+    postprocess_pipeline,
+    CLASS_NAMES["model_6"]
+)
+# model_7: auto-class model + image processor wrapped in a pipeline.
+image_processor_7 = AutoImageProcessor.from_pretrained(MODEL_PATHS["model_7"], use_fast=True)
+model_7 = AutoModelForImageClassification.from_pretrained(MODEL_PATHS["model_7"]).to(device)
+clf_7 = pipeline(model=model_7, task="image-classification", image_processor=image_processor_7, device=device)
+register_model(
+    "model_7",
+    clf_7,
+    preprocess_resize_224,
+    postprocess_pipeline,
+    CLASS_NAMES["model_7"]
+)
+# Generic inference function
+def infer(image: Image.Image, model_id: str, confidence_threshold: float = 0.75) -> dict:
+    entry = MODEL_REGISTRY[model_id]
+    img = entry.preprocess(image)
+    try:
+        result = entry.model(img)
+        result = entry.postprocess(result, entry.class_names)
+        # Add confidence threshold logic if needed
+        return result
+    except Exception as e:
+        return {"error": str(e)}
+# Update predict_image to use all registered models in order
+def predict_image(img, confidence_threshold):
+    model_ids = [
+        "model_1", "model_2", "model_3", "model_4", "model_5", "model_5b", "model_6", "model_7"
+    ]
+    results = [infer(img, model_id, confidence_threshold) for model_id in model_ids]
+    return img, results
+# Update predict_image_with_json to return results as a list of dicts
+def predict_image_with_json(img, confidence_threshold, augment_methods, rotate_degrees, noise_level, sharpen_strength):
+    if augment_methods:
+        img_pil, _ = augment_image(img, augment_methods, rotate_degrees, noise_level, sharpen_strength)
+    else:
+        img_pil = img
+    img_pil, results = predict_image(img_pil, confidence_threshold)
+    img_np = np.array(img_pil)  # Convert PIL Image to NumPy array
+    img_np_og = np.array(img)  # Convert PIL Image to NumPy array
+    gradient_image = gradient_processing(img_np)  # Added gradient processing
+    minmax_image = minmax_preprocess(img_np)  # Added MinMax processing
+    # First pass - standard analysis
+    ela1 = ELA(img_np_og, quality=75, scale=50, contrast=20, linear=False, grayscale=True)
+    # Second pass - enhanced visibility
+    ela2 = ELA(img_np_og, quality=75, scale=75, contrast=25, linear=False, grayscale=True)
+    ela3 = ELA(img_np_og, quality=75, scale=75, contrast=25, linear=False, grayscale=False)
+    forensics_images = [img_pil, ela1, ela2, ela3, gradient_image, minmax_image]
+    return img_pil, forensics_images, results
+# Build the Gradio UI: a welcome banner followed by three tabs (playground,
+# community forensics preview, leaderboard placeholder).
+with gr.Blocks(css="#post-gallery { overflow: hidden !important;} .grid-wrap{ overflow-y: hidden !important;} .ms-gr-ant-welcome-icon{ height:unset !important;} .tabs{margin-top:10px;}") as iface:
+    with ms.Application() as app:
+        with antd.ConfigProvider():
+            # NOTE(review): the banner and markdown below mention "6 models",
+            # while predict_image runs eight model ids — confirm the wording.
+            antdx.Welcome(
+                icon=
+                "https://cdn-avatars.huggingface.co/v1/production/uploads/639daf827270667011153fbc/WpeSFhuB81DY-1TjNUmV_.png",
+                title="Welcome to Project OpenSight",
+                description=
+                "The OpenSight aims to be an open-source SOTA generated image detection model. This HF Space is not only an introduction but a educational playground for the public to evaluate and challenge current open source models.  **Space will be upgraded shortly; inference on all 6 models should take about 1.2~ seconds.** "
+            )
+            with gr.Tab("👀 Detection Models Eval / Playground"):
+                gr.Markdown("# Open Source Detection Models Found on the Hub\n\n - **Space will be upgraded shortly;** inference on all 6 models should take about 1.2~ seconds once we're back on CUDA.\n - The **Community Forensics** mother of all detection models is now available for inference, head to the middle tab above this.\n - Lots of exciting things coming up, stay tuned!")
+                with gr.Row():
+                    # Left column: image input, optional settings, action buttons.
+                    with gr.Column(scale=1):
+                        image_input = gr.Image(label="Upload Image to Analyze", sources=['upload', 'webcam'], type='pil')
+                        with gr.Accordion("Settings (Optional)", open=False, elem_id="settings_accordion"):
+                            augment_checkboxgroup = gr.CheckboxGroup(["rotate", "add_noise", "sharpen"], label="Augmentation Methods")
+                            # The three augmentation sliders start hidden and are revealed by the change handlers below.
+                            rotate_slider = gr.Slider(0, 45, value=2, step=1, label="Rotate Degrees", visible=False)
+                            noise_slider = gr.Slider(0, 50, value=4, step=1, label="Noise Level", visible=False)
+                            sharpen_slider = gr.Slider(0, 50, value=11, step=1, label="Sharpen Strength", visible=False)
+                            confidence_slider = gr.Slider(0.0, 1.0, value=0.75, step=0.05, label="Confidence Threshold")
+                        # Order must match the parameters of predict_image_with_json.
+                        inputs = [image_input, confidence_slider, augment_checkboxgroup, rotate_slider, noise_slider, sharpen_slider]
+                        predict_button = gr.Button("Predict")
+                        augment_button = gr.Button("Augment & Predict")
+                        image_output = gr.Image(label="Processed Image", visible=False)
+                    # Right column: prediction table and forensic image gallery.
+                    with gr.Column(scale=2):
+                        # Use Gradio-native Dataframe to display results
+                        # NOTE(review): the handler returns a list of dicts for this component — confirm it renders as intended.
+                        results_table = gr.Dataframe(label="Model Predictions", headers=None, datatype="auto")
+                        forensics_gallery = gr.Gallery(label="Post Processed Images", visible=True, columns=[4], rows=[2], container=False, height="auto", object_fit="contain", elem_id="post-gallery")
+                        # Order must match the tuple returned by predict_image_with_json.
+                        outputs = [image_output, forensics_gallery, results_table]
+                # Show/hide each augmentation slider based on whether its method is selected
+                augment_checkboxgroup.change(lambda methods: gr.update(visible="rotate" in methods), inputs=[augment_checkboxgroup], outputs=[rotate_slider])
+                augment_checkboxgroup.change(lambda methods: gr.update(visible="add_noise" in methods), inputs=[augment_checkboxgroup], outputs=[noise_slider])
+                augment_checkboxgroup.change(lambda methods: gr.update(visible="sharpen" in methods), inputs=[augment_checkboxgroup], outputs=[sharpen_slider])
+                # "Predict" uses whatever augmentation methods the user selected.
+                predict_button.click(
+                    fn=predict_image_with_json,
+                    inputs=inputs,
+                    outputs=outputs
+                )
+                # "Augment & Predict" substitutes a hidden checkbox group with all
+                # three methods preselected, so every augmentation is applied.
+                augment_button.click(  # Connect Augment button to the function
+                    fn=predict_image_with_json,
+                    inputs=[
+                        image_input,
+                        confidence_slider,
+                        gr.CheckboxGroup(["rotate", "add_noise", "sharpen"], value=["rotate", "add_noise", "sharpen"], visible=False),  # Default values
+                        rotate_slider,
+                        noise_slider,
+                        sharpen_slider
+                    ],
+                    outputs=outputs
+                )
+            with gr.Tab("👑 Community Forensics Preview"):
+                # Embeds another Space inside this tab.
+                temp_space = gr.load("aiwithoutborders-xyz/OpenSight-Community-Forensics-Preview", src="spaces")
+                # preview # no idea if this will work
+            with gr.Tab("🥇 Leaderboard"):
+                gr.Markdown("# AI Generated / Deepfake Detection Models Leaderboard: Soon™")
+# Launch the interface
+# NOTE(review): launch() is called with default arguments; the README describes
+# this app as an MCP server — confirm MCP mode is enabled elsewhere.
+iface.launch()

forensics/registry.py ADDED Viewed

	@@ -0,0 +1,13 @@

+from typing import Callable, Dict, Any, List
+class ModelEntry:
+    def __init__(self, model: Any, preprocess: Callable, postprocess: Callable, class_names: List[str]):
+        self.model = model
+        self.preprocess = preprocess
+        self.postprocess = postprocess
+        self.class_names = class_names
+MODEL_REGISTRY: Dict[str, ModelEntry] = {}
+def register_model(model_id: str, model: Any, preprocess: Callable, postprocess: Callable, class_names: List[str]):
+    MODEL_REGISTRY[model_id] = ModelEntry(model, preprocess, postprocess, class_names)