|
import os |
|
from typing import Literal |
|
import spaces |
|
import gradio as gr |
|
import modelscope_studio.components.antd as antd |
|
import modelscope_studio.components.antdx as antdx |
|
import modelscope_studio.components.base as ms |
|
from transformers import pipeline, AutoImageProcessor, SwinForImageClassification, Swinv2ForImageClassification, AutoFeatureExtractor, AutoModelForImageClassification |
|
from torchvision import transforms |
|
import torch |
|
from PIL import Image |
|
import numpy as np |
|
import io |
|
import logging |
|
from utils.utils import softmax, augment_image, convert_pil_to_bytes |
|
from utils.gradient import gradient_processing |
|
from utils.minmax import preprocess as minmax_preprocess |
|
from utils.ela import genELA as ELA |
|
from forensics.registry import register_model, MODEL_REGISTRY, ModelEntry |
|
|
|
|
|
|
|
# Root logging is configured at DEBUG level for the whole process.
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
|
|
|
# Inline style presets keyed by camelCase CSS property names.
# NOTE(review): none of these are referenced elsewhere in this module as
# reviewed; they are kept because other code may import them.
header_style = dict(
    textAlign='center',
    color='#fff',
    height=64,
    paddingInline=48,
    lineHeight='64px',
    backgroundColor='#4096ff',
)

content_style = dict(
    textAlign='center',
    minHeight=120,
    lineHeight='120px',
    color='#fff',
    backgroundColor='#0958d9',
)

sider_style = dict(
    textAlign='center',
    lineHeight='120px',
    color='#fff',
    backgroundColor='#1677ff',
)

footer_style = dict(
    textAlign='center',
    color='#fff',
    backgroundColor='#4096ff',
)

layout_style = dict(
    borderRadius=8,
    overflow='hidden',
    width='calc(100% - 8px)',
    maxWidth='calc(100% - 8px)',
)
|
|
|
# Model id -> repository path handed to the ``from_pretrained`` / ``pipeline``
# loaders further down in this module.
MODEL_PATHS = dict(
    model_1="haywoodsloan/ai-image-detector-deploy",
    model_2="Heem2/AI-vs-Real-Image-Detection",
    model_3="Organika/sdxl-detector",
    model_4="cmckinle/sdxl-flux-detector_v1.1",
    model_5="prithivMLmods/Deep-Fake-Detector-v2-Model",
    model_5b="prithivMLmods/Deepfake-Detection-Exp-02-22",
    model_6="ideepankarsharma2003/AI_ImageClassification_MidjourneyV6_SDXL",
    model_7="date3k2/vit-real-fake-classification-v4",
)
|
|
|
# Model id -> [AI/fake label, real label].
#
# The order is significant: ``infer`` reads index 0 as the "AI" score and
# index 1 as the "Real" score. The entries for model_5 and model_5b previously
# listed the real label first, which swapped the two scores for those models.
# Both of them are scored through ``postprocess_pipeline``, which looks scores
# up by label text only, so reordering affects nothing but that index mapping.
# NOTE(review): the label strings must match what each model emits — confirm
# against the model configs.
CLASS_NAMES = {
    "model_1": ['artificial', 'real'],
    "model_2": ['AI Image', 'Real Image'],
    "model_3": ['AI', 'Real'],
    "model_4": ['AI', 'Real'],
    "model_5": ['Deepfake', 'Realism'],
    "model_5b": ['Deepfake', 'Real'],
    "model_6": ['ai_gen', 'human'],
    "model_7": ['Fake', 'Real'],
}
|
|
|
def preprocess_resize_256(image):
    """Return *image* in RGB mode, resized to 256x256.

    The image is converted only when its mode is not already ``'RGB'``; the
    resize is done with torchvision's ``transforms.Resize``.
    """
    rgb_image = image if image.mode == 'RGB' else image.convert('RGB')
    resize = transforms.Resize((256, 256))
    return resize(rgb_image)
|
|
|
def preprocess_resize_224(image):
    """Return *image* in RGB mode, resized to 224x224.

    The image is converted only when its mode is not already ``'RGB'``; the
    resize is done with torchvision's ``transforms.Resize``.
    """
    rgb_image = image if image.mode == 'RGB' else image.convert('RGB')
    resize = transforms.Resize((224, 224))
    return resize(rgb_image)
|
|
|
def postprocess_pipeline(prediction, class_names):
    """Collapse pipeline predictions into a ``{label: score}`` mapping.

    Args:
        prediction: Iterable of mappings, each with ``'label'`` and ``'score'``
            keys. When a label repeats, the last score wins.
        class_names: Unused here; accepted so this function can be called the
            same way as ``postprocess_logits``.

    Returns:
        A dict mapping every predicted label to its score.
    """
    scores = {}
    for item in prediction:
        scores[item['label']] = item['score']
    return scores
|
|
|
def postprocess_logits(outputs, class_names):
    """Turn a model output's logits into a ``{class name: probability}`` dict.

    Only the first row of ``outputs.logits`` is used. It is moved to the CPU,
    converted to NumPy and passed through the project's ``softmax`` helper.

    Args:
        outputs: Object exposing a ``logits`` tensor.
        class_names: Names paired with the probabilities by position.

    Returns:
        A dict with one entry per name in *class_names*.
    """
    first_row = outputs.logits.cpu().numpy()[0]
    probs = softmax(first_row)
    return {name: probs[idx] for idx, name in enumerate(class_names)}
|
|
|
|
|
|
|
|
|
|
|
def register_model_with_metadata(model_id, model, preprocess, postprocess, class_names, display_name, contributor, model_path):
    """Build a ``ModelEntry`` with display metadata and store it in ``MODEL_REGISTRY``.

    Args:
        model_id: Key under which the entry is stored.
        model: Callable invoked on the preprocessed image.
        preprocess: Callable applied to the input image before inference.
        postprocess: Callable invoked as ``postprocess(result, class_names)``.
        class_names: Label names passed to *postprocess*.
        display_name: Stored on the entry as ``display_name``.
        contributor: Stored on the entry as ``contributor``.
        model_path: Stored on the entry as ``model_path``.
    """
    record = ModelEntry(model, preprocess, postprocess, class_names)
    metadata = {
        "display_name": display_name,
        "contributor": contributor,
        "model_path": model_path,
    }
    # The metadata is attached as plain attributes after construction.
    for attr_name, attr_value in metadata.items():
        setattr(record, attr_name, attr_value)
    MODEL_REGISTRY[model_id] = record
|
|
|
|
|
# model_1: SwinV2 classifier and its image processor, wrapped in an
# image-classification pipeline and registered with a 256x256 resize.
image_processor_1 = AutoImageProcessor.from_pretrained(
    MODEL_PATHS["model_1"],
    use_fast=True,
)
model_1 = Swinv2ForImageClassification.from_pretrained(MODEL_PATHS["model_1"])
model_1 = model_1.to(device)
clf_1 = pipeline(
    task="image-classification",
    model=model_1,
    image_processor=image_processor_1,
    device=device,
)
register_model_with_metadata(
    model_id="model_1",
    model=clf_1,
    preprocess=preprocess_resize_256,
    postprocess=postprocess_pipeline,
    class_names=CLASS_NAMES["model_1"],
    display_name="SwinV2 Based",
    contributor="haywoodsloan",
    model_path=MODEL_PATHS["model_1"],
)
|
|
|
# model_2: the pipeline loads the model directly from its repository path;
# registered with a 224x224 resize.
clf_2 = pipeline(
    task="image-classification",
    model=MODEL_PATHS["model_2"],
    device=device,
)
register_model_with_metadata(
    model_id="model_2",
    model=clf_2,
    preprocess=preprocess_resize_224,
    postprocess=postprocess_pipeline,
    class_names=CLASS_NAMES["model_2"],
    display_name="ViT Based",
    contributor="Heem2",
    model_path=MODEL_PATHS["model_2"],
)
|
|
|
|
|
# model_3: run manually (feature extractor + model forward pass) rather than
# through a pipeline, so the registered postprocessor receives raw logits.
# NOTE(review): ``device`` is forwarded to the feature-extractor loader exactly
# as before; confirm it has any effect there.
feature_extractor_3 = AutoFeatureExtractor.from_pretrained(MODEL_PATHS["model_3"], device=device)
model_3 = AutoModelForImageClassification.from_pretrained(MODEL_PATHS["model_3"]).to(device)

# These two helpers were line-for-line copies of the shared module-level
# functions. The old names are kept as aliases so existing references
# (e.g. the model_4 registration) keep working.
preprocess_256 = preprocess_resize_256
postprocess_logits_model3 = postprocess_logits


def model3_infer(image):
    """Run model_3 on *image* and return the raw model outputs.

    The image is encoded by ``feature_extractor_3`` into PyTorch tensors, moved
    to ``device``, and passed through ``model_3`` with gradients disabled.
    """
    inputs = feature_extractor_3(image, return_tensors="pt").to(device)
    with torch.no_grad():
        outputs = model_3(**inputs)
    return outputs


register_model_with_metadata(
    "model_3", model3_infer, preprocess_256, postprocess_logits_model3, CLASS_NAMES["model_3"],
    display_name="SDXL Dataset", contributor="Organika", model_path=MODEL_PATHS["model_3"]
)
|
|
|
# model_4: run manually (feature extractor + model forward pass) rather than
# through a pipeline, so the registered postprocessor receives raw logits.
# NOTE(review): ``device`` is forwarded to the feature-extractor loader exactly
# as before; confirm it has any effect there.
feature_extractor_4 = AutoFeatureExtractor.from_pretrained(MODEL_PATHS["model_4"], device=device)
model_4 = AutoModelForImageClassification.from_pretrained(MODEL_PATHS["model_4"]).to(device)


def model4_infer(image):
    """Run model_4 on *image* and return the raw model outputs.

    The image is encoded by ``feature_extractor_4`` into PyTorch tensors, moved
    to ``device``, and passed through ``model_4`` with gradients disabled.
    """
    inputs = feature_extractor_4(image, return_tensors="pt").to(device)
    with torch.no_grad():
        outputs = model_4(**inputs)
    return outputs


# This helper was a line-for-line copy of ``postprocess_logits``; the old name
# is kept as an alias so existing references keep working.
postprocess_logits_model4 = postprocess_logits

# ``preprocess_resize_256`` does the same RGB conversion + 256x256 resize as
# the ``preprocess_256`` helper used previously.
register_model_with_metadata(
    "model_4", model4_infer, preprocess_resize_256, postprocess_logits_model4, CLASS_NAMES["model_4"],
    display_name="SDXL + FLUX", contributor="cmckinle", model_path=MODEL_PATHS["model_4"]
)
|
|
|
# model_5: the pipeline loads the model directly from its repository path;
# registered with a 224x224 resize.
clf_5 = pipeline(
    task="image-classification",
    model=MODEL_PATHS["model_5"],
    device=device,
)
register_model_with_metadata(
    model_id="model_5",
    model=clf_5,
    preprocess=preprocess_resize_224,
    postprocess=postprocess_pipeline,
    class_names=CLASS_NAMES["model_5"],
    display_name="Vit Based",
    contributor="prithivMLmods",
    model_path=MODEL_PATHS["model_5"],
)
|
|
|
# model_5b: the pipeline loads the model directly from its repository path;
# registered with a 224x224 resize.
clf_5b = pipeline(
    task="image-classification",
    model=MODEL_PATHS["model_5b"],
    device=device,
)
register_model_with_metadata(
    model_id="model_5b",
    model=clf_5b,
    preprocess=preprocess_resize_224,
    postprocess=postprocess_pipeline,
    class_names=CLASS_NAMES["model_5b"],
    display_name="Vit Based, Newer Dataset",
    contributor="prithivMLmods",
    model_path=MODEL_PATHS["model_5b"],
)
|
|
|
# model_6: Swin classifier and its image processor, wrapped in an
# image-classification pipeline and registered with a 224x224 resize.
image_processor_6 = AutoImageProcessor.from_pretrained(
    MODEL_PATHS["model_6"],
    use_fast=True,
)
model_6 = SwinForImageClassification.from_pretrained(MODEL_PATHS["model_6"])
model_6 = model_6.to(device)
clf_6 = pipeline(
    task="image-classification",
    model=model_6,
    image_processor=image_processor_6,
    device=device,
)
register_model_with_metadata(
    model_id="model_6",
    model=clf_6,
    preprocess=preprocess_resize_224,
    postprocess=postprocess_pipeline,
    class_names=CLASS_NAMES["model_6"],
    display_name="Swin, Midj + SDXL",
    contributor="ideepankarsharma2003",
    model_path=MODEL_PATHS["model_6"],
)
|
|
|
# model_7: classifier and its image processor, wrapped in an
# image-classification pipeline and registered with a 224x224 resize.
image_processor_7 = AutoImageProcessor.from_pretrained(
    MODEL_PATHS["model_7"],
    use_fast=True,
)
model_7 = AutoModelForImageClassification.from_pretrained(MODEL_PATHS["model_7"])
model_7 = model_7.to(device)
clf_7 = pipeline(
    task="image-classification",
    model=model_7,
    image_processor=image_processor_7,
    device=device,
)
# TODO(review): contributor "temp" looks like a placeholder — every other
# entry uses the namespace of its model path; confirm the intended value.
register_model_with_metadata(
    model_id="model_7",
    model=clf_7,
    preprocess=preprocess_resize_224,
    postprocess=postprocess_pipeline,
    class_names=CLASS_NAMES["model_7"],
    display_name="ViT",
    contributor="temp",
    model_path=MODEL_PATHS["model_7"],
)
|
|
|
|
|
|
|
def infer(image: Image.Image, model_id: str, confidence_threshold: float = 0.75) -> dict:
    """Run one registered model on *image* and summarise its verdict.

    Args:
        image: Input image; it is passed to the entry's ``preprocess`` callable.
        model_id: Key of the model in ``MODEL_REGISTRY``.
        confidence_threshold: Minimum score for a definite "AI" or "REAL" label.

    Returns:
        A dict with the keys "Model", "Contributor", "HF Model Path",
        "AI Score", "Real Score" and "Label". On success the label is "AI",
        "REAL" or "UNCERTAIN". If preprocessing, inference or postprocessing
        raises, both scores are ``None`` and the label is ``"Error: <message>"``.

    Raises:
        KeyError: If *model_id* is not present in ``MODEL_REGISTRY``.
    """
    entry = MODEL_REGISTRY[model_id]
    summary = {
        "Model": entry.display_name,
        "Contributor": entry.contributor,
        "HF Model Path": entry.model_path,
    }

    try:
        # Preprocessing lives inside the try block so that a bad or missing
        # image yields an error row for this model instead of aborting the
        # whole request.
        img = entry.preprocess(image)
        result = entry.model(img)
        scores = entry.postprocess(result, entry.class_names)

        # Index 0 of class_names is read as the AI label, index 1 as the real
        # label. Postprocessors may return NumPy scalars; built-in floats keep
        # the result serialisable without NumPy-aware encoders.
        ai_score = float(scores.get(entry.class_names[0], 0.0))
        real_score = float(scores.get(entry.class_names[1], 0.0))
    except Exception as e:
        # Best-effort per model: keep the failure visible in the logs and
        # report it in the result row rather than propagating it.
        logger.exception("Inference failed for model %s", model_id)
        summary["AI Score"] = None
        summary["Real Score"] = None
        summary["Label"] = f"Error: {str(e)}"
        return summary

    # The AI check comes first, so it wins if both scores reach the threshold.
    if ai_score >= confidence_threshold:
        label = "AI"
    elif real_score >= confidence_threshold:
        label = "REAL"
    else:
        label = "UNCERTAIN"

    summary["AI Score"] = ai_score
    summary["Real Score"] = real_score
    summary["Label"] = label
    return summary
|
|
|
|
|
|
|
def predict_image(img, confidence_threshold):
    """Run every bundled detector on *img*.

    Args:
        img: Image handed unchanged to ``infer`` for each model.
        confidence_threshold: Threshold forwarded to ``infer``.

    Returns:
        A ``(img, results)`` tuple: the input image as received, and a list
        with one ``infer`` result per model in the fixed order below.
    """
    model_ids = (
        "model_1", "model_2", "model_3", "model_4",
        "model_5", "model_5b", "model_6", "model_7",
    )
    results = []
    for model_id in model_ids:
        results.append(infer(img, model_id, confidence_threshold))
    return img, results
|
|
|
|
|
|
|
def predict_image_with_json(img, confidence_threshold, augment_methods, rotate_degrees, noise_level, sharpen_strength):
    """Run all detectors on an image and build the outputs shown in the UI.

    Args:
        img: Uploaded image (the UI supplies a PIL image), or ``None`` when
            nothing has been uploaded.
        confidence_threshold: Threshold forwarded to ``predict_image``.
        augment_methods: Augmentations to apply before prediction; when empty
            the image is used as uploaded.
        rotate_degrees: Forwarded to ``augment_image``.
        noise_level: Forwarded to ``augment_image``.
        sharpen_strength: Forwarded to ``augment_image``.

    Returns:
        A 4-tuple ``(image, forensics_images, table_rows, results)``:
        the (possibly augmented) image that was scored, the list of forensic
        views, one table row per model, and the raw per-model result dicts.

    Raises:
        gr.Error: If no image was provided.
    """
    # Without this guard a missing upload only fails deep inside preprocessing
    # or the NumPy conversion, with a message that means nothing to the user.
    if img is None:
        raise gr.Error("Please upload an image before running a prediction.")

    if augment_methods:
        img_pil, _ = augment_image(img, augment_methods, rotate_degrees, noise_level, sharpen_strength)
    else:
        img_pil = img
    img_pil, results = predict_image(img_pil, confidence_threshold)

    # Gradient and min/max views use the image that was actually scored,
    # while the ELA views are computed from the un-augmented upload.
    img_np = np.array(img_pil)
    img_np_og = np.array(img)

    gradient_image = gradient_processing(img_np)
    minmax_image = minmax_preprocess(img_np)

    ela1 = ELA(img_np_og, quality=75, scale=50, contrast=20, linear=False, grayscale=True)
    ela2 = ELA(img_np_og, quality=75, scale=75, contrast=25, linear=False, grayscale=True)
    ela3 = ELA(img_np_og, quality=75, scale=75, contrast=25, linear=False, grayscale=False)

    forensics_images = [img_pil, ela1, ela2, ela3, gradient_image, minmax_image]

    # Column order matches the headers of the results table in the UI.
    columns = ("Model", "Contributor", "AI Score", "Real Score", "Label")
    table_rows = [[r.get(column, "") for column in columns] for r in results]

    return img_pil, forensics_images, table_rows, results
|
|
|
# NOTE(review): the nesting of the layout below was reconstructed from syntax
# because the reviewed source carried no indentation — confirm against the
# running app before merging.
_APP_CSS = (
    "#post-gallery { overflow: hidden !important;} "
    ".grid-wrap{ overflow-y: hidden !important;} "
    ".ms-gr-ant-welcome-icon{ height:unset !important;} "
    ".tabs{margin-top:10px;}"
)


def _visibility_toggle(method):
    """Return a callback that shows a component only while *method* is selected."""
    return lambda methods: gr.update(visible=method in methods)


with gr.Blocks(css=_APP_CSS) as iface:
    with ms.Application() as app:
        with antd.ConfigProvider():
            antdx.Welcome(
                icon="https://cdn-avatars.huggingface.co/v1/production/uploads/639daf827270667011153fbc/WpeSFhuB81DY-1TjNUmV_.png",
                title="Welcome to Project OpenSight",
                description="The OpenSight aims to be an open-source SOTA generated image detection model. This HF Space is not only an introduction but a educational playground for the public to evaluate and challenge current open source models. **Space will be upgraded shortly; inference on all 6 models should take about 1.2~ seconds.** ",
            )

        with gr.Tab("👀 Detection Models Eval / Playground"):
            gr.Markdown("# Open Source Detection Models Found on the Hub\n\n - **Space will be upgraded shortly;** inference on all 6 models should take about 1.2~ seconds once we're back on CUDA.\n - The **Community Forensics** mother of all detection models is now available for inference, head to the middle tab above this.\n - Lots of exciting things coming up, stay tuned!")

            with gr.Row():
                # Left column: upload, optional settings and the two action buttons.
                with gr.Column(scale=1):
                    image_input = gr.Image(
                        label="Upload Image to Analyze",
                        sources=['upload', 'webcam'],
                        type='pil',
                    )
                    with gr.Accordion("Settings (Optional)", open=False, elem_id="settings_accordion"):
                        augment_checkboxgroup = gr.CheckboxGroup(
                            ["rotate", "add_noise", "sharpen"],
                            label="Augmentation Methods",
                        )
                        # Each slider starts hidden and is revealed by the
                        # checkbox listeners registered further down.
                        rotate_slider = gr.Slider(0, 45, value=2, step=1, label="Rotate Degrees", visible=False)
                        noise_slider = gr.Slider(0, 50, value=4, step=1, label="Noise Level", visible=False)
                        sharpen_slider = gr.Slider(0, 50, value=11, step=1, label="Sharpen Strength", visible=False)
                        confidence_slider = gr.Slider(0.0, 1.0, value=0.75, step=0.05, label="Confidence Threshold")
                    inputs = [
                        image_input,
                        confidence_slider,
                        augment_checkboxgroup,
                        rotate_slider,
                        noise_slider,
                        sharpen_slider,
                    ]
                    predict_button = gr.Button("Predict")
                    augment_button = gr.Button("Augment & Predict")
                    image_output = gr.Image(label="Processed Image", visible=False)

                # Right column: results table, forensic gallery and raw JSON.
                with gr.Column(scale=2):
                    results_table = gr.Dataframe(
                        label="Model Predictions",
                        headers=["Model", "Contributor", "AI Score", "Real Score", "Label"],
                        datatype=["str", "str", "number", "number", "str"],
                    )
                    forensics_gallery = gr.Gallery(
                        label="Post Processed Images",
                        visible=True,
                        columns=[4],
                        rows=[2],
                        container=False,
                        height="auto",
                        object_fit="contain",
                        elem_id="post-gallery",
                    )
                    with gr.Accordion("Debug Output (Raw JSON)", open=False):
                        debug_json = gr.JSON(label="Raw Model Results")

                    outputs = [image_output, forensics_gallery, results_table, debug_json]

                    # One listener per augmentation, registered in the same
                    # order as before: rotate, add_noise, sharpen.
                    for method_name, method_slider in (
                        ("rotate", rotate_slider),
                        ("add_noise", noise_slider),
                        ("sharpen", sharpen_slider),
                    ):
                        augment_checkboxgroup.change(
                            _visibility_toggle(method_name),
                            inputs=[augment_checkboxgroup],
                            outputs=[method_slider],
                        )

                    predict_button.click(
                        fn=predict_image_with_json,
                        inputs=inputs,
                        outputs=outputs,
                    )

                    # "Augment & Predict" ignores the visible checkbox group and
                    # feeds a hidden one with every augmentation pre-selected.
                    all_augments = gr.CheckboxGroup(
                        ["rotate", "add_noise", "sharpen"],
                        value=["rotate", "add_noise", "sharpen"],
                        visible=False,
                    )
                    augment_button.click(
                        fn=predict_image_with_json,
                        inputs=[
                            image_input,
                            confidence_slider,
                            all_augments,
                            rotate_slider,
                            noise_slider,
                            sharpen_slider,
                        ],
                        outputs=outputs,
                    )

        with gr.Tab("👑 Community Forensics Preview"):
            temp_space = gr.load("aiwithoutborders-xyz/OpenSight-Community-Forensics-Preview", src="spaces")

        with gr.Tab("🥇 Leaderboard"):
            gr.Markdown("# AI Generated / Deepfake Detection Models Leaderboard: Soon™")

iface.launch()