import gradio as gr
import json
import re
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TextClassificationPipeline

# Load Swear Words
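# swearWord.json is assumed to be a flat JSON array of words, e.g. ["word1", "word2"]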
try:
    with open("swearWord.json", "r") as f:
        swear_words = set(json.load(f))
    print("Swear words loaded successfully.")
except Exception as e:
    print(f"Failed to load swearWord.json: {e}")
    swear_words = set()

# Load Model and Tokenizer
try:
    tokenizer = AutoTokenizer.from_pretrained("eliasalbouzidi/distilbert-nsfw-text-classifier")
    model = AutoModelForSequenceClassification.from_pretrained("eliasalbouzidi/distilbert-nsfw-text-classifier")
    text_classifier = TextClassificationPipeline(model=model, tokenizer=tokenizer)
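    # Per the model card, this classifier predicts two labels: "nsfw" and "safe"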
    print("Model loaded successfully.")
except Exception as e:
    print(f"Error loading model: {e}")
    raise SystemExit(1)

# Text Classification and Censorship Function
def textclassifier(text):
    """Censor known swear words in the input and classify the original text as safe/nsfw/uncertain."""
    if not text.strip():
        return "Empty input", "unknown", 0.0

    # Censor known swear words while preserving the original spacing and punctuation
    def censor_word(match):
        word = match.group()
        return "***" if word.lower() in swear_words else word

    censored_text = re.sub(r"\w+", censor_word, text)

    # Run model on original input
    try:
        result = text_classifier(text)
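        # The pipeline returns a list of dicts like [{"label": "safe", "score": 0.998}]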
        label = result[0]["label"]
        score = result[0]["score"]

        # Treat low-confidence "nsfw" predictions as uncertain rather than flagging them outright
        threshold = 0.994
        if label == "nsfw" and score < threshold:
            label = "uncertain"

        return censored_text, label, round(score, 4)

    except Exception as e:
        return censored_text, f"Error: {str(e)}", 0.0
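
# Illustrative call (actual scores vary by model version):
#   textclassifier("have a nice day") -> ("have a nice day", "safe", 0.9991)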

# Gradio Interface
interface = gr.Interface(
    fn=textclassifier,
    inputs=gr.Textbox(label="Enter text"),
    outputs=[
        gr.Textbox(label="Censored Text"),
        gr.Label(label="NSFW Prediction"),
        gr.Number(label="Confidence Score")
    ],
    title="Text Censorship + NSFW Classifier",
    description="Censors known swear words with *** and classifies the original text as nsfw, safe, or uncertain."
)

if __name__ == "__main__":
    interface.launch()