import gradio as gr
import json
import re

from transformers import AutoTokenizer, AutoModelForSequenceClassification, TextClassificationPipeline

# Load Swear Words
try:
    with open("swearWord.json", "r") as f:
        swear_words = set(json.load(f))
    print("Swear words loaded successfully.")
except Exception as e:
    print(f"Failed to load swearWord.json: {e}")
    swear_words = set()

# Load Model and Tokenizer
try:
    tokenizer = AutoTokenizer.from_pretrained("eliasalbouzidi/distilbert-nsfw-text-classifier")
    model = AutoModelForSequenceClassification.from_pretrained("eliasalbouzidi/distilbert-nsfw-text-classifier")
    text_classifier = TextClassificationPipeline(model=model, tokenizer=tokenizer)
    print("Model loaded successfully.")
except Exception as e:
    print(f"Error loading model: {e}")
    raise SystemExit(1)

# Text Classification and Censorship Function
def textclassifier(text):
    if not text.strip():
        return "Empty input", "unknown", 0.0

    # Censor known swear words
    def censor_word(word):
        return "***" if word.lower() in swear_words else word

    # Substitute each word in place so the original spacing and punctuation survive
    censored_text = re.sub(r"\w+", lambda m: censor_word(m.group(0)), text)

    # Run model on the original (uncensored) input
    try:
        result = text_classifier(text)
        label = result[0]["label"]
        score = result[0]["score"]

        # Below this confidence, treat an "nsfw" prediction as uncertain
        threshold = 0.994
        if label == "nsfw" and score < threshold:
            label = "uncertain"

        return censored_text, label, round(score, 4)
    except Exception as e:
        return censored_text, f"Error: {str(e)}", 0.0

# Gradio Interface
interface = gr.Interface(
    fn=textclassifier,
    inputs=gr.Textbox(label="Enter text"),
    outputs=[
        gr.Textbox(label="Censored Text"),
        gr.Label(label="NSFW Prediction"),
        gr.Number(label="Confidence Score"),
    ],
    title="Text Censorship + NSFW Classifier",
    description="Censors known swear words using *** and classifies the original text as NSFW, Safe, or Uncertain.",
)

if __name__ == "__main__":
    interface.launch()
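
# A minimal sketch of exercising textclassifier directly, without launching the
# Gradio UI (e.g. from a Python shell or a test). The module/file name "app"
# and the sample sentence are assumptions for illustration, not from the
# source; the exact label and score depend on the downloaded model's output.
#
#     from app import textclassifier   # assumes this file is saved as app.py
#     censored, label, score = textclassifier("this is a harmless test sentence")
#     print(censored, label, score)
#     # e.g. -> ('this is a harmless test sentence', 'safe', 0.99...)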