"""Gradio demo: swear-word filter followed by an NSFW text classifier.

The input text is first checked against a word list loaded from
``swearWord.json``; only when no listed word is found is the text passed to
the Hugging Face text-classification pipeline.
"""

import json
import string
import sys
from typing import Optional, Tuple

import gradio as gr
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    TextClassificationPipeline,
)

# Hub identifier shared by the tokenizer and the model.
MODEL_NAME = "eliasalbouzidi/distilbert-nsfw-text-classifier"

# An "nsfw" prediction scoring below this value is reported as "uncertain".
NSFW_THRESHOLD = 0.994

# Load Swear Words
try:
    with open("swearWord.json", "r", encoding="utf-8") as f:
        # Lower-case the entries once so the comparison against lower-cased
        # input tokens is genuinely case-insensitive.
        swear_words = {
            word.lower() for word in json.load(f) if isinstance(word, str)
        }
    print("Swear words loaded successfully.")
except Exception as e:
    # Best effort: the app still runs with the model only.
    print(f"Failed to load swearWord.json: {e}")
    swear_words = set()

# Load Model and Tokenizer
try:
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)
    text_classifier = TextClassificationPipeline(model=model, tokenizer=tokenizer)
    print("Model loaded successfully.")
except Exception as e:
    print(f"Error loading model: {e}")
    # sys.exit instead of the interactive-only ``exit`` helper from ``site``.
    sys.exit(1)


def _contains_swear_word(text: str) -> bool:
    """Return True if any whitespace-separated token of *text* is a listed word."""
    for raw_token in text.split():
        token = raw_token.lower()
        # Exact match first, so list entries that themselves contain
        # punctuation keep matching.
        if token in swear_words:
            return True
        # Then ignore leading/trailing punctuation ("word!" -> "word").
        stripped = token.strip(string.punctuation)
        if stripped and stripped in swear_words:
            return True
    return False


# Text Classifier Function
def textclassifier(text: Optional[str]) -> Tuple[str, float]:
    """Classify *text* and return a ``(label, score)`` pair.

    Args:
        text: The user-supplied text. ``None`` is treated like an empty string.

    Returns:
        ``("Empty input", 0.0)`` for missing or whitespace-only input;
        ``("swear-word", 1.0)`` when a token matches the swear-word list;
        otherwise the pipeline's top label and its score rounded to four
        decimals, with an ``"nsfw"`` label below ``NSFW_THRESHOLD`` replaced
        by ``"uncertain"``. If the pipeline raises, the label is
        ``"Error: <message>"`` and the score is ``0.0``.
    """
    if not text or not text.strip():
        return "Empty input", 0.0

    # Check for swear words
    if _contains_swear_word(text):
        return "swear-word", 1.0

    # Use model
    try:
        result = text_classifier(text)
        label = result[0]["label"]
        score = result[0]["score"]

        # Threshold logic: only high-confidence "nsfw" predictions keep the label.
        if label == "nsfw" and score < NSFW_THRESHOLD:
            label = "uncertain"

        return label, round(score, 4)
    except Exception as e:
        # Surface the failure in the UI instead of crashing the request.
        return f"Error: {e}", 0.0


# Gradio Interface
interface = gr.Interface(
    fn=textclassifier,
    inputs=gr.Textbox(label="Enter text"),
    outputs=[
        gr.Label(label="Prediction"),
        gr.Number(label="Confidence Score"),
    ],
    title="Text Classifier with Swear Word Filter",
    # description="First checks for swear words, then uses NSFW text classifier if no swear word is found."
)

if __name__ == "__main__":
    interface.launch()