"""Gradio demo that labels a piece of text with a jailbreak classifier."""

import gradio as gr
from transformers import pipeline

# Load the jailbreak classification model once, at import time, so every
# request reuses the same pipeline object.
task = "text-classification"
pretrained_model = "jackhhao/jailbreak-classifier"
classifier = pipeline(task, model=pretrained_model)


def classify_text(text: str) -> str:
    """Classify *text* and return the top label with its confidence.

    Args:
        text: The text to classify, as typed into the Gradio textbox.

    Returns:
        A string of the form ``"Label: <label>, Confidence: <score>"`` with
        the score rendered to four decimal places, or a short prompt asking
        for input when *text* is empty, whitespace-only, or ``None``.
    """
    # Nothing meaningful to classify: answer directly instead of running the
    # model on an empty string.
    if text is None or not text.strip():
        return "Please enter some text to classify."

    # truncation=True asks the tokenizer to clip the input to the model's
    # maximum sequence length; without it, over-long text can make the model
    # raise instead of returning a prediction.
    result = classifier(text, truncation=True)

    # For a single input string the pipeline returns a one-element list of
    # {"label": ..., "score": ...} dicts.
    top_prediction = result[0]
    label = top_prediction["label"]
    score = top_prediction["score"]
    return f"Label: {label}, Confidence: {score:.4f}"


# Create the interface: a multi-line textbox in, plain text out.
iface = gr.Interface(
    fn=classify_text,
    inputs=gr.Textbox(lines=5, label="Enter a text"),
    outputs="text",
    title="Jailbreak Classification",
    description="A simple interface to classify text as jailbreak or not jailbreak",
)

# Launch the app only when run as a script, not when imported.
if __name__ == "__main__":
    iface.launch()