Spaces:
Sleeping
Sleeping
import gradio as gr | |
from gradio.components import Text | |
import joblib | |
import clean | |
import nltk | |
nltk.download('wordnet') | |
import numpy as np | |
import language_detection | |
from transformers import AutoModelForSequenceClassification, AutoTokenizer | |
import torch | |
print("all imports worked")

# Locations of the serialized artifacts, gathered in one place.
_ENGLISH_MODEL_PATH = 'model_joblib.pkl'
_ENGLISH_TFIDF_PATH = 'tf_joblib.pkl'
_HINDI_CHECKPOINT = "Hate-speech-CNERG/hindi-abusive-MuRIL"

# English pipeline: `model` is queried through .predict() and `tf` through
# .transform() by predict_abusive_lang.
model = joblib.load(_ENGLISH_MODEL_PATH)
print("model load ")
tf = joblib.load(_ENGLISH_TFIDF_PATH)
print("tfidf load ")

# Hindi pipeline: tokenizer and sequence classifier come from the same
# pretrained checkpoint.
hindi_tokenizer = AutoTokenizer.from_pretrained(_HINDI_CHECKPOINT)
hindi_model = AutoModelForSequenceClassification.from_pretrained(_HINDI_CHECKPOINT)
print("Hindi model loaded")
def predict_hindi_text(text):
    """Return class probabilities for ``text`` from the Hindi abuse model.

    Args:
        text: Input passed straight to ``hindi_tokenizer``
            (presumably a single string; confirm against callers).

    Returns:
        A NumPy array holding the softmax of the model's logits for the
        first (index 0) sequence in the batch. The caller in this file
        compares index 1 against index 0 to decide "Abusive".
    """
    inputs = hindi_tokenizer(text, return_tensors="pt", padding=True, truncation=True)
    # Inference only: disable autograd so no computation graph is built and
    # retained for every request.
    with torch.no_grad():
        outputs = hindi_model(**inputs)
        probabilities = torch.nn.functional.softmax(outputs.logits, dim=-1)
    # Tensors produced under no_grad do not require grad, so they convert to
    # NumPy directly.
    return probabilities[0].numpy()
# Define function to predict whether sentence is abusive or not
def predict_abusive_lang(text):
    """Classify a comment as abusive or not, routing by detected language.

    The text is passed to ``language_detection.en_hi_detection``. A result of
    ``'eng'`` goes through ``clean.text_cleaning``, the TF-IDF transformer
    ``tf`` and ``model.predict``; a result of ``'hi'`` goes through
    ``predict_hindi_text``. Any other result is reported as ``"UN"``.

    Args:
        text: The raw comment entered by the user.

    Returns:
        A two-item list ``[result, shown_text]``:
        - ``result`` is ``"Abusive"``, ``"Not Abusive"``, ``"UN"`` or a
          prompt asking the user to type something;
        - ``shown_text`` is the cleaned text (English), the original text
          (Hindi), or ``"No cleaned text"``.
    """
    empty_reply = ["Please write something in the comment box..", "No cleaned text"]

    # Nothing to classify: answer before touching the detector or the models.
    # Without this guard the "please write something" reply was effectively
    # unreachable, because a one-document transform never yields an empty
    # prediction.
    if not text or not text.strip():
        return empty_reply

    print("original text ", text)
    lang = language_detection.en_hi_detection(text)
    print("language detected ", lang)

    if lang == 'eng':
        cleaned_text = clean.text_cleaning(text)
        # Log the cleaned text itself (the original logged the raw input here).
        print("cleaned text ", cleaned_text)
        # Keep the feature matrix in its own name rather than rebinding `text`.
        features = tf.transform([cleaned_text])
        print("tfidf transformation ", features)
        prediction = model.predict(features)
        print("prediction ", prediction)
        if len(prediction) != 0 and prediction[0] == 0:
            return ["Not Abusive", cleaned_text]
        if len(prediction) != 0 and prediction[0] == 1:
            return ["Abusive", cleaned_text]
        # Empty prediction or an unexpected label.
        return empty_reply

    if lang == 'hi':
        print("using transformers for Hindi text")
        scores = predict_hindi_text(text)
        # Index 1 is treated as the abusive class; ties count as not abusive.
        if scores[1] > scores[0]:
            return ["Abusive", text]
        return ["Not Abusive", text]

    # Language detector returned something other than 'eng' or 'hi'.
    return ["UN", "No cleaned text"]
# Two read-only text outputs: the verdict and the text that was classified.
output_interfaces = [
    gr.Textbox(label="Result"),
    gr.Textbox(label="Cleaned text"),
]

# Wire the classifier into a single-textbox Gradio interface.
app = gr.Interface(
    fn=predict_abusive_lang,
    inputs='text',
    outputs=output_interfaces,
    title="Abuse Classifier",
    description="Enter a sentence and the model will predict whether it is abusive or not.",
)

# Start serving the GRADIO app.
app.launch()