import gradio as gr
import joblib
import clean               # local text-cleaning helpers
import nltk
nltk.download('wordnet')   # WordNet resource used during text cleaning
import numpy as np
import language_detection  # local English/Hindi language detection helpers
from transformers import AutoModelForSequenceClassification, AutoTokenizer
import torch
print("all imports worked")
# Load the pre-trained English abuse classifier and its TF-IDF vectorizer
model = joblib.load('model_joblib.pkl')
print("model loaded")
tf = joblib.load('tf_joblib.pkl')
print("tfidf vectorizer loaded")
# Load Hindi abuse detection model
hindi_tokenizer = AutoTokenizer.from_pretrained("Hate-speech-CNERG/hindi-abusive-MuRIL")
hindi_model = AutoModelForSequenceClassification.from_pretrained("Hate-speech-CNERG/hindi-abusive-MuRIL")
print("Hindi model loaded")
def predict_hindi_text(text):
    # Tokenize the input, run the MuRIL classifier, and return softmax class probabilities
    inputs = hindi_tokenizer(text, return_tensors="pt", padding=True, truncation=True)
    outputs = hindi_model(**inputs)
    predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)
    scores = predictions[0].detach().numpy()
    return scores
# Predict whether a sentence is abusive.
# Returns [label, cleaned_text] where label is 'AB' (abusive), 'NA' (not abusive)
# or 'UN' (language not supported).
def predict_abusive_lang(text):
    print("original text ", text)
    lang = language_detection.en_hi_detection(text)
    print("language detected ", lang)
    if lang == 'eng':
        # English: clean the text, vectorize it with TF-IDF and classify with the joblib model
        cleaned_text = clean.text_cleaning(text)
        print("cleaned text ", cleaned_text)
        features = tf.transform([cleaned_text])
        print("tfidf transformation ", features)
        prediction = model.predict(features)
        print("prediction ", prediction)
        if len(prediction) != 0 and prediction[0] == 0:
            return ["NA", cleaned_text]
        elif len(prediction) != 0 and prediction[0] == 1:
            return ["AB", cleaned_text]
        else:
            return ["Please write something in the comment box..", "No cleaned text"]
    elif lang == 'hi':
        # Hindi: use the MuRIL transformer model
        print("using transformers for Hindi text")
        scores = predict_hindi_text(text)
        if scores[1] > scores[0]:  # score for the abusive class is higher
            return ["AB", text]
        else:
            return ["NA", text]
    else:
        return ["UN", "No cleaned text"]
# Define the Gradio output components
output_interfaces = [
    gr.Textbox(label="Result"),
    gr.Textbox(label="Cleaned text")
]
app = gr.Interface(
    predict_abusive_lang,
    inputs='text',
    outputs=output_interfaces,
    title="Abuse Classifier",
    description="Enter a sentence and the model will predict whether it is abusive or not."
)
# Start the Gradio app
app.launch()
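# A minimal sketch of how the prediction function could be exercised without the UI
# (assumes the joblib artifacts and the MuRIL model above loaded successfully);
# run it before app.launch(), since launch() blocks:
# label, cleaned = predict_abusive_lang("this is a test sentence")
# print(label, cleaned)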