Spaces:

neel692
/

Abusive-Comment-Detection

Running

App Files Files Community

neelsahu committed on Jun 18

Commit

09e57ee

1 Parent(s): b3d43dd

Deploying updates

Browse files

Files changed (2) hide show

app.py +23 -30
requirements.txt +2 -0

app.py CHANGED Viewed

@@ -6,7 +6,8 @@ import nltk
 nltk.download('wordnet')
 import numpy as np
 import language_detection
-import requests
 print("all imports worked")
 # Load pre-trained model
@@ -15,12 +16,18 @@ print("model load ")
 tf = joblib.load('tf_joblib.pkl')
 print("tfidf load ")
-def query(payload):
-    API_URL = "https://api-inference.huggingface.co/models/Hate-speech-CNERG/hindi-abusive-MuRIL"
-    headers = {"Authorization": "Bearer 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"}
-    response = requests.post(API_URL, headers=headers, json=payload)
-    return response.json()
 # Define function to predict whether sentence is abusive or not
 def predict_abusive_lang(text):
     print("original text ", text)
@@ -42,33 +49,19 @@ def predict_abusive_lang(text):
         else :
             return ["Please write something in the comment box..","No cleaned text"]
     elif lang=='hi':
-        print("using hugging face api")
-        output = query({
-        "inputs": text#"खान चाचा को मेरा सला"
-        })
-        print(output, len(output))
-        # if(len(output))
-        l_0 = float(output[0][0]['score'])
-        l_1 = float(output[0][1]['score'])
-        if  output[0][0]['label']=='LABEL_1' :
-            if l_0>l_1:
-                return ["AB",text]
-        else :
-            return ["NA",text]
-    else :
         return ["UN","No cleaned text"]
-# text = '":::::: 128514 - & % ! @ # $ % ^ & * ( ) _ + I got blocked for 30 minutes, you got blocked for more than days. You is lost.  www.google.com, #happydiwali, @amangupta And I don\'t even know who the fuck are you.  It\'s a zero! \n"'
-# predict_abusive_lang(text)
 # Define the GRADIO output interfaces
 output_interfaces = [
-    gr.outputs.Textbox(label="Result"),
-    gr.outputs.Textbox(label="Cleaned text")
 ]
 app = gr.Interface(predict_abusive_lang, inputs='text', outputs=output_interfaces, title="Abuse Classifier", description="Enter a sentence and the model will predict whether it is abusive or not.")
 #Start the GRADIO app

 nltk.download('wordnet')
 import numpy as np
 import language_detection
+from transformers import AutoModelForSequenceClassification, AutoTokenizer
+import torch
 print("all imports worked")
 # Load pre-trained model
 tf = joblib.load('tf_joblib.pkl')
 print("tfidf load ")
+# Load Hindi abuse detection model
+hindi_tokenizer = AutoTokenizer.from_pretrained("Hate-speech-CNERG/hindi-abusive-MuRIL")
+hindi_model = AutoModelForSequenceClassification.from_pretrained("Hate-speech-CNERG/hindi-abusive-MuRIL")
+print("Hindi model loaded")
+def predict_hindi_text(text):
+    """Return softmax class probabilities for `text` from the Hindi abuse model.
+
+    Tokenizes `text` with `hindi_tokenizer`, runs it through `hindi_model`,
+    and returns the probabilities of the first (only) sequence as a NumPy
+    array. The caller compares index 1 (abusive) against index 0.
+    """
+    inputs = hindi_tokenizer(text, return_tensors="pt", padding=True, truncation=True)
+    # Inference only: disable autograd so no gradient graph is built per request.
+    with torch.no_grad():
+        outputs = hindi_model(**inputs)
+        predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)
+    scores = predictions[0].numpy()
+    return scores
 # Define function to predict whether sentence is abusive or not
 def predict_abusive_lang(text):
     print("original text ", text)
         else :
             return ["Please write something in the comment box..","No cleaned text"]
     elif lang=='hi':
+        print("using transformers for Hindi text")
+        scores = predict_hindi_text(text)
+        if scores[1] > scores[0]:  # If score for abusive class is higher
+            return ["AB", text]
+        else:
+            return ["NA", text]
+    else:
         return ["UN","No cleaned text"]
 # Define the GRADIO output interfaces
 output_interfaces = [
+    gr.Textbox(label="Result"),
+    gr.Textbox(label="Cleaned text")
 ]
 app = gr.Interface(predict_abusive_lang, inputs='text', outputs=output_interfaces, title="Abuse Classifier", description="Enter a sentence and the model will predict whether it is abusive or not.")
 #Start the GRADIO app

requirements.txt CHANGED Viewed

@@ -1,4 +1,6 @@
 scikit-learn==1.0.2
 nltk==3.8.1
 joblib==1.0.1

 scikit-learn==1.0.2
 nltk==3.8.1
 joblib==1.0.1
+transformers>=4.30.0
+torch>=2.0.0