neelsahu committed on
Commit
09e57ee
·
1 Parent(s): b3d43dd

Deploying updates

Browse files
Files changed (2) hide show
  1. app.py +23 -30
  2. requirements.txt +2 -0
app.py CHANGED
@@ -6,7 +6,8 @@ import nltk
6
  nltk.download('wordnet')
7
  import numpy as np
8
  import language_detection
9
- import requests
 
10
 
11
  print("all imports worked")
12
  # Load pre-trained model
@@ -15,12 +16,18 @@ print("model load ")
15
  tf = joblib.load('tf_joblib.pkl')
16
  print("tfidf load ")
17
 
18
- def query(payload):
19
- API_URL = "https://api-inference.huggingface.co/models/Hate-speech-CNERG/hindi-abusive-MuRIL"
20
- headers = {"Authorization": "Bearer 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"}
21
- response = requests.post(API_URL, headers=headers, json=payload)
22
- return response.json()
23
-
 
 
 
 
 
 
24
  # Define function to predict whether sentence is abusive or not
25
  def predict_abusive_lang(text):
26
  print("original text ", text)
@@ -42,33 +49,19 @@ def predict_abusive_lang(text):
42
  else :
43
  return ["Please write something in the comment box..","No cleaned text"]
44
  elif lang=='hi':
45
-
46
- print("using hugging face api")
47
- output = query({
48
- "inputs": text#"खान चाचा को मेरा सला"
49
- })
50
- print(output, len(output))
51
- # if(len(output))
52
- l_0 = float(output[0][0]['score'])
53
- l_1 = float(output[0][1]['score'])
54
- if output[0][0]['label']=='LABEL_1' :
55
- if l_0>l_1:
56
- return ["AB",text]
57
-
58
- else :
59
- return ["NA",text]
60
-
61
- else :
62
  return ["UN","No cleaned text"]
63
 
64
-
65
- # text = '":::::: 128514 - & % ! @ # $ % ^ & * ( ) _ + I got blocked for 30 minutes, you got blocked for more than days. You is lost. www.google.com, #happydiwali, @amangupta And I don\'t even know who the fuck are you. It\'s a zero! \n"'
66
- # predict_abusive_lang(text)
67
-
68
  # Define the GRADIO output interfaces
69
  output_interfaces = [
70
- gr.outputs.Textbox(label="Result"),
71
- gr.outputs.Textbox(label="Cleaned text")
72
  ]
73
  app = gr.Interface(predict_abusive_lang, inputs='text', outputs=output_interfaces, title="Abuse Classifier", description="Enter a sentence and the model will predict whether it is abusive or not.")
74
  #Start the GRADIO app
 
6
  nltk.download('wordnet')
7
  import numpy as np
8
  import language_detection
9
+ from transformers import AutoModelForSequenceClassification, AutoTokenizer
10
+ import torch
11
 
12
  print("all imports worked")
13
  # Load pre-trained model
 
16
  tf = joblib.load('tf_joblib.pkl')
17
  print("tfidf load ")
18
 
19
+ # Load Hindi abuse detection model
20
+ hindi_tokenizer = AutoTokenizer.from_pretrained("Hate-speech-CNERG/hindi-abusive-MuRIL")
21
+ hindi_model = AutoModelForSequenceClassification.from_pretrained("Hate-speech-CNERG/hindi-abusive-MuRIL")
22
+ print("Hindi model loaded")
23
+
24
+ def predict_hindi_text(text):
25
+ inputs = hindi_tokenizer(text, return_tensors="pt", padding=True, truncation=True)
26
+ outputs = hindi_model(**inputs)
27
+ predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)
28
+ scores = predictions[0].detach().numpy()
29
+ return scores
30
+
31
  # Define function to predict whether sentence is abusive or not
32
  def predict_abusive_lang(text):
33
  print("original text ", text)
 
49
  else :
50
  return ["Please write something in the comment box..","No cleaned text"]
51
  elif lang=='hi':
52
+ print("using transformers for Hindi text")
53
+ scores = predict_hindi_text(text)
54
+ if scores[1] > scores[0]: # If score for abusive class is higher
55
+ return ["AB", text]
56
+ else:
57
+ return ["NA", text]
58
+ else:
 
 
 
 
 
 
 
 
 
 
59
  return ["UN","No cleaned text"]
60
 
 
 
 
 
61
  # Define the GRADIO output interfaces
62
  output_interfaces = [
63
+ gr.Textbox(label="Result"),
64
+ gr.Textbox(label="Cleaned text")
65
  ]
66
  app = gr.Interface(predict_abusive_lang, inputs='text', outputs=output_interfaces, title="Abuse Classifier", description="Enter a sentence and the model will predict whether it is abusive or not.")
67
  #Start the GRADIO app
requirements.txt CHANGED
@@ -1,4 +1,6 @@
1
  scikit-learn==1.0.2
2
  nltk==3.8.1
3
  joblib==1.0.1
 
 
4
 
 
1
  scikit-learn==1.0.2
2
  nltk==3.8.1
3
  joblib==1.0.1
4
+ transformers>=4.30.0
5
+ torch>=2.0.0
6