wangoes-dev committed
Commit f326b05 · verified · 1 Parent(s): 5848fd3

update app.py

Files changed (1)
  1. app.py +60 -50
app.py CHANGED
@@ -1,16 +1,17 @@
  from spacy.lang.en import English
-
- nlp = English()
- nlp.add_pipe("sentencizer")
-
  import pandas as pd
  import gradio as gr
-
  from transformers import pipeline
  from gradio.themes.utils.colors import red, green

+ # Initialize the NLP pipeline
+ nlp = English()
+ nlp.add_pipe("sentencizer")
+
+ # Initialize the text classification pipeline
  detector = pipeline(task='text-classification', model='SJTU-CL/RoBERTa-large-ArguGPT-sent')

+ # Define color map for highlighted text
  color_map = {
      '0%': green.c400,
      '10%': green.c300,
@@ -25,9 +26,7 @@ color_map = {
      '100%': red.c500
  }

-
  def predict_doc(doc):
-     # sents = sent_tokenize(doc)
      sents = [s.text for s in nlp(doc).sents]
      data = {'sentence': [], 'label': [], 'score': []}
      res = []
@@ -41,72 +40,83 @@ def predict_doc(doc):
          else:
              data['label'].append('Machine')

-         if prob < 0.1:
-             label = '0%'
-         elif prob < 0.2:
-             label = '10%'
-         elif prob < 0.3:
-             label = '20%'
-         elif prob < 0.4:
-             label = '30%'
-         elif prob < 0.5:
-             label = '40%'
-         elif prob < 0.6:
-             label = '50%'
-         elif prob < 0.7:
-             label = '60%'
-         elif prob < 0.8:
-             label = '70%'
-         elif prob < 0.9:
-             label = '80%'
-         elif prob < 1:
-             label = '90%'
-         else:
-             label = '100%'
+         if prob < 0.1: label = '0%'
+         elif prob < 0.2: label = '10%'
+         elif prob < 0.3: label = '20%'
+         elif prob < 0.4: label = '30%'
+         elif prob < 0.5: label = '40%'
+         elif prob < 0.6: label = '50%'
+         elif prob < 0.7: label = '60%'
+         elif prob < 0.8: label = '70%'
+         elif prob < 0.9: label = '80%'
+         elif prob < 1: label = '90%'
+         else: label = '100%'
          res.append((sent, label))

      df = pd.DataFrame(data)
      df.to_csv('result.csv')
      overall_score = df.score.mean()
-     sum_str = ''
-     if overall_score <= 0.5:
-         overall_label = 'Human'
-     else:
-         overall_label = 'Machine'
-     sum_str = f'The essay is probably written by {overall_label}. The probability of being generated by AI is {overall_score}'
+     overall_label = 'Human' if overall_score <= 0.5 else 'Machine'
+     sum_str = f'The essay is probably written by {overall_label}. The probability of being generated by AI is {overall_score:.2f}'

      return sum_str, res, df, 'result.csv'

-
  def predict_one_sent(sent):
-     '''
-     convert to prob
-     LABEL_1, 0.66 -> 0.66
-     LABEL_0, 0.66 -> 0.34
-     '''
      res = detector(sent)[0]
      org_label, prob = res['label'], res['score']
      if org_label == 'LABEL_0': prob = 1 - prob
      return prob

+ # Custom CSS for modern look
+ custom_css = """
+ .gradio-container {
+     font-family: 'Arial', sans-serif;
+ }
+ .gradio-header {
+     background-color: #4CAF50;
+     color: white;
+     padding: 10px;
+     text-align: center;
+ }
+ .gradio-button {
+     background-color: #4CAF50;
+     color: white;
+     border: none;
+     padding: 10px 20px;
+     text-align: center;
+     text-decoration: none;
+     display: inline-block;
+     font-size: 16px;
+     margin: 4px 2px;
+     cursor: pointer;
+     border-radius: 5px;
+ }
+ .gradio-button:hover {
+     background-color: #45a049;
+ }
+ """

- with gr.Blocks() as demo:
+ with gr.Blocks(css=custom_css) as demo:
+     gr.Markdown("## AI vs Human Essay Detector")
+     gr.Markdown("This tool helps you determine whether an essay is written by a human or generated by AI.")
+
      with gr.Row():
          with gr.Column():
              text_in = gr.Textbox(
-                 lines=5,
-                 label='Essay input',
-                 info='Please enter the essay in the textbox'
+                 lines=5,
+                 label='Essay Input',
+                 info='Please enter the essay in the textbox',
+                 placeholder="Paste your essay here..."
              )
-             btn = gr.Button('Predict who writes this essay!')
+             btn = gr.Button('Predict who writes this essay!', variant="primary")

      sent_res = gr.HighlightedText(label='Labeled Result', color_map=color_map)
-
+
      with gr.Row():
-         summary = gr.Text(label='Result summary')
-         csv_f = gr.File(label='CSV file storing data with all sentences.')
+         summary = gr.Text(label='Result Summary')
+         csv_f = gr.File(label='CSV File Storing Data with All Sentences')

      tab = gr.Dataframe(label='Table with Probability Score', row_count=100)
      btn.click(predict_doc, inputs=[text_in], outputs=[summary, sent_res, tab, csv_f], api_name='predict_doc')

- demo.launch()
+ demo.launch()
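
The deleted docstring was the only place the label convention of predict_one_sent was written down, so it is worth restating: the detector returns one dict per input, e.g. {'label': 'LABEL_1', 'score': 0.66}; LABEL_1 is treated as machine-written, so LABEL_1/0.66 stays 0.66 while LABEL_0/0.66 becomes 0.34, and the return value is always the probability that the sentence is machine-generated. A small illustrative check (not part of the commit):

# Illustrative only: LABEL_1 keeps its score, LABEL_0 is inverted to 1 - score,
# so predict_one_sent() always returns the machine probability.
prob = predict_one_sent("This sentence was produced by a language model.")
print(f"machine probability: {prob:.2f}")  # values above 0.5 are labelled 'Machine' in predict_doc()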
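
The collapsed if/elif ladder simply maps the per-sentence probability onto the decile keys of color_map ('0%' through '100%'). For reference, an exactly equivalent compact form for probabilities in [0, 1] is a bisect over the same thresholds; this is a sketch for clarity, not what the commit ships:

from bisect import bisect_right

DECILE_EDGES = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1]

def decile_label(prob):
    # bisect_right counts edges <= prob, giving the bucket: 0.07 -> '0%', 0.66 -> '60%', 1.0 -> '100%'
    return f"{bisect_right(DECILE_EDGES, prob) * 10}%"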
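
Because btn.click registers api_name='predict_doc', the updated demo can also be driven programmatically once the Space is running. A minimal sketch with gradio_client, where the Space id wangoes-dev/argugpt-essay-detector is a placeholder assumption (substitute the real one):

from gradio_client import Client

client = Client("wangoes-dev/argugpt-essay-detector")  # placeholder Space id, not from the commit
summary, highlighted, table, csv_path = client.predict(
    "Paste the essay to analyse here.",
    api_name="/predict_doc",
)
print(summary)   # e.g. "The essay is probably written by Human. ..."
print(csv_path)  # local path to the downloaded result.csv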