Muskanrath2234 committed on
Commit
5848fd3
·
1 Parent(s): ac59358

add application files

Browse files
Files changed (2) hide show
  1. app.py +112 -0
  2. requirements.txt +7 -0
app.py ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Third-party imports, kept in their original relative order and grouped
# ahead of any object construction.
from spacy.lang.en import English
import pandas as pd
import gradio as gr
from transformers import pipeline
from gradio.themes.utils.colors import red, green

# English pipeline carrying only the "sentencizer" component; predict_doc
# uses it to split an essay into sentences.
nlp = English()
nlp.add_pipe("sentencizer")

# Sentence-level classifier whose output predict_one_sent converts into a
# single "machine-written" probability.
detector = pipeline(
    task='text-classification',
    model='SJTU-CL/RoBERTa-large-ArguGPT-sent',
)
13
+
14
# Highlight colour for each probability-bucket label used by predict_doc.
# Labels below '50%' run from the strongest green shade down to the palest;
# labels from '50%' upward run from the palest red shade up to the strongest.
color_map = dict(
    zip(
        (
            '0%', '10%', '20%', '30%', '40%',
            '50%', '60%', '70%', '80%', '90%', '100%',
        ),
        (
            green.c400, green.c300, green.c200, green.c100, green.c50,
            red.c50, red.c100, red.c200, red.c300, red.c400, red.c500,
        ),
    )
)
27
+
28
+
29
# Exclusive upper bound of each probability bucket, paired with the highlight
# label given to a sentence whose probability falls below that bound.
_BUCKET_BOUNDS = (
    (0.1, '0%'),
    (0.2, '10%'),
    (0.3, '20%'),
    (0.4, '30%'),
    (0.5, '40%'),
    (0.6, '50%'),
    (0.7, '60%'),
    (0.8, '70%'),
    (0.9, '80%'),
    (1, '90%'),
)


def _bucket_label(prob):
    '''Return the bucket label ('0%' ... '100%') for a probability.'''
    for upper, label in _BUCKET_BOUNDS:
        if prob < upper:
            return label
    # Only a probability of 1 (or above) reaches this point.
    return '100%'


def predict_doc(doc):
    '''
    Score every sentence of an essay and summarise the result.

    doc: the essay text. None, empty or whitespace-only input is treated
         as "no sentences" instead of being sent to the models.

    Returns a 4-tuple:
      - summary sentence for the whole essay,
      - list of (sentence, bucket label) pairs for the highlighted view,
      - DataFrame with columns 'sentence', 'label' and 'score',
      - path of the CSV file the DataFrame was written to.
    '''
    # Blank input never reaches the sentence splitter or the classifier;
    # whitespace-only sentences are dropped for the same reason.
    if doc and doc.strip():
        sents = [s.text for s in nlp(doc).sents if s.text.strip()]
    else:
        sents = []

    data = {'sentence': [], 'label': [], 'score': []}
    res = []
    for sent in sents:
        prob = predict_one_sent(sent)

        data['sentence'].append(sent)
        data['score'].append(round(prob, 4))
        data['label'].append('Human' if prob <= 0.5 else 'Machine')
        res.append((sent, _bucket_label(prob)))

    df = pd.DataFrame(data)
    # NOTE(review): the output path is fixed, so concurrent requests
    # overwrite each other's file -- confirm this is acceptable.
    df.to_csv('result.csv')

    if df.empty:
        # The mean of an empty score column is NaN, and NaN <= 0.5 is false,
        # which used to report the essay as "Machine" with probability nan.
        return 'No text to analyse. Please enter an essay first.', res, df, 'result.csv'

    overall_score = df.score.mean()
    overall_label = 'Human' if overall_score <= 0.5 else 'Machine'
    sum_str = f'The essay is probably written by {overall_label}. The probability of being generated by AI is {overall_score}'

    return sum_str, res, df, 'result.csv'
79
+
80
+
81
def predict_one_sent(sent):
    '''
    Return the probability that a sentence is machine-written.

    The detector's first result is used: a label plus the score of that
    label. The score is converted so that it always refers to LABEL_1:
        LABEL_1, 0.66 -> 0.66
        LABEL_0, 0.66 -> 0.34
    '''
    # truncation=True cuts an over-long sentence down to the model's maximum
    # input length instead of letting the pipeline raise on it.
    top = detector(sent, truncation=True)[0]
    score = top['score']
    return 1 - score if top['label'] == 'LABEL_0' else score
91
+
92
+
93
# NOTE(review): the source lost its indentation; the nesting below is
# reconstructed from the statement order -- confirm against the real layout.
with gr.Blocks() as demo:
    # First row: essay input and trigger button in a column, with the
    # highlighted per-sentence result beside it.
    with gr.Row():
        with gr.Column():
            text_in = gr.Textbox(
                label='Essay input',
                info='Please enter the essay in the textbox',
                lines=5,
            )
            btn = gr.Button('Predict who writes this essay!')

        sent_res = gr.HighlightedText(
            color_map=color_map,
            label='Labeled Result',
        )

    # Second row: overall verdict and the downloadable CSV.
    with gr.Row():
        summary = gr.Text(label='Result summary')
        csv_f = gr.File(label='CSV file storing data with all sentences.')

    tab = gr.Dataframe(row_count=100, label='Table with Probability Score')

    # The output order matches the 4-tuple returned by predict_doc.
    btn.click(
        predict_doc,
        inputs=[text_in],
        outputs=[summary, sent_res, tab, csv_f],
        api_name='predict_doc',
    )

demo.launch()
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ torch>=2.0.0,<3.0.0
2
+ transformers>=4.30.0,<5.0.0
3
+ spacy==3.8
4
+ pandas>=2.0.0,<3.0.0
5
+ gradio>=3.50.2,<4.0.0
6
+ gradio_client>=0.6.0,<1.0.0
7
+ protobuf>=3.20.0,<4.0.0