from spacy.lang.en import English
import pandas as pd
import gradio as gr
from transformers import pipeline
from gradio.themes.utils.colors import red, green

# Lightweight spaCy English pipeline, used only to split essays into sentences
nlp = English()
nlp.add_pipe("sentencizer")

# Initialize the text classification pipeline
detector = pipeline(task='text-classification', model='SJTU-CL/RoBERTa-large-ArguGPT-sent')

# Color map for the highlighted sentences: green shades for low (human-like) scores,
# red shades for high (machine-like) scores. Keys match the buckets built in predict_doc.
color_map = {
    '0%': green.c400,
    '10%': green.c300,
    '20%': green.c200,
    '30%': green.c100,
    '40%': green.c50,
    '50%': red.c50,
    '60%': red.c100,
    '70%': red.c200,
    '80%': red.c300,
    '90%': red.c400,
    '100%': red.c500
}

def predict_doc(doc):
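    """Split the essay into sentences, score each one, and aggregate the results.

    Returns a summary string, (sentence, bucket) pairs for the HighlightedText
    component, a per-sentence DataFrame, and the path of the saved CSV file.
    """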
    sents = [s.text for s in nlp(doc).sents]
    data = {'sentence': [], 'label': [], 'score': []}
    res = []
    for sent in sents:
        prob = predict_one_sent(sent)

        data['sentence'].append(sent)
        data['score'].append(round(prob, 4))
        if prob <= 0.5:
            data['label'].append('Human')
        else:
            data['label'].append('Machine')

        # Bucket the probability into 10% bands matching the color_map keys
        if prob < 0.1: label = '0%'
        elif prob < 0.2: label = '10%'
        elif prob < 0.3: label = '20%'
        elif prob < 0.4: label = '30%'
        elif prob < 0.5: label = '40%'
        elif prob < 0.6: label = '50%'
        elif prob < 0.7: label = '60%'
        elif prob < 0.8: label = '70%'
        elif prob < 0.9: label = '80%'
        elif prob < 1: label = '90%'
        else: label = '100%'
        res.append((sent, label))

    df = pd.DataFrame(data)
    df.to_csv('result.csv')
    overall_score = df.score.mean()
    overall_label = 'Human' if overall_score <= 0.5 else 'Machine'
    sum_str = (f'This essay was most likely written by a {overall_label.lower()}. '
               f'The average probability of being AI-generated is {overall_score:.2f}.')

    return sum_str, res, df, 'result.csv'

def predict_one_sent(sent):
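    """Return the probability that a single sentence is machine-generated.

    The classifier's LABEL_0 is treated as the human-written class, so its
    score is flipped so the returned value is always P(machine-generated).
    """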
    res = detector(sent)[0]
    org_label, prob = res['label'], res['score']
    if org_label == 'LABEL_0': prob = 1 - prob
    return prob
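
# Example usage (manual sanity check; the actual score depends on the model):
#   prob = predict_one_sent("Artificial intelligence is transforming modern education.")
#   print(f"P(machine-generated) = {prob:.2f}")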

# Custom CSS for modern look
custom_css = """
.gradio-container {
    font-family: 'Arial', sans-serif;
}
.gradio-header {
    background-color: #4CAF50;
    color: white;
    padding: 10px;
    text-align: center;
}
.gradio-button {
    background-color: #4CAF50;
    color: white;
    border: none;
    padding: 10px 20px;
    text-align: center;
    text-decoration: none;
    display: inline-block;
    font-size: 16px;
    margin: 4px 2px;
    cursor: pointer;
    border-radius: 5px;
}
.gradio-button:hover {
    background-color: #45a049;
}
"""

with gr.Blocks(css=custom_css) as demo:
    gr.Markdown("## AI vs Human Essay Detector")
    gr.Markdown("This tool helps you determine whether an essay is written by a human or generated by AI.")
    
    with gr.Row():
        with gr.Column():
            text_in = gr.Textbox(
                lines=5, 
                label='Essay Input', 
                info='Paste the essay you want to analyze; it will be split into sentences and each sentence scored.',
                placeholder="Paste your essay here..."
            )
            btn = gr.Button('Predict who wrote this essay!', variant="primary")

        sent_res = gr.HighlightedText(label='Labeled Result', color_map=color_map)
    
    with gr.Row():
        summary = gr.Text(label='Result Summary')
        csv_f = gr.File(label='CSV File with Per-Sentence Results')

    tab = gr.Dataframe(label='Per-Sentence Probability Scores', row_count=100)
    btn.click(predict_doc, inputs=[text_in], outputs=[summary, sent_res, tab, csv_f], api_name='predict_doc')

# Gradio serves the app at http://127.0.0.1:7860 by default when run locally.
demo.launch()