Spaces:

wangoes-dev
/

AI-Text-Detection

Running

App Files Files Community

Muskanrath2234 committed on Mar 11

Commit

5848fd3

1 Parent(s): ac59358

add application files

Browse files

Files changed (2) hide show

app.py +112 -0
requirements.txt +7 -0

app.py ADDED Viewed

	@@ -0,0 +1,112 @@

+from spacy.lang.en import English
+# Blank English pipeline; the sentencizer is the only component added, and
+# predict_doc() reads the resulting `.sents` to split essays into sentences.
+nlp = English()
+nlp.add_pipe("sentencizer")
+import pandas as pd
+import gradio as gr
+from transformers import pipeline
+from gradio.themes.utils.colors import red, green
+# Text-classification pipeline built once at import time and reused by
+# predict_one_sent() for every sentence.
+detector = pipeline(task='text-classification', model='SJTU-CL/RoBERTa-large-ArguGPT-sent')
+# Colors for the HighlightedText component, keyed by the band labels that
+# predict_doc() assigns ('0%' ... '100%'): green shades for bands below 50%,
+# red shades from 50% upward.
+color_map = {
+    '0%': green.c400,
+    '10%': green.c300,
+    '20%': green.c200,
+    '30%': green.c100,
+    '40%': green.c50,
+    '50%': red.c50,
+    '60%': red.c100,
+    '70%': red.c200,
+    '80%': red.c300,
+    '90%': red.c400,
+    '100%': red.c500
+}
+def predict_doc(doc):
+    # sents = sent_tokenize(doc)
+    sents = [s.text for s in nlp(doc).sents]
+    data = {'sentence': [], 'label': [], 'score': []}
+    res = []
+    for sent in sents:
+        prob = predict_one_sent(sent)
+        data['sentence'].append(sent)
+        data['score'].append(round(prob, 4))
+        if prob <= 0.5:
+            data['label'].append('Human')
+        else:
+            data['label'].append('Machine')
+        if prob < 0.1:
+            label = '0%'
+        elif prob < 0.2:
+            label = '10%'
+        elif prob < 0.3:
+            label = '20%'
+        elif prob < 0.4:
+            label = '30%'
+        elif prob < 0.5:
+            label = '40%'
+        elif prob < 0.6:
+            label = '50%'
+        elif prob < 0.7:
+            label = '60%'
+        elif prob < 0.8:
+            label = '70%'
+        elif prob < 0.9:
+            label = '80%'
+        elif prob < 1:
+            label = '90%'
+        else:
+            label = '100%'
+        res.append((sent, label))
+    df = pd.DataFrame(data)
+    df.to_csv('result.csv')
+    overall_score = df.score.mean()
+    sum_str = ''
+    if overall_score <= 0.5:
+        overall_label = 'Human'
+    else:
+        overall_label = 'Machine'
+    sum_str = f'The essay is probably written by {overall_label}. The probability of being generated by AI is {overall_score}'
+    return sum_str, res, df, 'result.csv'
+def predict_one_sent(sent):
+    '''
+    convert to prob
+    LABEL_1, 0.66 -> 0.66
+    LABEL_0, 0.66 -> 0.34
+    '''
+    res = detector(sent)[0]
+    org_label, prob = res['label'], res['score']
+    if org_label == 'LABEL_0': prob = 1 - prob
+    return prob
+# Gradio UI: first row holds the essay input (textbox + button) next to the
+# highlighted result; second row holds the summary text and the CSV download;
+# the score table sits below both rows.
+with gr.Blocks() as demo:
+    with gr.Row():
+        with gr.Column():
+            text_in = gr.Textbox(
+                lines=5,
+                label='Essay input',
+                info='Please enter the essay in the textbox'
+            )
+            btn = gr.Button('Predict who writes this essay!')
+        # Sentence highlights are colored through color_map, keyed by band label.
+        sent_res = gr.HighlightedText(label='Labeled Result', color_map=color_map)
+    with gr.Row():
+        summary = gr.Text(label='Result summary')
+        csv_f = gr.File(label='CSV file storing data with all sentences.')
+    tab = gr.Dataframe(label='Table with Probability Score', row_count=100)
+    # The outputs list follows the order of predict_doc's 4-tuple return value.
+    btn.click(predict_doc, inputs=[text_in], outputs=[summary, sent_res, tab, csv_f], api_name='predict_doc')
+# Runs at module level, so the app is launched as soon as this file is executed.
+demo.launch()

requirements.txt ADDED Viewed

	@@ -0,0 +1,7 @@

+torch>=2.0.0,<3.0.0
+transformers>=4.30.0,<5.0.0
+spacy==3.8
+pandas>=2.0.0,<3.0.0
+gradio>=3.50.2,<4.0.0
+gradio_client>=0.6.0,<1.0.0
+protobuf>=3.20.0,<4.0.0