mwiehl committed
Commit 637ec09 · verified · 1 Parent(s): 86c105f

Update app.py


added batch processing

Files changed (1)
  1. app.py +40 -5
app.py CHANGED
@@ -5,6 +5,8 @@ import re
 from tokenizers import normalizers
 from tokenizers.normalizers import Sequence, Replace, Strip, NFKC
 from tokenizers import Regex
+import fitz  # PyMuPDF
+import os
 
 device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 
@@ -58,6 +60,23 @@ tokenizer.backend_tokenizer.normalizer = Sequence([
     Strip()
 ])
 
+def extract_text_from_file(file):
+    file_path = file.name
+    ext = os.path.splitext(file_path)[1].lower()
+
+    if ext == ".pdf":
+        doc = fitz.open(file_path)
+        text = ""
+        for page in doc:
+            text += page.get_text()
+        doc.close()
+        return text
+    elif ext == ".txt":
+        with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
+            return f.read()
+    else:
+        return ""
+
 def classify_text(text):
     cleaned_text = clean_text(text)
     if not text.strip():
@@ -100,7 +119,16 @@ def classify_text(text):
 
     return result_message
 
-
+def batch_classify(files):
+    results = []
+    for file in files:
+        try:
+            text = extract_text_from_file(file)
+            result_html = classify_text(text)
+            results.append([os.path.basename(file.name), result_html])
+        except Exception as e:
+            results.append([os.path.basename(file.name), f"Error: {str(e)}"])
+    return results
 
 
 
@@ -232,12 +260,19 @@ with iface:
     gr.Markdown(f"# {title}")
     gr.Markdown(description)
     text_input = gr.Textbox(label="", placeholder="Type or paste your content here...", elem_id="text_input_box", lines=5)
+
+    gr.Markdown("## Or upload multiple files for batch classification")
+    file_input = gr.File(label="Upload PDF or Text Files", file_types=[".pdf", ".txt", ".py", ".ipynb"], file_count="multiple")
+    result_table = gr.Dataframe(headers=["File Name", "Classification Result"], wrap=True)
+
+    file_input.change(fn=batch_classify, inputs=file_input, outputs=result_table)
+
+
+
     result_output = gr.Markdown("", elem_id="result_output_box")
     text_input.change(classify_text, inputs=text_input, outputs=result_output)
-    with gr.Tab("AI text examples"):
-        gr.Examples(AI_texts, inputs=text_input)
-    with gr.Tab("Human text examples"):
-        gr.Examples(Human_texts, inputs=text_input)
+
+
     gr.Markdown(bottom_text, elem_id="bottom_text")
 
 iface.launch(share=True)
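
For reference, the batch flow introduced by this commit can be exercised without the Gradio UI. The sketch below is illustrative only and not part of the commit: classify_text is replaced by a word-count stand-in, only the .txt branch of the extraction helper is kept (so PyMuPDF is not required), and Gradio's uploaded-file objects are faked with SimpleNamespace, since the added code relies on them only for a .name path attribute.

```python
# Illustrative sketch only; not part of app.py. The names below mirror the
# functions added in this commit, with classify_text stubbed out.
import os
import tempfile
from types import SimpleNamespace

def extract_text_from_file(file):
    # Same shape as the committed helper, but only the .txt branch is kept
    # so this sketch runs without PyMuPDF.
    file_path = file.name
    ext = os.path.splitext(file_path)[1].lower()
    if ext == ".txt":
        with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
            return f.read()
    return ""

def classify_text(text):
    # Stand-in for the model-backed classifier in app.py.
    return f"{len(text.split())} words"

def batch_classify(files):
    # Same per-file try/except pattern as the committed function.
    results = []
    for file in files:
        try:
            text = extract_text_from_file(file)
            results.append([os.path.basename(file.name), classify_text(text)])
        except Exception as e:
            results.append([os.path.basename(file.name), f"Error: {e}"])
    return results

# Fake "uploads": temp .txt files wrapped in objects that expose .name,
# which is all the batch path needs from Gradio's file objects.
uploads = []
for content in ["first sample text", "second sample text goes here"]:
    fd, path = tempfile.mkstemp(suffix=".txt")
    with os.fdopen(fd, "w", encoding="utf-8") as f:
        f.write(content)
    uploads.append(SimpleNamespace(name=path))

print(batch_classify(uploads))
# e.g. [['tmpab12cd.txt', '3 words'], ['tmpef34gh.txt', '5 words']]
```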
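
On the UI side, the change amounts to a multi-file gr.File input driving a two-column gr.Dataframe through a .change event. Below is a self-contained approximation of that wiring, with the classifier replaced by a trivial stand-in; the demo-specific names are placeholders, not from the commit.

```python
# Standalone approximation of the wiring added to the Blocks layout; the
# real app routes files through batch_classify and the model.
import os
import gradio as gr

def demo_batch_classify(files):
    # Depending on the Gradio version, each entry is a path string or a
    # tempfile wrapper exposing .name; handle both.
    rows = []
    for f in files:
        path = f if isinstance(f, str) else f.name
        rows.append([os.path.basename(path), "demo result"])
    return rows

with gr.Blocks() as demo:
    file_input = gr.File(label="Upload PDF or Text Files",
                         file_types=[".pdf", ".txt"], file_count="multiple")
    result_table = gr.Dataframe(headers=["File Name", "Classification Result"], wrap=True)
    file_input.change(fn=demo_batch_classify, inputs=file_input, outputs=result_table)

demo.launch()
```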