Spaces:

KavinduHansaka
/

grammar-fixer-phi4-mini

Running

App Files Files Community

KavinduHansaka committed on May 15

Commit

88051c8

verified ·

1 Parent(s): bbf0867

Update app.py

Browse files

Files changed (1) hide show

app.py +31 -30

app.py CHANGED Viewed

@@ -3,26 +3,27 @@ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, AutoMode
 import os
 from huggingface_hub import login
 import torch
-# Authenticate with Hugging Face token
 HUGGINGFACE_TOKEN = os.getenv("HF_TOKEN")
 login(token=HUGGINGFACE_TOKEN)
-# Load Phi-4 Mini for grammar/tone/fluency
 phi_id = "microsoft/phi-4-mini-instruct"
 phi_tokenizer = AutoTokenizer.from_pretrained(phi_id, token=HUGGINGFACE_TOKEN)
 phi_model = AutoModelForCausalLM.from_pretrained(phi_id, torch_dtype="auto", device_map="auto", token=HUGGINGFACE_TOKEN)
 phi_pipe = pipeline("text-generation", model=phi_model, tokenizer=phi_tokenizer)
-# Load updated T5 model for paraphrasing
 t5_pipe = pipeline("text2text-generation", model="google-t5/t5-base")
-# Load updated AI detector
 detector_id = "openai-community/roberta-base-openai-detector"
 detector_tokenizer = AutoTokenizer.from_pretrained(detector_id)
 detector_model = AutoModelForSequenceClassification.from_pretrained(detector_id)
-# Text chunking
 def chunk_text(text, max_tokens=300):
     paragraphs = text.split("\n\n")
     chunks, current = [], ""
@@ -36,17 +37,14 @@ def chunk_text(text, max_tokens=300):
         chunks.append(current.strip())
     return chunks
-# Prompt handling
 def generate_phi_prompt(text, instruction):
     chunks = chunk_text(text)
     outputs = []
     for chunk in chunks:
         prompt = f"{instruction}\n{chunk}\nResponse:"
         result = phi_pipe(prompt, max_new_tokens=256, do_sample=False, temperature=0.3)[0]["generated_text"]
-        if "Response:" in result:
-            outputs.append(result.split("Response:")[1].strip())
-        else:
-            outputs.append(result.strip())
     return "\n\n".join(outputs)
 # Writing tools
@@ -61,24 +59,26 @@ def improve_fluency(text):
 def paraphrase(text):
     chunks = chunk_text(text, max_tokens=60)
-    outputs = []
-    for chunk in chunks:
-        output = t5_pipe("paraphrase this sentence: " + chunk, max_length=128, num_beams=5, do_sample=False)[0]["generated_text"]
-        outputs.append(output)
-    return "\n\n".join(outputs)
-# AI Detection
 def detect_ai_percent(text):
     inputs = detector_tokenizer(text, return_tensors="pt", truncation=True, padding=True)
     with torch.no_grad():
         logits = detector_model(**inputs).logits
         probs = torch.softmax(logits, dim=1).squeeze()
-    return {
-        "Likely Human": round(probs[0].item() * 100, 2),
-        "Likely AI-Generated": round(probs[1].item() * 100, 2)
-    }
-# Rewrite to lower AI score
 def rewrite_to_human(text):
     return generate_phi_prompt(text, "Rewrite the following text so that it is indistinguishable from human writing and avoids AI detection. Be natural and fluent:")
@@ -94,10 +94,10 @@ def save_file(text):
         f.write(text)
     return path
-# UI
 with gr.Blocks() as demo:
     gr.Markdown("# ✍️ AI Writing Assistant + Detector")
-    gr.Markdown("Detect AI likelihood first. Rewrite manually if high. Fix grammar, tone, fluency, paraphrase, upload/download text.")
     with gr.Row():
         file_input = gr.File(label="📂 Upload .txt File", file_types=[".txt"])
@@ -113,7 +113,6 @@ with gr.Blocks() as demo:
         btn_paraphrase = gr.Button("🌀 Paraphrase")
     output_text = gr.Textbox(lines=12, label="Output")
     btn_grammar.click(fn=fix_grammar, inputs=input_text, outputs=output_text)
     btn_tone.click(fn=improve_tone, inputs=input_text, outputs=output_text)
     btn_fluency.click(fn=improve_fluency, inputs=input_text, outputs=output_text)
@@ -121,16 +120,18 @@ with gr.Blocks() as demo:
     gr.Markdown("## 🕵️ AI Detection")
     detect_btn = gr.Button("Detect AI Probability")
-    ai_output = gr.Label(label="AI Detection Result")
-    detect_btn.click(fn=detect_ai_percent, inputs=input_text, outputs=ai_output)
-    gr.Markdown("## 🔁 Rewrite to Sound Human")
-    rewrite_btn = gr.Button("Rewrite to Reduce AI Likelihood")
     rewritten_text = gr.Textbox(lines=12, label="Rewritten Text")
     rewrite_btn.click(fn=rewrite_to_human, inputs=input_text, outputs=rewritten_text)
     gr.Markdown("## 📤 Download Output")
-    download_btn = gr.Button("💾 Download")
     download_file = gr.File(label="Click to download", interactive=True)
     download_btn.click(fn=save_file, inputs=output_text, outputs=download_file)

 import os
 from huggingface_hub import login
 import torch
+import pandas as pd
+# Authenticate with the Hugging Face Hub using the token read from the HF_TOKEN environment variable
 HUGGINGFACE_TOKEN = os.getenv("HF_TOKEN")
 login(token=HUGGINGFACE_TOKEN)
+# Phi-4 Mini instruct model, wrapped in a text-generation pipeline (called by generate_phi_prompt)
 phi_id = "microsoft/phi-4-mini-instruct"
 phi_tokenizer = AutoTokenizer.from_pretrained(phi_id, token=HUGGINGFACE_TOKEN)
 phi_model = AutoModelForCausalLM.from_pretrained(phi_id, torch_dtype="auto", device_map="auto", token=HUGGINGFACE_TOKEN)
 phi_pipe = pipeline("text-generation", model=phi_model, tokenizer=phi_tokenizer)
+# T5 base text2text-generation pipeline (called by paraphrase)
 t5_pipe = pipeline("text2text-generation", model="google-t5/t5-base")
+# Sequence-classification model and tokenizer used as the AI-text detector (called by detect_ai_percent)
 detector_id = "openai-community/roberta-base-openai-detector"
 detector_tokenizer = AutoTokenizer.from_pretrained(detector_id)
 detector_model = AutoModelForSequenceClassification.from_pretrained(detector_id)
+# Chunking
 def chunk_text(text, max_tokens=300):
     paragraphs = text.split("\n\n")
     chunks, current = [], ""
         chunks.append(current.strip())
     return chunks
+# Phi-based instruction prompts
 def generate_phi_prompt(text, instruction):
     """Apply ``instruction`` to ``text`` chunk by chunk using the Phi pipeline.

     The text is split with ``chunk_text``. Each chunk is sent to ``phi_pipe``
     in a prompt made of the instruction, the chunk and a trailing
     ``Response:`` marker, each on its own line.

     Args:
         text: Input text to process.
         instruction: Instruction placed before every chunk in the prompt.

     Returns:
         The per-chunk completions, stripped and joined by a blank line.
     """
     outputs = []
     for chunk in chunk_text(text):
         prompt = f"{instruction}\n{chunk}\nResponse:"
         # Ask the pipeline for the completion only. Splitting the echoed
         # prompt on "Response:" returned the wrong segment whenever the
         # instruction or the chunk itself contained that marker.
         # No temperature is passed: decoding is greedy (do_sample=False),
         # so a sampling temperature has no effect on the output.
         completion = phi_pipe(
             prompt,
             max_new_tokens=256,
             do_sample=False,
             return_full_text=False,
         )[0]["generated_text"]
         # As before, cut the answer at any further "Response:" marker the
         # model may emit on its own.
         outputs.append(completion.split("Response:", 1)[0].strip())
     return "\n\n".join(outputs)
 # Writing tools
 def paraphrase(text):
     """Paraphrase ``text`` chunk by chunk with the T5 pipeline.

     The text is split via ``chunk_text`` with ``max_tokens=60``; every chunk
     is prefixed with ``"paraphrase this sentence: "`` and run through
     ``t5_pipe`` using beam search (5 beams, no sampling).

     Returns:
         The first generated text for each chunk, joined by a blank line.
     """
     rewritten = []
     for piece in chunk_text(text, max_tokens=60):
         candidates = t5_pipe(
             "paraphrase this sentence: " + piece,
             max_length=128,
             num_beams=5,
             do_sample=False,
         )
         rewritten.append(candidates[0]["generated_text"])
     return "\n\n".join(rewritten)
+# AI Detection as label + bar
 def detect_ai_percent(text):
     """Score ``text`` with the detector model and report human vs. AI likelihood.

     Args:
         text: Text to classify. It is tokenized with truncation enabled.

     Returns:
         A ``(summary, df)`` tuple. ``summary`` is ``"Likely Human"`` or
         ``"Likely AI-Generated"``; ``df`` is a DataFrame with columns
         ``Label`` and ``Probability`` (percentages rounded to 2 decimals),
         human row first.
     """
     inputs = detector_tokenizer(text, return_tensors="pt", truncation=True, padding=True)
     with torch.no_grad():
         logits = detector_model(**inputs).logits
     probs = torch.softmax(logits, dim=1).squeeze()
     # Resolve which logit is "human" from the model config instead of
     # hard-coding index 0. NOTE(review): the upstream detector is believed to
     # order its outputs (fake, real), which would make a fixed index-0-is-human
     # mapping inverted -- confirm against the checkpoint's id2label.
     id2label = getattr(detector_model.config, "id2label", None) or {}
     index_of = {str(label).lower(): int(idx) for idx, label in id2label.items()}
     if "real" in index_of and "fake" in index_of:
         human_idx, ai_idx = index_of["real"], index_of["fake"]
     else:
         # Unknown label names: keep the previous positional assumption.
         human_idx, ai_idx = 0, 1
     scores = [
         round(probs[human_idx].item() * 100, 2),
         round(probs[ai_idx].item() * 100, 2),
     ]
     summary = "Likely Human" if scores[0] > scores[1] else "Likely AI-Generated"
     df = pd.DataFrame({
         "Label": ["Likely Human", "Likely AI-Generated"],
         "Probability": scores,
     })
     return summary, df
+# Rewrite to sound human
 def rewrite_to_human(text):
     """Rewrite ``text`` through ``generate_phi_prompt`` with a humanizing instruction."""
     instruction = "Rewrite the following text so that it is indistinguishable from human writing and avoids AI detection. Be natural and fluent:"
     return generate_phi_prompt(text, instruction)
         f.write(text)
     return path
+# Gradio UI
 with gr.Blocks() as demo:
     gr.Markdown("# ✍️ AI Writing Assistant + Detector")
+    gr.Markdown("Fix grammar, tone, fluency, paraphrase, detect AI content, and rewrite to reduce AI probability.")
     with gr.Row():
         file_input = gr.File(label="📂 Upload .txt File", file_types=[".txt"])
         btn_paraphrase = gr.Button("🌀 Paraphrase")
     output_text = gr.Textbox(lines=12, label="Output")
     btn_grammar.click(fn=fix_grammar, inputs=input_text, outputs=output_text)
     btn_tone.click(fn=improve_tone, inputs=input_text, outputs=output_text)
     btn_fluency.click(fn=improve_fluency, inputs=input_text, outputs=output_text)
     gr.Markdown("## 🕵️ AI Detection")
     detect_btn = gr.Button("Detect AI Probability")
+    ai_summary = gr.Textbox(label="Summary (Human vs AI)", interactive=False)
+    ai_chart = gr.BarPlot(x="Label", y="Probability", y_lim=(0, 100), title="AI Detection Result", color="Label", height=250)
+    detect_btn.click(fn=detect_ai_percent, inputs=input_text, outputs=[ai_summary, ai_chart])
+    gr.Markdown("## 🔁 Rewrite to Sound More Human")
+    rewrite_btn = gr.Button("Rewrite to Lower AI Likelihood")
     rewritten_text = gr.Textbox(lines=12, label="Rewritten Text")
     rewrite_btn.click(fn=rewrite_to_human, inputs=input_text, outputs=rewritten_text)
     gr.Markdown("## 📤 Download Output")
+    download_btn = gr.Button("💾 Download Final Output")
     download_file = gr.File(label="Click to download", interactive=True)
     download_btn.click(fn=save_file, inputs=output_text, outputs=download_file)