Update app.py
Browse files
app.py
CHANGED
@@ -3,26 +3,27 @@ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, AutoMode
|
|
3 |
import os
|
4 |
from huggingface_hub import login
|
5 |
import torch
|
|
|
6 |
|
7 |
-
# Authenticate
|
8 |
HUGGINGFACE_TOKEN = os.getenv("HF_TOKEN")
|
9 |
login(token=HUGGINGFACE_TOKEN)
|
10 |
|
11 |
-
#
|
12 |
phi_id = "microsoft/phi-4-mini-instruct"
|
13 |
phi_tokenizer = AutoTokenizer.from_pretrained(phi_id, token=HUGGINGFACE_TOKEN)
|
14 |
phi_model = AutoModelForCausalLM.from_pretrained(phi_id, torch_dtype="auto", device_map="auto", token=HUGGINGFACE_TOKEN)
|
15 |
phi_pipe = pipeline("text-generation", model=phi_model, tokenizer=phi_tokenizer)
|
16 |
|
17 |
-
#
|
18 |
t5_pipe = pipeline("text2text-generation", model="google-t5/t5-base")
|
19 |
|
20 |
-
#
|
21 |
detector_id = "openai-community/roberta-base-openai-detector"
|
22 |
detector_tokenizer = AutoTokenizer.from_pretrained(detector_id)
|
23 |
detector_model = AutoModelForSequenceClassification.from_pretrained(detector_id)
|
24 |
|
25 |
-
#
|
26 |
def chunk_text(text, max_tokens=300):
|
27 |
paragraphs = text.split("\n\n")
|
28 |
chunks, current = [], ""
|
@@ -36,17 +37,14 @@ def chunk_text(text, max_tokens=300):
|
|
36 |
chunks.append(current.strip())
|
37 |
return chunks
|
38 |
|
39 |
-
#
|
40 |
def generate_phi_prompt(text, instruction):
|
41 |
chunks = chunk_text(text)
|
42 |
outputs = []
|
43 |
for chunk in chunks:
|
44 |
prompt = f"{instruction}\n{chunk}\nResponse:"
|
45 |
result = phi_pipe(prompt, max_new_tokens=256, do_sample=False, temperature=0.3)[0]["generated_text"]
|
46 |
-
if "Response:" in result
|
47 |
-
outputs.append(result.split("Response:")[1].strip())
|
48 |
-
else:
|
49 |
-
outputs.append(result.strip())
|
50 |
return "\n\n".join(outputs)
|
51 |
|
52 |
# Writing tools
|
@@ -61,24 +59,26 @@ def improve_fluency(text):
|
|
61 |
|
62 |
def paraphrase(text):
|
63 |
chunks = chunk_text(text, max_tokens=60)
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
return "\n\n".join(outputs)
|
69 |
|
70 |
-
# AI Detection
|
71 |
def detect_ai_percent(text):
|
72 |
inputs = detector_tokenizer(text, return_tensors="pt", truncation=True, padding=True)
|
73 |
with torch.no_grad():
|
74 |
logits = detector_model(**inputs).logits
|
75 |
probs = torch.softmax(logits, dim=1).squeeze()
|
76 |
-
|
77 |
-
"Likely Human"
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
|
|
|
|
|
|
82 |
def rewrite_to_human(text):
    """Rewrite *text* via ``generate_phi_prompt`` with a "sound human" instruction.

    Args:
        text: The text to rewrite.

    Returns:
        Whatever ``generate_phi_prompt`` returns for this instruction.
    """
    instruction = (
        "Rewrite the following text so that it is indistinguishable from human writing "
        "and avoids AI detection. Be natural and fluent:"
    )
    return generate_phi_prompt(text, instruction)
|
84 |
|
@@ -94,10 +94,10 @@ def save_file(text):
|
|
94 |
f.write(text)
|
95 |
return path
|
96 |
|
97 |
-
# UI
|
98 |
with gr.Blocks() as demo:
|
99 |
gr.Markdown("# ✍️ AI Writing Assistant + Detector")
|
100 |
-
gr.Markdown("
|
101 |
|
102 |
with gr.Row():
|
103 |
file_input = gr.File(label="📂 Upload .txt File", file_types=[".txt"])
|
@@ -113,7 +113,6 @@ with gr.Blocks() as demo:
|
|
113 |
btn_paraphrase = gr.Button("🌀 Paraphrase")
|
114 |
|
115 |
output_text = gr.Textbox(lines=12, label="Output")
|
116 |
-
|
117 |
btn_grammar.click(fn=fix_grammar, inputs=input_text, outputs=output_text)
|
118 |
btn_tone.click(fn=improve_tone, inputs=input_text, outputs=output_text)
|
119 |
btn_fluency.click(fn=improve_fluency, inputs=input_text, outputs=output_text)
|
@@ -121,16 +120,18 @@ with gr.Blocks() as demo:
|
|
121 |
|
122 |
gr.Markdown("## 🕵️ AI Detection")
|
123 |
detect_btn = gr.Button("Detect AI Probability")
|
124 |
-
|
125 |
-
|
|
|
|
|
126 |
|
127 |
-
gr.Markdown("## 🔁 Rewrite to Sound Human")
|
128 |
-
rewrite_btn = gr.Button("Rewrite to
|
129 |
rewritten_text = gr.Textbox(lines=12, label="Rewritten Text")
|
130 |
rewrite_btn.click(fn=rewrite_to_human, inputs=input_text, outputs=rewritten_text)
|
131 |
|
132 |
gr.Markdown("## 📤 Download Output")
|
133 |
-
download_btn = gr.Button("💾 Download")
|
134 |
download_file = gr.File(label="Click to download", interactive=True)
|
135 |
download_btn.click(fn=save_file, inputs=output_text, outputs=download_file)
|
136 |
|
|
|
3 |
import os
|
4 |
from huggingface_hub import login
|
5 |
import torch
|
6 |
+
import pandas as pd
|
7 |
|
8 |
+
# Authenticate
|
9 |
HUGGINGFACE_TOKEN = os.getenv("HF_TOKEN")
# login() without a token falls back to an interactive prompt, which cannot be
# answered in a headless deployment. Only log in when HF_TOKEN is configured;
# the from_pretrained calls below still receive the (possibly None) token.
if HUGGINGFACE_TOKEN:
    login(token=HUGGINGFACE_TOKEN)

# Phi-4 Mini
phi_id = "microsoft/phi-4-mini-instruct"
phi_tokenizer = AutoTokenizer.from_pretrained(phi_id, token=HUGGINGFACE_TOKEN)
phi_model = AutoModelForCausalLM.from_pretrained(
    phi_id,
    torch_dtype="auto",
    device_map="auto",
    token=HUGGINGFACE_TOKEN,
)
phi_pipe = pipeline("text-generation", model=phi_model, tokenizer=phi_tokenizer)

# T5 for paraphrasing
t5_pipe = pipeline("text2text-generation", model="google-t5/t5-base")

# AI Detector
detector_id = "openai-community/roberta-base-openai-detector"
detector_tokenizer = AutoTokenizer.from_pretrained(detector_id)
detector_model = AutoModelForSequenceClassification.from_pretrained(detector_id)
|
25 |
|
26 |
+
# Chunking
|
27 |
def chunk_text(text, max_tokens=300):
|
28 |
paragraphs = text.split("\n\n")
|
29 |
chunks, current = [], ""
|
|
|
37 |
chunks.append(current.strip())
|
38 |
return chunks
|
39 |
|
40 |
+
# Phi-based instruction prompts
|
41 |
def generate_phi_prompt(text, instruction):
    """Apply *instruction* to *text*, chunk by chunk, using the Phi pipeline.

    The text is split with ``chunk_text``. Each chunk is placed in a prompt made
    of the instruction, the chunk, and a trailing ``Response:`` marker (one per
    line), and ``phi_pipe`` completes it with greedy decoding.

    Args:
        text: Input text to transform.
        instruction: Instruction line placed before every chunk.

    Returns:
        The stripped per-chunk completions joined by blank lines.
    """
    marker = "Response:"
    outputs = []
    for chunk in chunk_text(text):
        prompt = f"{instruction}\n{chunk}\n{marker}"
        # Greedy decoding (do_sample=False) ignores `temperature`, so it is not passed.
        result = phi_pipe(prompt, max_new_tokens=256, do_sample=False)[0]["generated_text"]
        if result.startswith(prompt):
            # Drop the echoed prompt by prefix instead of splitting on the marker,
            # so a chunk that itself contains "Response:" cannot shift the split.
            # Cutting at the next marker keeps the earlier behaviour of discarding
            # anything the model appends after a repeated "Response:".
            completion = result[len(prompt):].split(marker)[0]
        elif marker in result:
            completion = result.split(marker)[1]
        else:
            completion = result
        outputs.append(completion.strip())
    return "\n\n".join(outputs)
|
49 |
|
50 |
# Writing tools
|
|
|
59 |
|
60 |
def paraphrase(text):
    """Paraphrase *text* with the T5 pipeline, one chunk at a time.

    The input is split by ``chunk_text(text, max_tokens=60)``; each chunk is
    prefixed with ``"paraphrase this sentence: "`` and sent to ``t5_pipe`` using
    beam search (5 beams, no sampling, ``max_length=128``).

    Args:
        text: The text to paraphrase.

    Returns:
        The first generated candidate for every chunk, joined by blank lines.
    """
    rewritten = []
    for piece in chunk_text(text, max_tokens=60):
        candidates = t5_pipe(
            "paraphrase this sentence: " + piece,
            max_length=128,
            num_beams=5,
            do_sample=False,
        )
        rewritten.append(candidates[0]["generated_text"])
    return "\n\n".join(rewritten)
|
|
|
66 |
|
67 |
+
# AI Detection as label + bar
|
68 |
def detect_ai_percent(text):
    """Score *text* with the detector model and report human vs. AI likelihood.

    Args:
        text: Text to classify. It is tokenized with ``truncation=True``, so
            over-long input is cut rather than scored in full.

    Returns:
        A ``(summary, df)`` tuple. ``summary`` is ``"Likely Human"`` or
        ``"Likely AI-Generated"``. ``df`` is a DataFrame with a ``Label``
        column and a ``Probability`` column (percent, rounded to 2 decimals),
        one row per class.
    """
    inputs = detector_tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        logits = detector_model(**inputs).logits
    probs = torch.softmax(logits, dim=1).squeeze()

    # The OpenAI detector's reference code unpacks its two outputs as
    # (fake, real): index 0 is machine-generated and index 1 is human-written.
    # Use the checkpoint's own id2label when it names the classes, and fall
    # back to that ordering otherwise, instead of assuming index 0 is human.
    id2label = getattr(detector_model.config, "id2label", None) or {}
    human_idx = 1
    for idx, label in id2label.items():
        if str(label).lower() in ("real", "human"):
            human_idx = int(idx)
    ai_idx = 1 - human_idx

    human_score = round(probs[human_idx].item() * 100, 2)
    ai_score = round(probs[ai_idx].item() * 100, 2)
    summary = "Likely Human" if human_score > ai_score else "Likely AI-Generated"
    df = pd.DataFrame({
        "Label": ["Likely Human", "Likely AI-Generated"],
        "Probability": [human_score, ai_score],
    })
    return summary, df
|
80 |
+
|
81 |
+
# Rewrite to sound human
|
82 |
def rewrite_to_human(text):
    """Rewrite *text* via ``generate_phi_prompt`` with a "sound human" instruction.

    Args:
        text: The text to rewrite.

    Returns:
        Whatever ``generate_phi_prompt`` returns for this instruction.
    """
    instruction = (
        "Rewrite the following text so that it is indistinguishable from human writing "
        "and avoids AI detection. Be natural and fluent:"
    )
    return generate_phi_prompt(text, instruction)
|
84 |
|
|
|
94 |
f.write(text)
|
95 |
return path
|
96 |
|
97 |
+
# Gradio UI
|
98 |
with gr.Blocks() as demo:
|
99 |
gr.Markdown("# ✍️ AI Writing Assistant + Detector")
|
100 |
+
gr.Markdown("Fix grammar, tone, fluency, paraphrase, detect AI content, and rewrite to reduce AI probability.")
|
101 |
|
102 |
with gr.Row():
|
103 |
file_input = gr.File(label="📂 Upload .txt File", file_types=[".txt"])
|
|
|
113 |
btn_paraphrase = gr.Button("🌀 Paraphrase")
|
114 |
|
115 |
output_text = gr.Textbox(lines=12, label="Output")
|
|
|
116 |
btn_grammar.click(fn=fix_grammar, inputs=input_text, outputs=output_text)
|
117 |
btn_tone.click(fn=improve_tone, inputs=input_text, outputs=output_text)
|
118 |
btn_fluency.click(fn=improve_fluency, inputs=input_text, outputs=output_text)
|
|
|
120 |
|
121 |
gr.Markdown("## 🕵️ AI Detection")
|
122 |
detect_btn = gr.Button("Detect AI Probability")
|
123 |
+
ai_summary = gr.Textbox(label="Summary (Human vs AI)", interactive=False)
|
124 |
+
ai_chart = gr.BarPlot(x="Label", y="Probability", y_lim=(0, 100), title="AI Detection Result", color="Label", height=250)
|
125 |
+
|
126 |
+
detect_btn.click(fn=detect_ai_percent, inputs=input_text, outputs=[ai_summary, ai_chart])
|
127 |
|
128 |
+
gr.Markdown("## 🔁 Rewrite to Sound More Human")
|
129 |
+
rewrite_btn = gr.Button("Rewrite to Lower AI Likelihood")
|
130 |
rewritten_text = gr.Textbox(lines=12, label="Rewritten Text")
|
131 |
rewrite_btn.click(fn=rewrite_to_human, inputs=input_text, outputs=rewritten_text)
|
132 |
|
133 |
gr.Markdown("## 📤 Download Output")
|
134 |
+
download_btn = gr.Button("💾 Download Final Output")
|
135 |
download_file = gr.File(label="Click to download", interactive=True)
|
136 |
download_btn.click(fn=save_file, inputs=output_text, outputs=download_file)
|
137 |
|