KavinduHansaka committed on
Commit
88051c8
·
verified ·
1 Parent(s): bbf0867

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +31 -30
app.py CHANGED
@@ -3,26 +3,27 @@ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, AutoMode
3
  import os
4
  from huggingface_hub import login
5
  import torch
 
6
 
7
- # Authenticate with Hugging Face token
8
  HUGGINGFACE_TOKEN = os.getenv("HF_TOKEN")
9
  login(token=HUGGINGFACE_TOKEN)
10
 
11
- # Load Phi-4 Mini for grammar/tone/fluency
12
  phi_id = "microsoft/phi-4-mini-instruct"
13
  phi_tokenizer = AutoTokenizer.from_pretrained(phi_id, token=HUGGINGFACE_TOKEN)
14
  phi_model = AutoModelForCausalLM.from_pretrained(phi_id, torch_dtype="auto", device_map="auto", token=HUGGINGFACE_TOKEN)
15
  phi_pipe = pipeline("text-generation", model=phi_model, tokenizer=phi_tokenizer)
16
 
17
- # Load updated T5 model for paraphrasing
18
  t5_pipe = pipeline("text2text-generation", model="google-t5/t5-base")
19
 
20
- # Load updated AI detector
21
  detector_id = "openai-community/roberta-base-openai-detector"
22
  detector_tokenizer = AutoTokenizer.from_pretrained(detector_id)
23
  detector_model = AutoModelForSequenceClassification.from_pretrained(detector_id)
24
 
25
- # Text chunking
26
  def chunk_text(text, max_tokens=300):
27
  paragraphs = text.split("\n\n")
28
  chunks, current = [], ""
@@ -36,17 +37,14 @@ def chunk_text(text, max_tokens=300):
36
  chunks.append(current.strip())
37
  return chunks
38
 
39
- # Prompt handling
40
  def generate_phi_prompt(text, instruction):
41
  chunks = chunk_text(text)
42
  outputs = []
43
  for chunk in chunks:
44
  prompt = f"{instruction}\n{chunk}\nResponse:"
45
  result = phi_pipe(prompt, max_new_tokens=256, do_sample=False, temperature=0.3)[0]["generated_text"]
46
- if "Response:" in result:
47
- outputs.append(result.split("Response:")[1].strip())
48
- else:
49
- outputs.append(result.strip())
50
  return "\n\n".join(outputs)
51
 
52
  # Writing tools
@@ -61,24 +59,26 @@ def improve_fluency(text):
61
 
62
  def paraphrase(text):
63
  chunks = chunk_text(text, max_tokens=60)
64
- outputs = []
65
- for chunk in chunks:
66
- output = t5_pipe("paraphrase this sentence: " + chunk, max_length=128, num_beams=5, do_sample=False)[0]["generated_text"]
67
- outputs.append(output)
68
- return "\n\n".join(outputs)
69
 
70
- # AI Detection
71
  def detect_ai_percent(text):
72
  inputs = detector_tokenizer(text, return_tensors="pt", truncation=True, padding=True)
73
  with torch.no_grad():
74
  logits = detector_model(**inputs).logits
75
  probs = torch.softmax(logits, dim=1).squeeze()
76
- return {
77
- "Likely Human": round(probs[0].item() * 100, 2),
78
- "Likely AI-Generated": round(probs[1].item() * 100, 2)
79
- }
80
-
81
- # Rewrite to lower AI score
 
 
 
82
  def rewrite_to_human(text):
83
  return generate_phi_prompt(text, "Rewrite the following text so that it is indistinguishable from human writing and avoids AI detection. Be natural and fluent:")
84
 
@@ -94,10 +94,10 @@ def save_file(text):
94
  f.write(text)
95
  return path
96
 
97
- # UI
98
  with gr.Blocks() as demo:
99
  gr.Markdown("# ✍️ AI Writing Assistant + Detector")
100
- gr.Markdown("Detect AI likelihood first. Rewrite manually if high. Fix grammar, tone, fluency, paraphrase, upload/download text.")
101
 
102
  with gr.Row():
103
  file_input = gr.File(label="📂 Upload .txt File", file_types=[".txt"])
@@ -113,7 +113,6 @@ with gr.Blocks() as demo:
113
  btn_paraphrase = gr.Button("🌀 Paraphrase")
114
 
115
  output_text = gr.Textbox(lines=12, label="Output")
116
-
117
  btn_grammar.click(fn=fix_grammar, inputs=input_text, outputs=output_text)
118
  btn_tone.click(fn=improve_tone, inputs=input_text, outputs=output_text)
119
  btn_fluency.click(fn=improve_fluency, inputs=input_text, outputs=output_text)
@@ -121,16 +120,18 @@ with gr.Blocks() as demo:
121
 
122
  gr.Markdown("## 🕵️ AI Detection")
123
  detect_btn = gr.Button("Detect AI Probability")
124
- ai_output = gr.Label(label="AI Detection Result")
125
- detect_btn.click(fn=detect_ai_percent, inputs=input_text, outputs=ai_output)
 
 
126
 
127
- gr.Markdown("## 🔁 Rewrite to Sound Human")
128
- rewrite_btn = gr.Button("Rewrite to Reduce AI Likelihood")
129
  rewritten_text = gr.Textbox(lines=12, label="Rewritten Text")
130
  rewrite_btn.click(fn=rewrite_to_human, inputs=input_text, outputs=rewritten_text)
131
 
132
  gr.Markdown("## 📤 Download Output")
133
- download_btn = gr.Button("💾 Download")
134
  download_file = gr.File(label="Click to download", interactive=True)
135
  download_btn.click(fn=save_file, inputs=output_text, outputs=download_file)
136
 
 
3
  import os
4
  from huggingface_hub import login
5
  import torch
6
+ import pandas as pd
7
 
8
+ # Authenticate
9
  HUGGINGFACE_TOKEN = os.getenv("HF_TOKEN")
10
  login(token=HUGGINGFACE_TOKEN)
11
 
12
+ # Phi-4 Mini
13
  phi_id = "microsoft/phi-4-mini-instruct"
14
  phi_tokenizer = AutoTokenizer.from_pretrained(phi_id, token=HUGGINGFACE_TOKEN)
15
  phi_model = AutoModelForCausalLM.from_pretrained(phi_id, torch_dtype="auto", device_map="auto", token=HUGGINGFACE_TOKEN)
16
  phi_pipe = pipeline("text-generation", model=phi_model, tokenizer=phi_tokenizer)
17
 
18
+ # T5 for paraphrasing
19
  t5_pipe = pipeline("text2text-generation", model="google-t5/t5-base")
20
 
21
+ # AI Detector
22
  detector_id = "openai-community/roberta-base-openai-detector"
23
  detector_tokenizer = AutoTokenizer.from_pretrained(detector_id)
24
  detector_model = AutoModelForSequenceClassification.from_pretrained(detector_id)
25
 
26
+ # Chunking
27
  def chunk_text(text, max_tokens=300):
28
  paragraphs = text.split("\n\n")
29
  chunks, current = [], ""
 
37
  chunks.append(current.strip())
38
  return chunks
39
 
40
+ # Phi-based instruction prompts
41
  def generate_phi_prompt(text, instruction):
42
  chunks = chunk_text(text)
43
  outputs = []
44
  for chunk in chunks:
45
  prompt = f"{instruction}\n{chunk}\nResponse:"
46
  result = phi_pipe(prompt, max_new_tokens=256, do_sample=False, temperature=0.3)[0]["generated_text"]
47
+ outputs.append(result.split("Response:")[1].strip() if "Response:" in result else result.strip())
 
 
 
48
  return "\n\n".join(outputs)
49
 
50
  # Writing tools
 
59
 
60
  def paraphrase(text):
61
  chunks = chunk_text(text, max_tokens=60)
62
+ return "\n\n".join(
63
+ t5_pipe("paraphrase this sentence: " + chunk, max_length=128, num_beams=5, do_sample=False)[0]["generated_text"]
64
+ for chunk in chunks
65
+ )
 
66
 
67
+ # AI Detection as label + bar
68
  def detect_ai_percent(text):
69
  inputs = detector_tokenizer(text, return_tensors="pt", truncation=True, padding=True)
70
  with torch.no_grad():
71
  logits = detector_model(**inputs).logits
72
  probs = torch.softmax(logits, dim=1).squeeze()
73
+ scores = [round(probs[0].item() * 100, 2), round(probs[1].item() * 100, 2)]
74
+ summary = "Likely Human" if scores[0] > scores[1] else "Likely AI-Generated"
75
+ df = pd.DataFrame({
76
+ "Label": ["Likely Human", "Likely AI-Generated"],
77
+ "Probability": scores
78
+ })
79
+ return summary, df
80
+
81
+ # Rewrite to sound human
82
  def rewrite_to_human(text):
83
  return generate_phi_prompt(text, "Rewrite the following text so that it is indistinguishable from human writing and avoids AI detection. Be natural and fluent:")
84
 
 
94
  f.write(text)
95
  return path
96
 
97
+ # Gradio UI
98
  with gr.Blocks() as demo:
99
  gr.Markdown("# ✍️ AI Writing Assistant + Detector")
100
+ gr.Markdown("Fix grammar, tone, fluency, paraphrase, detect AI content, and rewrite to reduce AI probability.")
101
 
102
  with gr.Row():
103
  file_input = gr.File(label="📂 Upload .txt File", file_types=[".txt"])
 
113
  btn_paraphrase = gr.Button("🌀 Paraphrase")
114
 
115
  output_text = gr.Textbox(lines=12, label="Output")
 
116
  btn_grammar.click(fn=fix_grammar, inputs=input_text, outputs=output_text)
117
  btn_tone.click(fn=improve_tone, inputs=input_text, outputs=output_text)
118
  btn_fluency.click(fn=improve_fluency, inputs=input_text, outputs=output_text)
 
120
 
121
  gr.Markdown("## 🕵️ AI Detection")
122
  detect_btn = gr.Button("Detect AI Probability")
123
+ ai_summary = gr.Textbox(label="Summary (Human vs AI)", interactive=False)
124
+ ai_chart = gr.BarPlot(x="Label", y="Probability", y_lim=(0, 100), title="AI Detection Result", color="Label", height=250)
125
+
126
+ detect_btn.click(fn=detect_ai_percent, inputs=input_text, outputs=[ai_summary, ai_chart])
127
 
128
+ gr.Markdown("## 🔁 Rewrite to Sound More Human")
129
+ rewrite_btn = gr.Button("Rewrite to Lower AI Likelihood")
130
  rewritten_text = gr.Textbox(lines=12, label="Rewritten Text")
131
  rewrite_btn.click(fn=rewrite_to_human, inputs=input_text, outputs=rewritten_text)
132
 
133
  gr.Markdown("## 📤 Download Output")
134
+ download_btn = gr.Button("💾 Download Final Output")
135
  download_file = gr.File(label="Click to download", interactive=True)
136
  download_btn.click(fn=save_file, inputs=output_text, outputs=download_file)
137