Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,120 +1,156 @@
|
|
1 |
import gradio as gr
|
2 |
-
import trafilatura
|
3 |
-
from transformers import pipeline
|
4 |
-
import pytesseract
|
5 |
-
from PIL import Image
|
6 |
import requests
|
7 |
-
from
|
|
|
8 |
import difflib
|
|
|
|
|
|
|
|
|
|
|
9 |
|
10 |
-
|
11 |
-
reviewer = pipeline("text2text-generation", model="google/flan-t5-base")
|
12 |
|
13 |
-
#
|
14 |
-
|
|
|
|
|
15 |
|
16 |
-
#
|
17 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
try:
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
23 |
except Exception as e:
|
24 |
return f"β OCR Error: {e}"
|
25 |
|
26 |
-
#
|
27 |
-
def
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import gradio as gr
|
|
|
|
|
|
|
|
|
2 |
import requests
|
3 |
+
from PIL import Image
|
4 |
+
import pytesseract
|
5 |
import difflib
|
6 |
+
from io import BytesIO
|
7 |
+
from transformers import pipeline
|
8 |
+
import trafilatura
|
9 |
+
from nltk.tokenize import sent_tokenize
|
10 |
+
import nltk
|
11 |
|
12 |
+
nltk.download("punkt")
|
|
|
13 |
|
14 |
+
# === Load AI model ===
# NOTE(review): this instantiates a text-generation pipeline for a
# 7B-parameter model at import time — weights are downloaded/loaded before
# the UI starts, which is slow and memory-heavy on CPU-only Spaces.
reviewer = pipeline("text-generation", model="HuggingFaceH4/zephyr-7b-beta", max_new_tokens=200)
# NOTE(review): `device` is informational only — it is never passed to the
# pipeline above, so the printed message may not reflect the device the
# pipeline actually selected; confirm against pipeline(..., device=...).
device = "cpu"
print(f"Device set to use {device}")
|
18 |
|
19 |
+
# === Utility: Highlight diffs ===
def highlight_diff(original, suggestion):
    """Return an HTML string visualizing word-level differences.

    Words removed from *original* are wrapped in red strike-through spans,
    words added in *suggestion* in green bold spans, and unchanged words
    pass through verbatim. The ndiff "? " hint lines carry no words and
    are deliberately skipped.
    """
    pieces = []
    for token in difflib.ndiff(original.split(), suggestion.split()):
        tag, word = token[:2], token[2:]
        if tag == "- ":
            pieces.append(f"<span style='color:red;text-decoration:line-through'>{word}</span> ")
        elif tag == "+ ":
            pieces.append(f"<span style='color:green;font-weight:bold'>{word}</span> ")
        elif tag == "  ":
            pieces.append(word + " ")
    return "".join(pieces).strip()
|
31 |
+
|
32 |
+
# === Extract blog content from URL ===
def extract_text_from_url(url):
    """Fetch *url* and return its main article text via trafilatura.

    Returns an error string (prefixed "β Blog Error:") instead of raising,
    so the Gradio UI can display failures directly in the text box.
    """
    try:
        headers = {"User-Agent": "Mozilla/5.0"}
        response = requests.get(url, headers=headers, timeout=10)
        if response.status_code == 200:
            text = trafilatura.extract(response.text)
            # trafilatura.extract returns None when it cannot locate an
            # article body; surface that as an explicit message instead of
            # letting None flow into the sentence tokenizer downstream.
            if text:
                return text
            return f"β Blog Error: no extractable article text found at {url}"
        else:
            return f"β Blog Error: HTTP {response.status_code} on URL {url}"
    except Exception as e:
        return f"β Blog Error: {e}"
|
43 |
+
|
44 |
+
# === Extract text from image URL (OCR) ===
def extract_text_from_image(image_url):
    """Download *image_url* and extract its text with pytesseract OCR.

    The image is converted to grayscale ("L") before OCR, which generally
    improves tesseract accuracy. Returns an error string (prefixed
    "β OCR Error:") instead of raising, so the UI can display failures.
    """
    try:
        # Bound the download and fail fast on HTTP errors rather than
        # handing an error page's bytes to PIL.
        response = requests.get(image_url, timeout=10)
        response.raise_for_status()
        image = Image.open(BytesIO(response.content)).convert("L")
        text = pytesseract.image_to_string(image)
        return text if text.strip() else "β OCR Error: No readable text found."
    except Exception as e:
        return f"β OCR Error: {e}"
|
53 |
|
54 |
+
# === Suggestion generator ===
def generate_suggestions(text):
    """Split *text* into sentences and ask the LLM to improve each one.

    Returns (sentences, suggestions) as two parallel lists. When the model
    produces no usable output for a sentence, the original sentence is kept
    as its own suggestion.
    """
    def _improve(sentence):
        prompt = f"Improve the tone, grammar, clarity and flag any sensitive content:\n\n{sentence}"
        generated = reviewer(prompt, max_new_tokens=200)[0]["generated_text"]
        # text-generation models echo the prompt; strip it from the output.
        cleaned = generated.replace(prompt, "").strip()
        return cleaned if cleaned else sentence

    sentences = sent_tokenize(text)
    return sentences, [_improve(s) for s in sentences]
|
64 |
+
|
65 |
+
# === Final approval handler ===
def collect_decisions(originals, suggestions, *choices):
    """Merge accepted suggestions with rejected originals into one text.

    For each position, the suggestion is taken when the matching choice is
    exactly "Accept"; any other choice keeps the original sentence. The
    merged sentences are joined with newlines. Surplus items in any input
    are dropped (zip truncates to the shortest sequence).
    """
    picked = (
        sugg if choice == "Accept" else orig
        for orig, sugg, choice in zip(originals, suggestions, choices)
    )
    return "\n".join(picked)
|
71 |
+
|
72 |
+
# === Gradio UI ===
# Three input tabs (blog URL, image URL, pasted text) feed a shared review
# flow: text is split into sentences, the model proposes a rewrite per
# sentence, and the user accepts/rejects each one before a final merge.
with gr.Blocks() as demo:
    gr.Markdown("# β¨ Blog Reviewer AI")
    gr.Markdown("Detect tone issues, errors, and sensitive content β and clean them interactively!")

    with gr.Tab("π From Blog URL"):
        blog_url = gr.Textbox(label="Enter blog URL")
        fetch_btn = gr.Button("Fetch & Review")

    with gr.Tab("πΌοΈ From Image URL (OCR)"):
        image_url = gr.Textbox(label="Enter Image URL")
        image_btn = gr.Button("Extract & Review")

    with gr.Tab("π Paste Text"):
        pasted_text = gr.Textbox(label="Paste blog content here", lines=10)
        paste_btn = gr.Button("Review Text")

    # Hidden container plus per-session state for the parallel sentence lists.
    output_section = gr.Column(visible=False)
    originals = gr.State([])
    suggestions = gr.State([])
    decision_radios = []

    view_mode = gr.Radio(["Original", "Suggestion", "Side-by-Side"], value="Side-by-Side", label="Choose View")
    final_output = gr.Textbox(label="β Final Output", lines=12)
    finalize_btn = gr.Button("Generate Clean Version")

    sentence_blocks = []

    # === Show suggestions UI ===
    def show_review(text):
        # Runs the model over every sentence; the third return value is
        # meant to reveal output_section via the outputs binding below.
        origs, suggs = generate_suggestions(text)
        # NOTE(review): assigning .value on gr.State inside a callback is
        # not the supported per-session update path — the returned values
        # are what actually update the State outputs; confirm these
        # assignments are needed at all.
        originals.value = origs
        suggestions.value = suggs
        return origs, suggs, True

    # === Populate sentence review rows dynamically ===
    def populate_review_ui(origs, suggs):
        global decision_radios, sentence_blocks
        decision_radios = []
        sentence_blocks = []

        ui_blocks = []
        for i, (orig, sugg) in enumerate(zip(origs, suggs)):
            # Three alternative renderings per sentence; only the diff view
            # starts visible, matching the "Side-by-Side" default above.
            orig_md = gr.Markdown(f"<b>{orig}</b>", visible=False)
            sugg_md = gr.Markdown(f"<b>{sugg}</b>", visible=False)
            diff_md = gr.Markdown(highlight_diff(orig, sugg), visible=True)

            radio = gr.Radio(["Accept", "Reject"], value="Accept", label=f"Suggestion {i+1}")
            decision_radios.append(radio)
            sentence_blocks.append((orig_md, sugg_md, diff_md))

            ui_blocks.extend([orig_md, sugg_md, diff_md, radio])
        # NOTE(review): components constructed inside an event callback are
        # not attached to the page in Gradio Blocks, and this handler is
        # wired with outputs=[] below, so these rows likely never render —
        # the documented mechanism for this pattern is @gr.render; verify.
        return ui_blocks

    # === Toggle view mode ===
    def toggle_view(view):
        # Emits one visibility update per component in each (orig, sugg,
        # diff) triple, in the same order they were registered.
        updates = []
        for orig_md, sugg_md, diff_md in sentence_blocks:
            if view == "Original":
                updates.extend([gr.update(visible=True), gr.update(visible=False), gr.update(visible=False)])
            elif view == "Suggestion":
                updates.extend([gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)])
            else:  # Side-by-side: show only the combined diff rendering
                updates.extend([gr.update(visible=False), gr.update(visible=False), gr.update(visible=True)])
        return updates

    # === Final output handler ===
    def finalize_output(origs, suggs, *choices):
        # Thin wrapper so the click binding can splat the radio values.
        return collect_decisions(origs, suggs, *choices)

    # Button click handlers: both extractors write into the paste box so a
    # single review path serves all three tabs.
    fetch_btn.click(fn=extract_text_from_url, inputs=blog_url, outputs=pasted_text)
    image_btn.click(fn=extract_text_from_image, inputs=image_url, outputs=pasted_text)

    paste_btn.click(fn=show_review, inputs=pasted_text, outputs=[originals, suggestions, output_section])

    # Dynamic render trigger
    # NOTE(review): outputs=[] discards populate_review_ui's return value,
    # so nothing on the page is updated by this event; see the @gr.render
    # note above.
    originals.change(fn=populate_review_ui, inputs=[originals, suggestions], outputs=[])

    # NOTE(review): this binding is evaluated at build time, when
    # sentence_blocks is still [], so the outputs list is permanently empty
    # and toggling the view cannot affect any component; verify.
    view_mode.change(fn=toggle_view, inputs=view_mode,
                     outputs=[item for block in sentence_blocks for item in block])

    # NOTE(review): same build-time issue — decision_radios is [] here, so
    # no per-sentence choices are ever passed and finalize_output receives
    # only the two state lists; verify.
    finalize_btn.click(fn=finalize_output, inputs=[originals, suggestions] + decision_radios, outputs=final_output)

demo.launch()
|