# Spaces: Sleeping
import gradio as gr | |
import requests | |
from PIL import Image | |
import pytesseract | |
import difflib | |
from io import BytesIO | |
from transformers import pipeline | |
import trafilatura | |
from nltk.tokenize import sent_tokenize | |
import nltk | |
# Sentence tokenizer data. Newer NLTK releases look up the "punkt_tab"
# resource instead of "punkt", so both are requested; with the default
# settings a failed download is reported by nltk rather than raised.
for _resource in ("punkt", "punkt_tab"):
    nltk.download(_resource, quiet=True)

# === Load AI model ===
reviewer = pipeline(
    "text-generation",
    model="HuggingFaceH4/zephyr-7b-beta",
    max_new_tokens=200,
)
# Report the device the pipeline actually ended up on instead of a hard-coded
# label that was never passed to it.
device = str(getattr(reviewer, "device", "cpu"))
print(f"Device set to use {device}")
# === Utility: Highlight diffs ===
def highlight_diff(original, suggestion):
    """Return an HTML word-level diff between two pieces of text.

    Both inputs are split on whitespace and compared with ``difflib.ndiff``.
    Words only in ``original`` are wrapped in a red strike-through span,
    words only in ``suggestion`` in a bold green span, and shared words are
    emitted as-is. Every word is HTML-escaped first, because the text comes
    from arbitrary web pages / OCR output and is rendered as markup.

    Args:
        original: The source sentence.
        suggestion: The proposed replacement sentence.

    Returns:
        A single string with the diff fragments joined by spaces.
    """
    import html  # function-local so this block does not depend on file-level imports

    removed = "<span style='color:red;text-decoration:line-through'>{}</span>"
    added = "<span style='color:green;font-weight:bold'>{}</span>"

    pieces = []
    for token in difflib.ndiff(original.split(), suggestion.split()):
        marker, word = token[:2], html.escape(token[2:])
        if marker == "- ":
            pieces.append(removed.format(word))
        elif marker == "+ ":
            pieces.append(added.format(word))
        elif marker == "  ":
            pieces.append(word)
        # "? " hint lines produced by ndiff carry no words and are skipped.
    return " ".join(pieces)
# === Extract blog content from URL ===
def extract_text_from_url(url):
    """Download a web page and return its main text content.

    The page is fetched with a browser-like User-Agent and a 10 second
    timeout, then passed to ``trafilatura.extract``.

    Args:
        url: Address of the page to fetch.

    Returns:
        The extracted text, or a human-readable error string when the request
        fails, the server answers with a non-200 status, or no text could be
        extracted. This function never raises; errors are reported in-band
        because the result is shown directly in a text box.
    """
    # NOTE(review): the leading glyph of the error strings looks mis-encoded;
    # it is kept verbatim -- confirm the intended character.
    try:
        headers = {"User-Agent": "Mozilla/5.0"}
        response = requests.get(url, headers=headers, timeout=10)
        if response.status_code != 200:
            return f"β Blog Error: HTTP {response.status_code} on URL {url}"
        extracted = trafilatura.extract(response.text)
    except Exception as e:
        return f"β Blog Error: {e}"

    # trafilatura.extract yields None when it finds no main content; surface
    # that as an explicit error instead of handing None to the UI.
    if not extracted:
        return f"β Blog Error: No readable text found at {url}"
    return extracted
# === Extract text from image URL (OCR) ===
def extract_text_from_image(image_url):
    """Download an image and return the text recognised in it.

    The image is fetched with the same User-Agent header and 10 second
    timeout used for blog pages, converted to greyscale ("L" mode) and run
    through ``pytesseract.image_to_string``.

    Args:
        image_url: Address of the image to fetch.

    Returns:
        The recognised text, or a human-readable error string when the
        download fails, the server answers with a non-200 status, the image
        cannot be decoded, or OCR finds nothing. This function never raises.
    """
    # NOTE(review): the leading glyph of the error strings looks mis-encoded;
    # it is kept verbatim -- confirm the intended character.
    try:
        headers = {"User-Agent": "Mozilla/5.0"}
        # A timeout keeps a stalled server from hanging the UI handler.
        response = requests.get(image_url, headers=headers, timeout=10)
        if response.status_code != 200:
            # Without this check an HTML error page would be fed to PIL and
            # surface as a confusing "cannot identify image" message.
            return f"β OCR Error: HTTP {response.status_code} on URL {image_url}"
        image = Image.open(BytesIO(response.content)).convert("L")
        text = pytesseract.image_to_string(image)
    except Exception as e:
        return f"β OCR Error: {e}"

    return text if text.strip() else "β OCR Error: No readable text found."
# === Suggestion generator ===
def generate_suggestions(text):
    """Split ``text`` into sentences and ask the model to improve each one.

    Each sentence is sent to the module-level ``reviewer`` pipeline with a
    fixed instruction prompt. The prompt is removed from the generated text;
    if nothing is left, the original sentence is used as its own suggestion.

    Args:
        text: The text to review. ``None``, empty, or whitespace-only input
            is accepted and produces no sentences.

    Returns:
        A ``(sentences, suggestions)`` pair of equal-length lists.
    """
    # Guard before tokenising: None would raise inside sent_tokenize, and
    # blank input has nothing to review.
    if not text or not text.strip():
        return [], []

    sentences = sent_tokenize(text)
    suggestions = []
    for sent in sentences:
        prompt = f"Improve the tone, grammar, clarity and flag any sensitive content:\n\n{sent}"
        output = reviewer(prompt, max_new_tokens=200)[0]["generated_text"]
        # The generated text may echo the prompt; keep only the model's addition.
        cleaned = output.replace(prompt, "").strip()
        suggestions.append(cleaned or sent)
    return sentences, suggestions
# === Final approval handler ===
def collect_decisions(originals, suggestions, *choices):
    """Build the final text from per-sentence accept/reject decisions.

    For each original sentence the suggestion is used only when the choice at
    the same position is exactly ``"Accept"``. A sentence with no matching
    suggestion or no recorded choice keeps its original wording, so a short
    ``choices`` list can no longer silently drop text from the result.

    Args:
        originals: List of original sentences.
        suggestions: List of suggested rewrites, aligned with ``originals``.
        *choices: One decision string per sentence, aligned with ``originals``;
            extra trailing choices are ignored.

    Returns:
        The selected sentences joined with newlines.
    """
    results = []
    for index, orig in enumerate(originals):
        accepted = (
            index < len(suggestions)
            and index < len(choices)
            and choices[index] == "Accept"
        )
        results.append(suggestions[index] if accepted else orig)
    return "\n".join(results)
# === Gradio UI ===
# Components must exist when the Blocks layout is built for event listeners
# to target them, so a fixed pool of review rows is created up front and
# filled / shown / hidden per request instead of being created in a handler.
MAX_REVIEW_ROWS = 30


def _view_flags(view):
    """Return (original, suggestion, diff) visibility flags for a view mode."""
    if view == "Original":
        return True, False, False
    if view == "Suggestion":
        return False, True, False
    return False, False, True  # "Side-by-Side" shows the highlighted diff


# === Populate sentence review rows ===
def populate_review_ui(origs, suggs, view="Side-by-Side"):
    """Build the updates that fill the pre-built review rows.

    Returns four updates per pooled row (original, suggestion, diff, radio),
    in the same order as ``review_components``. Rows beyond the number of
    sentences are cleared and hidden; radios are reset to "Accept".
    """
    show_orig, show_sugg, show_diff = _view_flags(view)
    pairs = list(zip(origs, suggs))[:MAX_REVIEW_ROWS]

    updates = []
    for orig, sugg in pairs:
        updates.append(gr.update(value=f"<b>{orig}</b>", visible=show_orig))
        updates.append(gr.update(value=f"<b>{sugg}</b>", visible=show_sugg))
        updates.append(gr.update(value=highlight_diff(orig, sugg), visible=show_diff))
        updates.append(gr.update(value="Accept", visible=True))
    for _ in range(MAX_REVIEW_ROWS - len(pairs)):
        updates.append(gr.update(value="", visible=False))
        updates.append(gr.update(value="", visible=False))
        updates.append(gr.update(value="", visible=False))
        updates.append(gr.update(value="Accept", visible=False))
    return updates


# === Show suggestions UI ===
def show_review(text, view="Side-by-Side"):
    """Run the review and return values for the state, section and rows.

    The sentence lists are returned as outputs so they land in the
    per-session ``gr.State`` objects rather than being written to the shared
    ``State.value`` default.
    """
    origs, suggs = generate_suggestions(text or "")

    overflow = len(origs) - MAX_REVIEW_ROWS
    if overflow > 0:
        note = gr.update(
            value=(
                f"Only the first {MAX_REVIEW_ROWS} sentences are shown for review; "
                f"the remaining {overflow} are kept unchanged."
            ),
            visible=True,
        )
    else:
        note = gr.update(value="", visible=False)

    return (origs, suggs, gr.update(visible=True), note,
            *populate_review_ui(origs, suggs, view))


# === Toggle view mode ===
def toggle_view(view, origs=()):
    """Return visibility updates (three per pooled row) for the chosen view."""
    flags = _view_flags(view)
    active = min(len(origs), MAX_REVIEW_ROWS)
    updates = []
    for row in range(MAX_REVIEW_ROWS):
        in_use = row < active
        updates.extend(gr.update(visible=in_use and flag) for flag in flags)
    return updates


# === Final output handler ===
def finalize_output(origs, suggs, *choices):
    """Combine the radio decisions into the final text.

    Radios of unused rows are ignored, and sentences beyond the row pool are
    explicitly marked "Reject" so their original wording is kept.
    """
    shown = min(len(origs), len(choices))
    decisions = list(choices[:shown]) + ["Reject"] * (len(origs) - shown)
    return collect_decisions(origs, suggs, *decisions)


# NOTE(review): several glyphs in the labels below look mis-encoded (emoji /
# dash); they are kept verbatim -- confirm the intended characters.
with gr.Blocks() as demo:
    gr.Markdown("# β¨ Blog Reviewer AI")
    gr.Markdown("Detect tone issues, errors, and sensitive content β and clean them interactively!")

    with gr.Tab("π From Blog URL"):
        blog_url = gr.Textbox(label="Enter blog URL")
        fetch_btn = gr.Button("Fetch & Review")
    with gr.Tab("πΌοΈ From Image URL (OCR)"):
        image_url = gr.Textbox(label="Enter Image URL")
        image_btn = gr.Button("Extract & Review")
    with gr.Tab("π Paste Text"):
        pasted_text = gr.Textbox(label="Paste blog content here", lines=10)
        paste_btn = gr.Button("Review Text")

    originals = gr.State([])
    suggestions = gr.State([])

    view_mode = gr.Radio(["Original", "Suggestion", "Side-by-Side"], value="Side-by-Side", label="Choose View")

    decision_radios = []
    sentence_blocks = []
    with gr.Column(visible=False) as output_section:
        truncation_note = gr.Markdown(visible=False)
        for i in range(MAX_REVIEW_ROWS):
            orig_md = gr.Markdown(visible=False)
            sugg_md = gr.Markdown(visible=False)
            diff_md = gr.Markdown(visible=False)
            radio = gr.Radio(["Accept", "Reject"], value="Accept", label=f"Suggestion {i+1}", visible=False)
            sentence_blocks.append((orig_md, sugg_md, diff_md))
            decision_radios.append(radio)

    final_output = gr.Textbox(label="β Final Output", lines=12)
    finalize_btn = gr.Button("Generate Clean Version")

    # Flat component lists, ordered exactly like the handlers' return values.
    markdown_components = [item for block in sentence_blocks for item in block]
    review_components = [
        item
        for block, radio in zip(sentence_blocks, decision_radios)
        for item in (*block, radio)
    ]

    # Button click handlers
    fetch_btn.click(fn=extract_text_from_url, inputs=blog_url, outputs=pasted_text)
    image_btn.click(fn=extract_text_from_image, inputs=image_url, outputs=pasted_text)
    paste_btn.click(
        fn=show_review,
        inputs=[pasted_text, view_mode],
        outputs=[originals, suggestions, output_section, truncation_note] + review_components,
    )
    view_mode.change(fn=toggle_view, inputs=[view_mode, originals], outputs=markdown_components)
    finalize_btn.click(fn=finalize_output, inputs=[originals, suggestions] + decision_radios, outputs=final_output)

demo.launch()