Spaces:

JaishnaCodz
/

BlogReviewer

Sleeping

App Files Community

JaishnaCodz committed on Jul 16

Commit

b2d2b3c

verified ·

1 Parent(s): 5ef657b

Update app.py

Browse files

Files changed (1) hide show

app.py +146 -110

app.py CHANGED Viewed

@@ -1,120 +1,156 @@
 import gradio as gr
-import trafilatura
-from transformers import pipeline
-import pytesseract
-from PIL import Image
 import requests
-from io import BytesIO
 import difflib
-# Load the model
-reviewer = pipeline("text2text-generation", model="google/flan-t5-base")
-# Global storage for review UI
-review_boxes = []
-# OCR from image
-def extract_text_from_image_url(img_url):
     try:
-        response = requests.get(img_url)
-        img = Image.open(BytesIO(response.content))
-        text = pytesseract.image_to_string(img)
-        return text
     except Exception as e:
         return f"❌ OCR Error: {e}"
-# Blog content from URL
-def extract_text_from_url(url):
-    downloaded = trafilatura.fetch_url(url)
-    if downloaded:
-        return trafilatura.extract(downloaded)
-    else:
-        return "❌ Blog Error: Could not fetch content from the URL."
-# Highlight word-level differences
-def highlight_diffs(orig, suggestion):
-    diff = difflib.ndiff(orig.split(), suggestion.split())
-    result = []
-    for word in diff:
-        if word.startswith('- '):
-            result.append(f"~~{word[2:]}~~")
-        elif word.startswith('+ '):
-            result.append(f"**{word[2:]}**")
-        elif word.startswith('  '):
-            result.append(word[2:])
-    return " ".join(result)
-# Process each line
-def review_lines(text):
-    lines = text.strip().split('\n')
-    reviewed = []
-    for line in lines:
-        if not line.strip():
-            continue
-        prompt = f"Fix grammar, tone, and clarity:\n\n{line}"
-        response = reviewer(prompt, max_new_tokens=100)[0]['generated_text'].strip()
-        highlighted = highlight_diffs(line.strip(), response)
-        reviewed.append((line.strip(), highlighted, response))
-    return reviewed
-# Finalize accepted suggestions
-def finalize_text(originals, suggestions, decisions):
-    output = []
-    for orig, sugg, accepted in zip(originals, suggestions, decisions):
-        output.append(sugg if accepted else orig)
-    return "\n".join(output)
-# Build UI
-with gr.Blocks(theme=gr.themes.Soft()) as app:
-    gr.Markdown("## ✨ BlogChecker AI\nSmart blog reviewer with OCR + AI suggestions")
-    with gr.Row():
-        blog_url = gr.Textbox(label="📎 Blog URL")
-        image_url = gr.Textbox(label="🖼️ Image URL (optional)")
-        extract_btn = gr.Button("🔍 Extract")
-    combined_text = gr.Textbox(label="📝 Combined Blog + OCR Text", lines=10)
-    with gr.Row():
-        review_btn = gr.Button("🧠 Review Content")
-        finalize_btn = gr.Button("✅ Finalize Clean Blog")
-    review_area = gr.Column(visible=False)
-    final_output = gr.Textbox(label="📦 Final Clean Blog", lines=10)
-    # Combine blog + OCR
-    def extract_both(url, img_url):
-        blog = extract_text_from_url(url)
-        ocr = extract_text_from_image_url(img_url) if img_url else ""
-        return blog + ("\n" + ocr if ocr else "")
-    # Generate suggestions and show UI
-    def do_review(text):
-        results = review_lines(text)
-        review_area.children.clear()
-        review_boxes.clear()
-        elems = []
-        for idx, (orig, highlighted, clean) in enumerate(results):
-            orig_box = gr.Textbox(value=orig, label=f"Original Line {idx+1}", interactive=False)
-            markdown_sugg = gr.Markdown(value=highlighted, label=f"Suggested Edit {idx+1}")
-            accept = gr.Checkbox(label="✅ Accept Suggestion", value=False)
-            elems.extend([orig_box, markdown_sugg, accept])
-            review_boxes.append((orig_box, clean, accept))
-        review_area.children = elems
-        return gr.update(visible=True)
-    # Collect accepted decisions
-    def collect_results():
-        originals = [box[0].value for box in review_boxes]
-        suggestions = [box[1] for box in review_boxes]
-        accepts = [box[2].value for box in review_boxes]
-        return finalize_text(originals, suggestions, accepts)
-    # Wire events
-    extract_btn.click(fn=extract_both, inputs=[blog_url, image_url], outputs=combined_text)
-    review_btn.click(fn=do_review, inputs=combined_text, outputs=review_area)
-    finalize_btn.click(fn=collect_results, outputs=final_output)
-app.launch()

 import gradio as gr
 import requests
+from PIL import Image
+import pytesseract
 import difflib
+from io import BytesIO
+from transformers import pipeline
+import trafilatura
+from nltk.tokenize import sent_tokenize
+import nltk
+nltk.download("punkt")
+# === Load AI model ===
+reviewer = pipeline("text-generation", model="HuggingFaceH4/zephyr-7b-beta", max_new_tokens=200)
+device = "cpu"
+print(f"Device set to use {device}")
+# === Utility: Highlight diffs ===
+def highlight_diff(original, suggestion):
+    diff = difflib.ndiff(original.split(), suggestion.split())
+    result = ""
+    for word in diff:
+        if word.startswith("- "):
+            result += f"<span style='color:red;text-decoration:line-through'>{word[2:]}</span> "
+        elif word.startswith("+ "):
+            result += f"<span style='color:green;font-weight:bold'>{word[2:]}</span> "
+        elif word.startswith("  "):
+            result += word[2:] + " "
+    return result.strip()
+# === Extract blog content from URL ===
+def extract_text_from_url(url):
     try:
+        headers = {"User-Agent": "Mozilla/5.0"}
+        response = requests.get(url, headers=headers, timeout=10)
+        if response.status_code == 200:
+            return trafilatura.extract(response.text)
+        else:
+            return f"❌ Blog Error: HTTP {response.status_code} on URL {url}"
+    except Exception as e:
+        return f"❌ Blog Error: {e}"
+# === Extract text from image URL (OCR) ===
+def extract_text_from_image(image_url):
+    try:
+        img_data = requests.get(image_url).content
+        image = Image.open(BytesIO(img_data)).convert("L")
+        text = pytesseract.image_to_string(image)
+        return text if text.strip() else "❌ OCR Error: No readable text found."
     except Exception as e:
         return f"❌ OCR Error: {e}"
+# === Suggestion generator ===
+def generate_suggestions(text):
+    sentences = sent_tokenize(text)
+    suggestions = []
+    for sent in sentences:
+        prompt = f"Improve the tone, grammar, clarity and flag any sensitive content:\n\n{sent}"
+        output = reviewer(prompt, max_new_tokens=200)[0]["generated_text"]
+        cleaned = output.replace(prompt, "").strip()
+        suggestions.append(cleaned if cleaned else sent)
+    return sentences, suggestions
+# === Final approval handler ===
+def collect_decisions(originals, suggestions, *choices):
+    results = []
+    for orig, sugg, choice in zip(originals, suggestions, choices):
+        results.append(sugg if choice == "Accept" else orig)
+    return "\n".join(results)
+# === Gradio UI ===
+with gr.Blocks() as demo:
+    gr.Markdown("# ✨ Blog Reviewer AI")
+    gr.Markdown("Detect tone issues, errors, and sensitive content — and clean them interactively!")
+    with gr.Tab("🔗 From Blog URL"):
+        blog_url = gr.Textbox(label="Enter blog URL")
+        fetch_btn = gr.Button("Fetch & Review")
+    with gr.Tab("🖼️ From Image URL (OCR)"):
+        image_url = gr.Textbox(label="Enter Image URL")
+        image_btn = gr.Button("Extract & Review")
+    with gr.Tab("📝 Paste Text"):
+        pasted_text = gr.Textbox(label="Paste blog content here", lines=10)
+        paste_btn = gr.Button("Review Text")
+    output_section = gr.Column(visible=False)
+    originals = gr.State([])
+    suggestions = gr.State([])
+    decision_radios = []
+    view_mode = gr.Radio(["Original", "Suggestion", "Side-by-Side"], value="Side-by-Side", label="Choose View")
+    final_output = gr.Textbox(label="✅ Final Output", lines=12)
+    finalize_btn = gr.Button("Generate Clean Version")
+    sentence_blocks = []
+    # === Show suggestions UI ===
+    def show_review(text):
+        origs, suggs = generate_suggestions(text)
+        originals.value = origs
+        suggestions.value = suggs
+        return origs, suggs, True
+    # === Populate sentence review rows dynamically ===
+    def populate_review_ui(origs, suggs):
+        global decision_radios, sentence_blocks
+        decision_radios = []
+        sentence_blocks = []
+        ui_blocks = []
+        for i, (orig, sugg) in enumerate(zip(origs, suggs)):
+            orig_md = gr.Markdown(f"<b>{orig}</b>", visible=False)
+            sugg_md = gr.Markdown(f"<b>{sugg}</b>", visible=False)
+            diff_md = gr.Markdown(highlight_diff(orig, sugg), visible=True)
+            radio = gr.Radio(["Accept", "Reject"], value="Accept", label=f"Suggestion {i+1}")
+            decision_radios.append(radio)
+            sentence_blocks.append((orig_md, sugg_md, diff_md))
+            ui_blocks.extend([orig_md, sugg_md, diff_md, radio])
+        return ui_blocks
+    # === Toggle view mode ===
+    def toggle_view(view):
+        updates = []
+        for orig_md, sugg_md, diff_md in sentence_blocks:
+            if view == "Original":
+                updates.extend([gr.update(visible=True), gr.update(visible=False), gr.update(visible=False)])
+            elif view == "Suggestion":
+                updates.extend([gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)])
+            else:  # Side-by-side
+                updates.extend([gr.update(visible=False), gr.update(visible=False), gr.update(visible=True)])
+        return updates
+    # === Final output handler ===
+    def finalize_output(origs, suggs, *choices):
+        return collect_decisions(origs, suggs, *choices)
+    # Button click handlers
+    fetch_btn.click(fn=extract_text_from_url, inputs=blog_url, outputs=pasted_text)
+    image_btn.click(fn=extract_text_from_image, inputs=image_url, outputs=pasted_text)
+    paste_btn.click(fn=show_review, inputs=pasted_text, outputs=[originals, suggestions, output_section])
+    # Dynamic render trigger
+    originals.change(fn=populate_review_ui, inputs=[originals, suggestions], outputs=[])
+    view_mode.change(fn=toggle_view, inputs=view_mode,
+                     outputs=[item for block in sentence_blocks for item in block])
+    finalize_btn.click(fn=finalize_output, inputs=[originals, suggestions] + decision_radios, outputs=final_output)
+demo.launch()