BlogReviewer / app.py
JaishnaCodz's picture
Update app.py
2c83941 verified
raw
history blame
3.88 kB
import gradio as gr
import trafilatura
from transformers import pipeline
import pytesseract
from PIL import Image
import requests
from io import BytesIO
# Load the text2text-generation pipeline once, at module import, so that
# review_lines() can reuse this single instance for every line it rewrites.
reviewer = pipeline("text2text-generation", model="google/flan-t5-base")
# OCR
def extract_text_from_image_url(img_url, timeout=15):
    """Download the image at ``img_url`` and return the text OCR finds in it.

    Args:
        img_url: Address of the image to download.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The string produced by ``pytesseract.image_to_string`` on success.
        On any failure (bad URL, network error, HTTP error status,
        undecodable image, OCR failure) a string starting with
        ``"❌ OCR Error:"`` followed by the exception message. The function
        never raises, so it is safe to call directly from a UI callback.
    """
    try:
        # Without a timeout a stalled host would block the caller forever.
        response = requests.get(img_url, timeout=timeout)
        # Surface 4xx/5xx as an error instead of feeding an error page to PIL.
        response.raise_for_status()
        # The context manager releases the image's resources once OCR is done.
        with Image.open(BytesIO(response.content)) as img:
            return pytesseract.image_to_string(img)
    except Exception as e:
        # Deliberately broad: every failure is reported as text to the UI.
        return f"❌ OCR Error: {e}"
# Extract blog
def extract_text_from_url(url):
    """Fetch ``url`` with trafilatura and return the extracted page text.

    Args:
        url: Address of the blog page to fetch.

    Returns:
        The text returned by ``trafilatura.extract``. If the page could not
        be fetched, or was fetched but yielded no extractable text, a string
        starting with ``"❌ Blog Error:"`` is returned instead, so callers
        always receive a ``str`` and can concatenate it safely.
    """
    downloaded = trafilatura.fetch_url(url)
    if not downloaded:
        return "❌ Blog Error: Could not fetch content from the URL."

    extracted = trafilatura.extract(downloaded)
    if not extracted:
        # extract() yields nothing when it finds no main content; report that
        # rather than handing None to callers that do string concatenation.
        return "❌ Blog Error: Could not extract readable text from the page."
    return extracted
# Review line-by-line
def review_lines(text):
    """Ask the module-level ``reviewer`` pipeline to rewrite each line of ``text``.

    The text is stripped and split on newlines; lines that are empty or
    whitespace-only are skipped. Every remaining line is sent to the model
    in its own prompt (at most 100 new tokens per call).

    Returns:
        A list of ``(original_line, stripped_suggestion)`` tuples, in the
        order the lines appear in ``text``.
    """

    def _rewrite(source_line):
        # One model call per line; keep only the generated text, trimmed.
        request = f"Rewrite this to fix grammar, tone, and remove any offensive language:\n\n{source_line}"
        generated = reviewer(request, max_new_tokens=100)
        return generated[0]['generated_text'].strip()

    candidates = text.strip().split('\n')
    return [(entry, _rewrite(entry)) for entry in candidates if entry.strip()]
# Finalize by combining accepted lines
def finalize_text(originals, suggestions, decisions):
    """Merge reviewed lines into one newline-separated string.

    The three sequences are walked in lockstep (stopping at the shortest).
    For each position the suggestion is used when its decision is truthy,
    otherwise the original line is kept.

    Returns:
        The chosen lines joined with ``"\\n"``; an empty string when there
        is nothing to join.
    """
    chosen = [
        suggested if accepted else original
        for original, suggested, accepted in zip(originals, suggestions, decisions)
    ]
    return "\n".join(chosen)
# Gradio UI
with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as app:
    gr.Markdown("## ✨ BlogChecker AI\nSmart AI reviewer for blog content, with interactive approval and OCR image support.")

    # --- Inputs -----------------------------------------------------------
    with gr.Row():
        blog_url = gr.Textbox(label="📎 Blog URL")
        image_url = gr.Textbox(label="🖼️ Image URL (optional)")
    extract_btn = gr.Button("🔍 Extract")
    combined_text = gr.Textbox(label="📝 Combined Blog + OCR Text", lines=10)

    with gr.Row():
        review_btn = gr.Button("🧠 Review Content")
        finalize_btn = gr.Button("✅ Finalize Clean Blog")

    # --- Review slots -----------------------------------------------------
    # The UI is built once, so the number of reviewable lines is fixed here.
    # Only the first REVIEW_SLOTS non-blank lines of the text can be shown.
    REVIEW_SLOTS = 5

    line_outputs = []   # (original, suggestion, accept) component triples
    decisions = []      # the accept checkboxes, in slot order
    originals = []      # the "Original Line" textboxes, in slot order
    suggestions = []    # the "Suggested Line" textboxes, in slot order
    with gr.Column() as dynamic_review_section:
        for i in range(REVIEW_SLOTS):
            orig = gr.Textbox(label=f"Original Line {i+1}", interactive=False)
            sugg = gr.Textbox(label=f"Suggested Line {i+1}", interactive=False)
            accept = gr.Checkbox(label="✅ Accept Suggestion")
            originals.append(orig)
            suggestions.append(sugg)
            decisions.append(accept)
            line_outputs.append((orig, sugg, accept))

    final_output = gr.Textbox(label="📦 Final Clean Blog", lines=10)

    # --- Callbacks --------------------------------------------------------
    def extract_both(url, img_url):
        """Return the blog text followed by the OCR text, one per line.

        The image is only fetched when ``img_url`` is non-empty. Parts that
        come back empty (or ``None``) are left out, so the result is always
        a string with no stray leading or trailing newline.
        """
        blog = extract_text_from_url(url) or ""
        ocr = extract_text_from_image_url(img_url) if img_url else ""
        return "\n".join(part for part in (blog, ocr) if part)

    def process_review(text):
        """Fill the review slots with (original, suggestion, unchecked) values.

        Returns a flat list ordered ``[orig1, sugg1, False, orig2, ...]``,
        matching the flattened ``line_outputs`` used as the click outputs.
        Slots without a line are cleared.
        """
        slot_count = len(line_outputs)
        # Only lines that fit in a slot are sent to the model; reviewing the
        # rest would spend one model call per line on output nobody sees.
        candidates = [ln for ln in (text or "").strip().split("\n") if ln.strip()]
        reviewed = review_lines("\n".join(candidates[:slot_count]))[:slot_count]
        padding = [("", "")] * (slot_count - len(reviewed))

        outputs = []
        for original, suggestion in reviewed + padding:
            outputs.extend([original, suggestion, False])
        return outputs

    def collect_decisions(*args):
        """Build the final text from the current slot values.

        ``args`` arrives as all originals, then all suggestions, then all
        accept flags (the order of the click inputs), so it is split into
        three equal thirds. Slots whose original is empty are unused and are
        dropped so they do not add blank lines to the result.
        """
        count = len(args) // 3
        origs = args[:count]
        suggs = args[count:2 * count]
        accepts = args[2 * count:]
        rows = [row for row in zip(origs, suggs, accepts) if row[0]]
        return finalize_text(
            [row[0] for row in rows],
            [row[1] for row in rows],
            [row[2] for row in rows],
        )

    # --- Wire actions -----------------------------------------------------
    # Event listeners are registered while the Blocks context is still open.
    extract_btn.click(fn=extract_both, inputs=[blog_url, image_url], outputs=combined_text)
    review_btn.click(fn=process_review, inputs=combined_text, outputs=[el for group in line_outputs for el in group])
    finalize_btn.click(fn=collect_decisions, inputs=originals + suggestions + decisions, outputs=final_output)

app.launch()