import gradio as gr from newspaper import Article from transformers import pipeline import pytesseract from PIL import Image import requests from io import BytesIO # Load model reviewer = pipeline("text2text-generation", model="google/flan-t5-base") # OCR def extract_text_from_image_url(img_url): try: response = requests.get(img_url) img = Image.open(BytesIO(response.content)) text = pytesseract.image_to_string(img) return text except Exception as e: return f"❌ OCR Error: {e}" # Extract blog def extract_text_from_url(url): try: article = Article(url) article.download() article.parse() return article.text except Exception as e: return f"❌ Blog Error: {e}" # Review line-by-line def review_lines(text): lines = text.strip().split('\n') suggestions = [] for line in lines: if line.strip() == "": continue prompt = f"Rewrite this to fix grammar, tone, and remove any offensive language:\n\n{line}" suggestion = reviewer(prompt, max_new_tokens=100)[0]['generated_text'] suggestions.append((line, suggestion.strip())) return suggestions # Finalize by combining accepted lines def finalize_text(originals, suggestions, decisions): final = [] for orig, sugg, keep in zip(originals, suggestions, decisions): final.append(sugg if keep else orig) return "\n".join(final) # Gradio UI with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as app: gr.Markdown("## ✨ BlogChecker AI\nSmart AI reviewer for blog content, with interactive approval and OCR image support.") with gr.Row(): blog_url = gr.Textbox(label="📎 Blog URL") image_url = gr.Textbox(label="🖼️ Image URL (optional)") extract_btn = gr.Button("🔍 Extract") combined_text = gr.Textbox(label="📝 Combined Blog + OCR Text", lines=10) with gr.Row(): review_btn = gr.Button("🧠 Review Content") finalize_btn = gr.Button("✅ Finalize Clean Blog") line_outputs = [] decisions = [] originals = [] suggestions = [] with gr.Column() as dynamic_review_section: for i in range(5): # support 5 lines for demo; can be dynamic later orig = gr.Textbox(label=f"Original Line {i+1}", interactive=False) sugg = gr.Textbox(label=f"Suggested Line {i+1}", interactive=False) accept = gr.Checkbox(label="✅ Accept Suggestion") originals.append(orig) suggestions.append(sugg) decisions.append(accept) line_outputs.append((orig, sugg, accept)) final_output = gr.Textbox(label="📦 Final Clean Blog", lines=10) def extract_both(url, img_url): blog = extract_text_from_url(url) ocr = extract_text_from_image_url(img_url) if img_url else "" return blog + ("\n" + ocr if ocr else "") def process_review(text): results = review_lines(text) outputs = [] for i in range(len(line_outputs)): if i < len(results): outputs.extend([results[i][0], results[i][1], False]) # orig, suggestion, unselected else: outputs.extend(["", "", False]) # clear unused slots return outputs def collect_decisions(origs, suggs, accepts): return finalize_text(origs, suggs, accepts) # Wire actions extract_btn.click(fn=extract_both, inputs=[blog_url, image_url], outputs=combined_text) review_btn.click(fn=process_review, inputs=combined_text, outputs=[el for group in line_outputs for el in group]) finalize_btn.click(fn=collect_decisions, inputs=[originals, suggestions, decisions], outputs=final_output) app.launch()