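# Spaces: Sleeping
# NOTE(review): the line above looks like web-page header residue rather than
# program text; confirm against the real file and remove it.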
import difflib
from io import BytesIO

import gradio as gr
import pytesseract
import requests
import trafilatura
from PIL import Image
from transformers import pipeline
# Load model
# The text2text-generation pipeline is built once, when this module is
# imported, and reused for every review request.
reviewer = pipeline("text2text-generation", model="google/flan-t5-base")
# OCR
def extract_text_from_image_url(img_url, timeout=10):
    """Download an image and return the text pytesseract reads from it.

    Args:
        img_url: URL of the image to fetch.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The recognised text, or a string starting with "β OCR Error:" that
        describes what went wrong. The function never raises.
    """
    try:
        # Without a timeout a stalled server would block the request forever.
        response = requests.get(img_url, timeout=timeout)
        # Fail on 4xx/5xx here instead of handing an error page to PIL.
        response.raise_for_status()
        with Image.open(BytesIO(response.content)) as img:
            return pytesseract.image_to_string(img)
    except Exception as e:
        # Deliberate best-effort: the caller shows the message as plain text.
        return f"β OCR Error: {e}"
# Extract blog
def extract_text_from_url(url):
    """Fetch a web page and return its extracted text.

    Args:
        url: Address of the page to download.

    Returns:
        The text produced by ``trafilatura.extract``, or the
        "β Blog Error: ..." message when the download fails or nothing could
        be extracted. The result is always a string.
    """
    error_message = "β Blog Error: Could not fetch content from the URL."
    downloaded = trafilatura.fetch_url(url)
    if not downloaded:
        return error_message
    # extract() may return None when it finds no text; callers concatenate
    # the result with other strings, so never pass None through.
    return trafilatura.extract(downloaded) or error_message
# Highlight differences using difflib
def highlight_diffs(original, suggestion):
    """Return a word-level Markdown diff between two strings.

    Both inputs are split on whitespace and compared with ``difflib.ndiff``.
    Words only in ``original`` are wrapped in ``~~...~~``, words only in
    ``suggestion`` are wrapped in ``**...**``, and unchanged words are kept
    as they are.

    Args:
        original: The text before editing.
        suggestion: The proposed replacement text.

    Returns:
        The annotated words joined by single spaces.
    """
    pieces = []
    for entry in difflib.ndiff(original.split(), suggestion.split()):
        # Every ndiff entry starts with a two-character code.
        code, word = entry[:2], entry[2:]
        if code == "- ":
            pieces.append(f"~~{word}~~")
        elif code == "+ ":
            pieces.append(f"**{word}**")
        elif code == "  ":
            pieces.append(word)
        # "? " hint lines carry no words and are skipped.
    return " ".join(pieces)
# Review lines with diffs
def review_lines(text):
    """Ask the model for a rewrite of every non-blank line of ``text``.

    Args:
        text: The content to review. ``None`` or an empty string yields no
            suggestions.

    Returns:
        A list of ``(line, highlighted, suggestion)`` tuples, one per
        non-blank line: the line as it appeared in ``text``, the Markdown
        diff from ``highlight_diffs``, and the model's rewrite.
    """
    suggestions = []
    # splitlines() also handles "\r\n" and form feeds, so stray control
    # characters do not end up inside the prompt.
    for line in (text or "").strip().splitlines():
        stripped = line.strip()
        if not stripped:
            continue
        prompt = f"Rewrite this to fix grammar, tone, and remove any offensive language:\n\n{line}"
        suggestion = reviewer(prompt, max_new_tokens=100)[0]['generated_text'].strip()
        highlighted = highlight_diffs(stripped, suggestion)
        suggestions.append((line, highlighted, suggestion))
    return suggestions
# Finalize accepted suggestions
def finalize_text(originals, suggestions, decisions):
    """Build the final text from per-line accept/reject decisions.

    Args:
        originals: The original lines.
        suggestions: The suggested replacement for each line.
        decisions: One truthy/falsy flag per line; truthy selects the
            suggestion, falsy keeps the original.

    Returns:
        The chosen lines joined with newlines. The three inputs are walked in
        lockstep with ``zip``, so the result stops at the shortest one.
    """
    return "\n".join(
        sugg if keep else orig
        for orig, sugg, keep in zip(originals, suggestions, decisions)
    )
# Gradio app
# NOTE(review): the non-ASCII glyphs at the start of the labels below look like
# mis-decoded emoji; they are kept as found - confirm against the real file.
with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as app:
    gr.Markdown("## β¨ BlogChecker AI\nSmart AI reviewer for blog content, with interactive approval and OCR image support.")
    with gr.Row():
        blog_url = gr.Textbox(label="π Blog URL")
        image_url = gr.Textbox(label="πΌοΈ Image URL (optional)")
    extract_btn = gr.Button("π Extract")
    combined_text = gr.Textbox(label="π Combined Blog + OCR Text", lines=10)
    with gr.Row():
        review_btn = gr.Button("π§ Review Content")
        finalize_btn = gr.Button("β Finalize Clean Blog")

    # Review results are kept in per-session state and passed to handlers via
    # `inputs=`. Components built inside an event handler are not added to the
    # rendered page, and `component.value` only holds the construction-time
    # value, so all review widgets are created once, up front.
    review_state = gr.State([])
    with gr.Column(visible=False) as review_section:
        review_markdown = gr.Markdown()
        accept_group = gr.CheckboxGroup(choices=[], value=[], label="β Accept Suggestion")
    final_output = gr.Textbox(label="π¦ Final Clean Blog", lines=10)

    def _choice_label(number):
        """Return the checkbox label used for review line ``number``."""
        return f"Line {number}"

    # Text extraction logic
    def extract_both(url, img_url):
        """Return the blog text, followed by the OCR text when an image URL is given."""
        # Either helper may yield None or ""; normalise so "+" cannot fail.
        blog = extract_text_from_url(url) or ""
        ocr = (extract_text_from_image_url(img_url) if img_url else "") or ""
        return blog + ("\n" + ocr if ocr else "")

    # Review processing with diffs
    def process_review(text):
        """Review ``text`` and fill the review section with the suggestions.

        Returns, in order: the review results for the session state, the
        Markdown listing, the checkbox choices (all unchecked), and an update
        that makes the review section visible.
        """
        results = review_lines(text)
        sections = [
            f"**Original Line {i}**\n\n{orig}\n\n**Suggested Edit {i}**\n\n{highlighted}"
            for i, (orig, highlighted, _clean) in enumerate(results, start=1)
        ]
        choices = [_choice_label(i) for i in range(1, len(results) + 1)]
        return (
            results,
            gr.update(value="\n\n---\n\n".join(sections)),
            gr.update(choices=choices, value=[]),
            gr.update(visible=True),
        )

    # Finalization logic
    def collect_dynamic_decisions(results, accepted):
        """Combine stored review results with the ticked checkboxes.

        Args:
            results: The ``(original, highlighted, suggestion)`` tuples saved
                by ``process_review``.
            accepted: Labels of the checkboxes the user ticked.
        """
        accepted_labels = set(accepted or [])
        originals_vals = [orig for orig, _highlighted, _clean in results]
        clean_suggestions = [clean for _orig, _highlighted, clean in results]
        accepts_vals = [
            _choice_label(i) in accepted_labels
            for i in range(1, len(results) + 1)
        ]
        return finalize_text(originals_vals, clean_suggestions, accepts_vals)

    # Wire actions
    extract_btn.click(fn=extract_both, inputs=[blog_url, image_url], outputs=combined_text)
    review_btn.click(
        fn=process_review,
        inputs=combined_text,
        outputs=[review_state, review_markdown, accept_group, review_section],
    )
    finalize_btn.click(
        fn=collect_dynamic_decisions,
        inputs=[review_state, accept_group],
        outputs=final_output,
    )

app.launch()