Spaces:
Sleeping
Sleeping
File size: 4,361 Bytes
6af176b 2c83941 6af176b c873d13 5c898fe 6af176b c873d13 6af176b c873d13 2c83941 5c898fe c873d13 5c898fe c873d13 6af176b 5c898fe c873d13 6af176b 2324254 c873d13 2324254 5c898fe c873d13 5c898fe c873d13 5c898fe c873d13 2324254 5c898fe 2324254 5c898fe 2324254 5c898fe 2324254 5c898fe 2324254 5c898fe 2324254 5c898fe 16e02f2 c873d13 2324254 6af176b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 |
import gradio as gr
import trafilatura
from transformers import pipeline
import pytesseract
from PIL import Image
import requests
from io import BytesIO
import difflib
# Build the shared text-to-text pipeline once, when the module is imported;
# review_lines() calls it for every non-blank line it rewrites.
reviewer = pipeline(
    task="text2text-generation",
    model="google/flan-t5-base",
)
# OCR
def extract_text_from_image_url(img_url, timeout=15):
    """Download an image and return the text that Tesseract OCR reads from it.

    Args:
        img_url: URL of the image to download.
        timeout: Seconds to wait on the HTTP request before giving up, so a
            stalled server cannot block the caller indefinitely.

    Returns:
        The recognised text. On any failure (network error, HTTP error
        status, undecodable image, OCR failure) a string starting with
        "β OCR Error:" followed by the exception message is returned
        instead; the function itself does not raise.
    """
    try:
        response = requests.get(img_url, timeout=timeout)
        # Reject 4xx/5xx responses here; otherwise the body of an error page
        # would be handed to PIL as if it were image data.
        response.raise_for_status()
        # The context manager releases the decoded image once OCR is done.
        with Image.open(BytesIO(response.content)) as img:
            return pytesseract.image_to_string(img)
    except Exception as e:
        # Deliberate best-effort boundary: the UI shows this message in place
        # of OCR text rather than failing the whole extraction.
        return f"β OCR Error: {e}"
# Extract blog
def extract_text_from_url(url):
    """Fetch a web page and return its main text content.

    Args:
        url: Address of the blog post to download.

    Returns:
        The extracted text, or the "β Blog Error: ..." message when the page
        could not be downloaded or no text could be extracted from it. The
        result is always a string, so callers can concatenate it safely.
    """
    error_message = "β Blog Error: Could not fetch content from the URL."
    downloaded = trafilatura.fetch_url(url)
    if not downloaded:
        return error_message
    # trafilatura.extract() yields None when it finds no usable content;
    # returning that None would break callers that concatenate the result.
    extracted = trafilatura.extract(downloaded)
    return extracted if extracted else error_message
# Highlight differences using difflib
def highlight_diffs(original, suggestion):
    """Return a word-level diff of two strings as Markdown-style markup.

    Both strings are split on whitespace and compared with difflib.ndiff.
    Words only in ``original`` are wrapped in ``~~...~~``, words only in
    ``suggestion`` in ``**...**``, and shared words are emitted unchanged.
    Any other ndiff line (such as "? " hint lines) is dropped. The pieces
    are joined with single spaces.
    """
    wrappers = {"- ": "~~{}~~", "+ ": "**{}**"}
    pieces = []
    for entry in difflib.ndiff(original.split(), suggestion.split()):
        marker, word = entry[:2], entry[2:]
        if marker in wrappers:
            pieces.append(wrappers[marker].format(word))
        elif entry.startswith(" "):
            # Unchanged word: keep it without decoration.
            pieces.append(word)
    return " ".join(pieces)
# Review lines with diffs
def review_lines(text):
    """Ask the model for a rewrite of every non-blank line in ``text``.

    The text is stripped and split on newlines; whitespace-only lines are
    skipped. Each remaining line is sent to the module-level ``reviewer``
    pipeline with a fixed rewrite prompt (at most 100 new tokens).

    Returns:
        A list of ``(line, highlighted, suggestion)`` tuples: the line as it
        appeared in the input, the word-level diff markup produced by
        ``highlight_diffs``, and the stripped model output.
    """
    reviewed = []
    for raw_line in text.strip().split('\n'):
        trimmed = raw_line.strip()
        if not trimmed:
            continue
        prompt = f"Rewrite this to fix grammar, tone, and remove any offensive language:\n\n{raw_line}"
        outputs = reviewer(prompt, max_new_tokens=100)
        suggestion = outputs[0]['generated_text'].strip()
        # The diff is computed against the trimmed line, while the tuple
        # keeps the line exactly as it was split from the input.
        reviewed.append((raw_line, highlight_diffs(trimmed, suggestion), suggestion))
    return reviewed
# Finalize accepted suggestions
def finalize_text(originals, suggestions, decisions):
    """Merge original and suggested lines according to per-line decisions.

    The three sequences are walked in lockstep (stopping at the shortest).
    For each position the suggestion is taken when its decision is truthy,
    otherwise the original. The chosen lines are joined with newlines.
    """
    chosen = [
        suggested if accepted else original
        for original, suggested, accepted in zip(originals, suggestions, decisions)
    ]
    return "\n".join(chosen)
# Gradio app
# Gradio UI.
#
# All components are declared up front and event handlers only return new
# values for them. Review results live in a per-session gr.State, and the
# user's accept/reject choices arrive as handler inputs from a CheckboxGroup.
# Components created inside an event handler are not rendered, and reading
# `.value` from a component object only yields its construction-time value,
# so neither technique can be used to collect the user's decisions.
#
# NOTE(review): several labels below contain mis-encoded emoji (they appear as
# Greek letters). They are kept as found; only the two literals that had been
# split across lines were repaired. Restore the intended emoji from the
# original file if it is available.
with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as app:
    gr.Markdown("## β¨ BlogChecker AI\nSmart AI reviewer for blog content, with interactive approval and OCR image support.")
    with gr.Row():
        blog_url = gr.Textbox(label="π Blog URL")
        image_url = gr.Textbox(label="πΌοΈ Image URL (optional)")
    extract_btn = gr.Button("π Extract")
    combined_text = gr.Textbox(label="π Combined Blog + OCR Text", lines=10)
    with gr.Row():
        review_btn = gr.Button("π§ Review Content")
        finalize_btn = gr.Button("✅ Finalize Clean Blog")

    # Per-session list of [original_line, clean_suggestion] pairs produced by
    # the most recent review; read back when the user finalizes.
    review_state = gr.State([])

    # Hidden until the first review has run.
    with gr.Column(visible=False) as review_section:
        review_markdown = gr.Markdown()
        accept_group = gr.CheckboxGroup(label="✅ Accept Suggestion", choices=[])

    final_output = gr.Textbox(label="π¦ Final Clean Blog", lines=10)

    def _line_label(position):
        """Return the checkbox label for the reviewed line at 0-based position."""
        return f"Line {position + 1}"

    def _render_review(results):
        """Format review results as one Markdown document, one section per line."""
        sections = [
            f"**Original Line {number}**\n\n{orig}\n\n"
            f"**Suggested Edit {number}**\n\n{highlighted}"
            for number, (orig, highlighted, _clean) in enumerate(results, start=1)
        ]
        return "\n\n---\n\n".join(sections)

    # Text extraction logic
    def extract_both(url, img_url):
        """Return the blog text, followed by OCR text when an image URL is given."""
        # `or ""` guards against a None result so the concatenation below
        # cannot raise TypeError.
        blog = extract_text_from_url(url) or ""
        ocr = extract_text_from_image_url(img_url) if img_url else ""
        return blog + ("\n" + ocr if ocr else "")

    # Review processing with diffs
    def process_review(text):
        """Review ``text`` line by line and populate the review section.

        Returns updates for, in order: the review column (made visible), the
        Markdown diff view, the accept checkboxes (one per reviewed line, all
        unchecked), and the new value for ``review_state``.
        """
        results = review_lines(text or "")
        labels = [_line_label(i) for i in range(len(results))]
        pairs = [[orig, clean] for orig, _highlighted, clean in results]
        return (
            gr.update(visible=True),
            gr.update(value=_render_review(results)),
            gr.update(choices=labels, value=[]),
            pairs,
        )

    # Finalization logic
    def collect_dynamic_decisions(pairs=None, accepted_labels=None):
        """Build the final text from the stored review and the ticked checkboxes.

        Args:
            pairs: The ``review_state`` value, a list of
                ``[original_line, clean_suggestion]`` pairs.
            accepted_labels: Labels currently ticked in ``accept_group``.

        Returns:
            The newline-joined text, using the suggestion for each accepted
            line and the original for every other line.
        """
        pairs = pairs or []
        accepted = set(accepted_labels or [])
        originals = [orig for orig, _clean in pairs]
        suggestions = [clean for _orig, clean in pairs]
        decisions = [_line_label(i) in accepted for i in range(len(pairs))]
        return finalize_text(originals, suggestions, decisions)

    # Wire actions
    extract_btn.click(fn=extract_both, inputs=[blog_url, image_url], outputs=combined_text)
    review_btn.click(
        fn=process_review,
        inputs=combined_text,
        outputs=[review_section, review_markdown, accept_group, review_state],
    )
    finalize_btn.click(
        fn=collect_dynamic_decisions,
        inputs=[review_state, accept_group],
        outputs=final_output,
    )

app.launch()
|