Spaces:
Sleeping
Sleeping
File size: 3,876 Bytes
6af176b 2c83941 6af176b c873d13 6af176b c873d13 6af176b c873d13 2c83941 6af176b c873d13 6af176b c873d13 6af176b c873d13 16e02f2 c873d13 16e02f2 c873d13 16e02f2 6af176b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 |
import gradio as gr
import trafilatura
from transformers import pipeline
import pytesseract
from PIL import Image
import requests
from io import BytesIO
# Load the FLAN-T5 base text2text-generation pipeline once, at import time.
# review_lines() calls this object once per non-blank line of input.
reviewer = pipeline("text2text-generation", model="google/flan-t5-base")
# OCR
def extract_text_from_image_url(img_url, *, timeout=15):
    """Download the image at *img_url* and return the text OCR finds in it.

    Args:
        img_url: URL of the image to fetch.
        timeout: Seconds to wait for the HTTP request before giving up, so a
            stalled server cannot hang the UI callback indefinitely.

    Returns:
        The string produced by ``pytesseract.image_to_string``. On any
        failure (network error, non-2xx status, undecodable image, OCR
        error) the exception is not raised; a message beginning with the
        OCR-error prefix and containing the exception text is returned.
    """
    try:
        response = requests.get(img_url, timeout=timeout)
        # Fail here on 4xx/5xx instead of handing an HTML error page to PIL.
        response.raise_for_status()
        with Image.open(BytesIO(response.content)) as img:
            return pytesseract.image_to_string(img)
    except Exception as e:
        # Deliberate catch-all: this function reports failures as text so the
        # caller can show them in the combined-text box.
        return f"β OCR Error: {e}"
# Extract blog
def extract_text_from_url(url):
    """Fetch *url* with trafilatura and return its extracted text.

    Args:
        url: Address of the blog page to download.

    Returns:
        The extracted text, or the blog-error message when the page could
        not be downloaded or no text could be extracted from it. The result
        is always a string, so callers can concatenate it safely.
    """
    downloaded = trafilatura.fetch_url(url)
    # extract() may yield nothing even when the download succeeded; treat
    # that the same as a failed fetch rather than returning None.
    extracted = trafilatura.extract(downloaded) if downloaded else None
    if not extracted:
        return "β Blog Error: Could not fetch content from the URL."
    return extracted
# Review line-by-line
def review_lines(text):
    """Ask the reviewer model for a rewrite of every non-blank line of *text*.

    The text is stripped, split on newlines, and blank lines are skipped.

    Returns:
        A list of ``(original_line, suggestion)`` tuples in input order; the
        suggestion is the model's generated text with surrounding whitespace
        removed.
    """

    def _suggest(line):
        # One model call per line; only the first generated candidate is used.
        prompt = f"Rewrite this to fix grammar, tone, and remove any offensive language:\n\n{line}"
        generated = reviewer(prompt, max_new_tokens=100)
        return generated[0]['generated_text'].strip()

    non_blank = (row for row in text.strip().split('\n') if row.strip() != "")
    return [(row, _suggest(row)) for row in non_blank]
# Finalize by combining accepted lines
def finalize_text(originals, suggestions, decisions):
    """Build the final text by picking, per line, the suggestion or the original.

    The three sequences are walked in lockstep (stopping at the shortest);
    where the decision is truthy the suggestion is used, otherwise the
    original. The chosen lines are joined with newlines.
    """
    chosen = (
        suggested if accepted else original
        for original, suggested, accepted in zip(originals, suggestions, decisions)
    )
    return "\n".join(chosen)
# Gradio UI
# NOTE(review): several labels below contain glyphs that look like emoji
# damaged by a bad decode. They are kept as found; restore them from the
# original file if it is available.

# Number of original/suggestion/accept slots rendered in the review section.
# Only this many lines can be reviewed per run.
MAX_REVIEW_LINES = 5

with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as app:
    gr.Markdown(
        "## β¨ BlogChecker AI\n"
        "Smart AI reviewer for blog content, with interactive approval and OCR image support."
    )

    with gr.Row():
        blog_url = gr.Textbox(label="π Blog URL")
        image_url = gr.Textbox(label="πΌοΈ Image URL (optional)")

    extract_btn = gr.Button("π Extract")
    combined_text = gr.Textbox(label="π Combined Blog + OCR Text", lines=10)

    with gr.Row():
        review_btn = gr.Button("π§ Review Content")
        finalize_btn = gr.Button("β Finalize Clean Blog")

    # Parallel lists of the per-slot components; line_outputs keeps them
    # grouped per slot, the other three keep them grouped per kind.
    line_outputs = []
    decisions = []
    originals = []
    suggestions = []

    with gr.Column() as dynamic_review_section:
        for i in range(MAX_REVIEW_LINES):
            orig = gr.Textbox(label=f"Original Line {i+1}", interactive=False)
            sugg = gr.Textbox(label=f"Suggested Line {i+1}", interactive=False)
            accept = gr.Checkbox(label="β Accept Suggestion")
            originals.append(orig)
            suggestions.append(sugg)
            decisions.append(accept)
            line_outputs.append((orig, sugg, accept))

    final_output = gr.Textbox(label="π¦ Final Clean Blog", lines=10)

    def extract_both(url, img_url):
        """Return the blog text and, if an image URL is given, its OCR text.

        The two parts are joined with a newline; empty parts are omitted.
        """
        # Guard against a missing result so the join below cannot fail.
        blog = extract_text_from_url(url) or ""
        img_url = (img_url or "").strip()
        ocr = extract_text_from_image_url(img_url) if img_url else ""
        return "\n".join(part for part in (blog, ocr) if part)

    def process_review(text):
        """Review the first MAX_REVIEW_LINES non-blank lines of *text*.

        Returns a flat list of ``original, suggestion, False`` triples, one
        per slot, in the same order as the review button's outputs; unused
        slots are cleared with empty strings.
        """
        # Only MAX_REVIEW_LINES slots exist, so do not spend model calls on
        # lines that could never be displayed.
        non_blank = [row for row in (text or "").strip().split("\n") if row.strip()]
        results = review_lines("\n".join(non_blank[:MAX_REVIEW_LINES]))
        results = results[:MAX_REVIEW_LINES]

        padded = results + [("", "")] * (MAX_REVIEW_LINES - len(results))
        outputs = []
        for original, suggestion in padded:
            outputs.extend([original, suggestion, False])  # checkbox starts unselected
        return outputs

    def collect_decisions(*args):
        """Combine the slot values into the final text.

        *args* arrives as all originals, then all suggestions, then all
        accept flags (the order used when wiring the finalize button).
        Slots whose original is empty are unused and are left out, so the
        result has no trailing blank lines.
        """
        count = len(args) // 3
        origs = args[:count]
        suggs = args[count:2 * count]
        accepts = args[2 * count:]

        used = [(o, s, a) for o, s, a in zip(origs, suggs, accepts) if o]
        if not used:
            return ""
        used_origs, used_suggs, used_accepts = zip(*used)
        return finalize_text(used_origs, used_suggs, used_accepts)

    # Wire actions
    extract_btn.click(fn=extract_both, inputs=[blog_url, image_url], outputs=combined_text)
    review_btn.click(
        fn=process_review,
        inputs=combined_text,
        outputs=[el for group in line_outputs for el in group],
    )
    finalize_btn.click(
        fn=collect_decisions,
        inputs=originals + suggestions + decisions,
        outputs=final_output,
    )

app.launch()
|