File size: 3,876 Bytes
6af176b
2c83941
6af176b
c873d13
 
 
 
6af176b
c873d13
6af176b
 
c873d13
 
 
 
 
 
 
 
 
 
 
2c83941
 
 
 
 
 
 
6af176b
c873d13
 
 
 
 
 
 
 
 
 
 
6af176b
c873d13
 
 
 
 
 
6af176b
 
c873d13
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16e02f2
 
 
 
 
c873d13
 
16e02f2
c873d13
 
 
16e02f2
6af176b
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
import gradio as gr
import trafilatura
from transformers import pipeline
import pytesseract
from PIL import Image
import requests
from io import BytesIO

# Load model
# Build the Hugging Face "text2text-generation" pipeline once, at import time,
# using the google/flan-t5-base checkpoint. review_lines() calls this shared
# object for every line it rewrites.
reviewer = pipeline("text2text-generation", model="google/flan-t5-base")

# OCR
def extract_text_from_image_url(img_url, timeout=15):
    """Download an image and return the text that Tesseract OCR finds in it.

    Args:
        img_url: URL of the image to download.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The recognized text on success. On any failure (network error,
        non-2xx HTTP status, undecodable image, OCR failure) a string
        starting with "❌ OCR Error:" is returned instead; this function
        does not raise.
    """
    try:
        # Without a timeout a stalled server would block the UI callback
        # indefinitely.
        response = requests.get(img_url, timeout=timeout)
        # Turn 4xx/5xx replies into an error instead of handing an HTML
        # error page to the image decoder.
        response.raise_for_status()
        # The context manager releases the decoder's resources once OCR
        # has finished.
        with Image.open(BytesIO(response.content)) as img:
            return pytesseract.image_to_string(img)
    except Exception as e:
        # Deliberately broad: this is a UI boundary and every failure is
        # reported to the user as text rather than as a traceback.
        return f"❌ OCR Error: {e}"

# Extract blog
def extract_text_from_url(url):
    """Fetch a web page and return its main text content.

    Args:
        url: Address of the blog post to download.

    Returns:
        The extracted text, or a string starting with "❌ Blog Error:" when
        the URL is empty, the page cannot be fetched, or no text can be
        extracted from it. The result is always a ``str``, never ``None``,
        so callers can concatenate it safely.
    """
    fetch_error = "❌ Blog Error: Could not fetch content from the URL."

    # An empty or whitespace-only URL can never be fetched; skip the
    # network call entirely.
    if not url or not url.strip():
        return fetch_error

    downloaded = trafilatura.fetch_url(url.strip())
    if not downloaded:
        return fetch_error

    # extract() yields None when it finds no main content; report that
    # explicitly instead of leaking None to the caller.
    extracted = trafilatura.extract(downloaded)
    if not extracted:
        return "❌ Blog Error: Could not extract readable text from the page."
    return extracted


# Review line-by-line
def review_lines(text):
    """Ask the model for a rewritten version of every non-blank line.

    The text is stripped, split on newline characters, and each line that
    is not purely whitespace is sent to the module-level ``reviewer``
    pipeline on its own.

    Args:
        text: The content to review.

    Returns:
        A list of ``(original_line, suggested_line)`` tuples in input
        order; the suggestion has surrounding whitespace removed.
    """
    reviewed = []
    for line in text.strip().split('\n'):
        if not line.strip():
            # Blank lines carry nothing to rewrite.
            continue
        prompt = f"Rewrite this to fix grammar, tone, and remove any offensive language:\n\n{line}"
        generated = reviewer(prompt, max_new_tokens=100)
        rewritten = generated[0]['generated_text'].strip()
        reviewed.append((line, rewritten))
    return reviewed

# Finalize by combining accepted lines
def finalize_text(originals, suggestions, decisions):
    """Build the final text by picking one version of each line.

    Args:
        originals: The original lines.
        suggestions: The suggested replacements, aligned with ``originals``.
        decisions: Truthy where the suggestion should be used, falsy where
            the original should be kept.

    Returns:
        The chosen lines joined with newline characters. The three inputs
        are walked in lockstep, so the result has as many lines as the
        shortest of them.
    """
    chosen = [
        suggested if accepted else original
        for original, suggested, accepted in zip(originals, suggestions, decisions)
    ]
    return "\n".join(chosen)

# Gradio UI
# Number of original/suggestion/checkbox rows rendered in the review section.
# Only this many lines of the text can be reviewed per run.
MAX_REVIEW_LINES = 5

with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as app:
    gr.Markdown("## ✨ BlogChecker AI\nSmart AI reviewer for blog content, with interactive approval and OCR image support.")

    # NOTE: the emoji in the labels below were previously stored as mojibake
    # (UTF-8 bytes decoded with a legacy code page); they are restored here.
    with gr.Row():
        blog_url = gr.Textbox(label="📎 Blog URL")
        image_url = gr.Textbox(label="🖼️ Image URL (optional)")
        extract_btn = gr.Button("🔍 Extract")

    combined_text = gr.Textbox(label="📝 Combined Blog + OCR Text", lines=10)

    with gr.Row():
        review_btn = gr.Button("🧠 Review Content")
        finalize_btn = gr.Button("✅ Finalize Clean Blog")

    # Parallel lists of the widgets in each review row. `line_outputs` keeps
    # them grouped per row; the other three keep them grouped per kind.
    line_outputs = []
    decisions = []
    originals = []
    suggestions = []

    with gr.Column() as dynamic_review_section:
        # Fixed number of rows for the demo; can be made dynamic later.
        for i in range(MAX_REVIEW_LINES):
            orig = gr.Textbox(label=f"Original Line {i+1}", interactive=False)
            sugg = gr.Textbox(label=f"Suggested Line {i+1}", interactive=False)
            accept = gr.Checkbox(label="✅ Accept Suggestion")
            originals.append(orig)
            suggestions.append(sugg)
            decisions.append(accept)
            line_outputs.append((orig, sugg, accept))

    final_output = gr.Textbox(label="📦 Final Clean Blog", lines=10)

    def extract_both(url, img_url):
        """Return the blog text, followed by OCR text when an image URL is given."""
        # The blog extractor can hand back None when a page has no
        # extractable body; coerce to "" so the concatenation cannot fail.
        blog = extract_text_from_url(url) or ""
        img_url = (img_url or "").strip()
        ocr = extract_text_from_image_url(img_url) if img_url else ""
        return blog + ("\n" + ocr if ocr else "")

    def process_review(text):
        """Fill the review rows with (original, suggestion, unchecked) values.

        Returns a flat list with three values per row, in the same order as
        the components wired to ``review_btn``. Rows without a matching line
        are cleared.
        """
        slots = len(line_outputs)
        # Only `slots` lines can be displayed, and review_lines() rewrites
        # each line independently, so do not spend model time on the rest.
        visible = [ln for ln in (text or "").strip().split("\n") if ln.strip()][:slots]
        results = review_lines("\n".join(visible)) if visible else []

        # Pad with blanks so unused rows are cleared.
        padded = list(results[:slots]) + [("", "")] * (slots - len(results))
        outputs = []
        for original, suggestion in padded:
            outputs.extend([original, suggestion, False])  # orig, suggestion, unselected
        return outputs

    def collect_decisions(*args):
        """Combine the row values into the final text.

        ``args`` is all originals, then all suggestions, then all checkbox
        states (the order used when wiring ``finalize_btn``).
        """
        count = len(args) // 3
        origs = args[:count]
        suggs = args[count:2 * count]
        accepts = args[2 * count:]
        # Rows with an empty original are unused slots; dropping them keeps
        # trailing blank lines out of the final text.
        used = [row for row in zip(origs, suggs, accepts) if row[0]]
        if not used:
            return ""
        kept_origs, kept_suggs, kept_accepts = zip(*used)
        return finalize_text(kept_origs, kept_suggs, kept_accepts)

    # Wire actions
    extract_btn.click(fn=extract_both, inputs=[blog_url, image_url], outputs=combined_text)
    review_btn.click(fn=process_review, inputs=combined_text, outputs=[el for group in line_outputs for el in group])
    finalize_btn.click(fn=collect_decisions, inputs=originals + suggestions + decisions, outputs=final_output)

# Launch unconditionally, as before: the app starts whenever this file is run
# or imported.
app.launch()