File size: 4,100 Bytes
6af176b
2c83941
6af176b
c873d13
 
 
 
5c898fe
6af176b
c873d13
6af176b
 
b63764b
c873d13
 
 
 
 
 
 
 
 
b63764b
2c83941
 
 
 
 
 
 
b63764b
 
 
5c898fe
b63764b
 
 
 
 
 
 
5c898fe
 
b63764b
c873d13
 
b63764b
c873d13
b63764b
c873d13
b63764b
 
 
 
 
6af176b
5c898fe
c873d13
b63764b
 
 
 
6af176b
b63764b
 
 
c873d13
 
 
 
 
 
 
 
 
 
 
 
b63764b
c873d13
 
b63764b
 
 
c873d13
 
 
 
 
b63764b
 
c873d13
b63764b
2324254
b63764b
 
 
 
 
 
2324254
 
b63764b
 
 
 
 
 
16e02f2
b63764b
c873d13
b63764b
 
6af176b
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
import gradio as gr
import trafilatura
from transformers import pipeline
import pytesseract
from PIL import Image
import requests
from io import BytesIO
import difflib

# Load the text2text-generation pipeline (google/flan-t5-base) once, at import
# time, so every call to review_lines() reuses the same model instance.
reviewer = pipeline("text2text-generation", model="google/flan-t5-base")

# OCR from image URL
def extract_text_from_image_url(img_url):
    """Download the image at *img_url* and return the text OCR finds in it.

    Args:
        img_url: URL of the image to fetch.

    Returns:
        The string produced by ``pytesseract.image_to_string``. On any
        failure (network error, HTTP error status, unreadable image, OCR
        failure) a string of the form ``"❌ OCR Error: <reason>"`` is
        returned instead of raising, so the UI callback never crashes.
    """
    try:
        # Bound the wait so an unresponsive host cannot hang the UI callback.
        response = requests.get(img_url, timeout=15)
        # Surface 4xx/5xx responses instead of handing an error page to PIL.
        response.raise_for_status()
        # The context manager releases the decoded image once OCR is done.
        with Image.open(BytesIO(response.content)) as img:
            return pytesseract.image_to_string(img)
    except Exception as e:
        # Deliberate best-effort boundary: report the problem as text.
        return f"❌ OCR Error: {e}"

# Extract main blog content from URL
def extract_text_from_url(url):
    """Fetch *url* and return the main text content of the page.

    Args:
        url: Address of the blog post to download.

    Returns:
        The text extracted by ``trafilatura.extract``, or a
        ``"❌ Blog Error: ..."`` message when the page could not be
        downloaded or no content could be extracted. A string is always
        returned so callers can concatenate the result safely.
    """
    downloaded = trafilatura.fetch_url(url)
    if not downloaded:
        return "❌ Blog Error: Could not fetch content from the URL."

    extracted = trafilatura.extract(downloaded)
    # extract() yields None when it finds no usable content; returning that
    # would break callers that concatenate the result with other text.
    if extracted is None:
        return "❌ Blog Error: Could not extract readable content from the URL."
    return extracted

# Highlight diffs using difflib
def highlight_diffs(orig, suggestion):
    """Return a word-level diff of *orig* vs *suggestion* as Markdown.

    Both strings are split on whitespace and compared with
    ``difflib.ndiff``. Words only in *orig* are wrapped in ``~~...~~``,
    words only in *suggestion* in ``**...**``, and shared words are emitted
    unchanged. Any other ndiff line (such as the ``? `` hint lines) is
    dropped. The pieces are joined with single spaces.
    """
    # Maps the two-character ndiff prefix to the Markdown wrapper to apply.
    templates = {
        '- ': "~~{}~~",
        '+ ': "**{}**",
        '  ': "{}",
    }
    pieces = []
    for entry in difflib.ndiff(orig.split(), suggestion.split()):
        template = templates.get(entry[:2])
        if template is not None:
            pieces.append(template.format(entry[2:]))
    return " ".join(pieces)

# Review line-by-line
def review_lines(text):
    """Run the model over every non-blank line of *text*.

    Returns a list with one ``(original, highlighted, suggestion)`` tuple
    per non-blank line: the stripped input line, the Markdown diff from
    ``highlight_diffs``, and the stripped model output.
    """
    reviewed = []
    for raw_line in text.strip().split('\n'):
        stripped = raw_line.strip()
        if not stripped:
            continue
        # The prompt carries the line as it appeared; the stripped form is
        # what gets diffed and reported.
        generated = reviewer(
            f"Fix grammar, tone, and clarity:\n\n{raw_line}",
            max_new_tokens=100,
        )
        suggestion = generated[0]['generated_text'].strip()
        reviewed.append((stripped, highlight_diffs(stripped, suggestion), suggestion))
    return reviewed

# Finalize accepted suggestions
def finalize_text(originals, suggestions, decisions):
    """Assemble the final text from per-line accept/reject decisions.

    The three sequences are walked in lockstep (stopping at the shortest).
    For each position the suggestion is used when its decision is truthy,
    otherwise the original. The chosen lines are joined with newlines.
    """
    chosen = [
        suggested if take_it else original
        for original, suggested, take_it in zip(originals, suggestions, decisions)
    ]
    return "\n".join(chosen)

# Build Gradio UI
with gr.Blocks(theme=gr.themes.Soft()) as app:
    gr.Markdown("## ✨ BlogChecker AI\nSmart blog reviewer with OCR + AI suggestions")

    with gr.Row():
        blog_url = gr.Textbox(label="📎 Blog URL")
        image_url = gr.Textbox(label="🖼️ Image URL (optional)")
        extract_btn = gr.Button("🔍 Extract")

    combined_text = gr.Textbox(label="📝 Combined Blog + OCR Text", lines=10)

    with gr.Row():
        review_btn = gr.Button("🧠 Review Content")
        finalize_btn = gr.Button("✅ Finalize Clean Blog")

    # Every component is created here, at build time. Event handlers can only
    # update components that already exist in the layout; components created
    # inside a handler are never rendered, and their `.value` attribute does
    # not reflect what the user did in the browser.
    with gr.Column(visible=False) as review_area:
        review_md = gr.Markdown()
        accept_group = gr.CheckboxGroup(choices=[], value=[], label="✅ Accept Suggestion")
    final_output = gr.Textbox(label="📦 Final Clean Blog", lines=10)

    # Per-session storage for the (original, highlighted, suggestion) tuples
    # of the latest review, so concurrent users do not share results.
    review_state = gr.State([])

    def _choice_label(position):
        """Return the checkbox label identifying the line at 1-based *position*."""
        return f"Line {position}"

    # Extract combined content
    def extract_both(url, img_url):
        """Return the blog text, followed by OCR text when an image URL is given."""
        # `or ""` guards against an extractor that yields None.
        blog = extract_text_from_url(url) or ""
        ocr = extract_text_from_image_url(img_url) if img_url else ""
        return blog + ("\n" + ocr if ocr else "")

    # Review the text and refresh the pre-built review widgets
    def do_review(text):
        """Review *text* line by line and populate the review area.

        Returns updates for, in order: the review column (made visible), the
        Markdown listing of originals and suggested edits, the checkbox group
        (one unchecked choice per reviewed line), and the stored results.
        """
        results = review_lines(text or "")
        sections = [
            f"**Original Line {idx}**\n\n{orig}\n\n"
            f"**Suggested Edit {idx}**\n\n{highlighted}"
            for idx, (orig, highlighted, _clean) in enumerate(results, start=1)
        ]
        labels = [_choice_label(idx) for idx in range(1, len(results) + 1)]
        return (
            gr.update(visible=True),
            gr.update(value="\n\n---\n\n".join(sections)),
            gr.update(choices=labels, value=[]),
            results,
        )

    # Compile final clean version
    def collect_results(results=None, accepted_labels=None):
        """Build the final text from stored results and the ticked choices.

        Args:
            results: The tuples stored by ``do_review``; defaults to none.
            accepted_labels: Labels currently ticked in the checkbox group.

        Returns:
            The text produced by ``finalize_text``: the suggestion for every
            accepted line and the original for every other line.
        """
        results = results or []
        accepted = set(accepted_labels or [])
        originals = [orig for orig, _highlighted, _clean in results]
        suggestions = [clean for _orig, _highlighted, clean in results]
        decisions = [
            _choice_label(idx) in accepted for idx in range(1, len(results) + 1)
        ]
        return finalize_text(originals, suggestions, decisions)

    # Wire buttons
    extract_btn.click(fn=extract_both, inputs=[blog_url, image_url], outputs=combined_text)
    review_btn.click(
        fn=do_review,
        inputs=combined_text,
        outputs=[review_area, review_md, accept_group, review_state],
    )
    finalize_btn.click(
        fn=collect_results,
        inputs=[review_state, accept_group],
        outputs=final_output,
    )

app.launch()