JaishnaCodz committed on
Commit
b2d2b3c
·
verified ·
1 Parent(s): 5ef657b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +146 -110
app.py CHANGED
@@ -1,120 +1,156 @@
1
  import gradio as gr
2
- import trafilatura
3
- from transformers import pipeline
4
- import pytesseract
5
- from PIL import Image
6
  import requests
7
- from io import BytesIO
 
8
  import difflib
 
 
 
 
 
9
 
10
# Load the model: a Hugging Face text2text-generation pipeline backed by
# google/flan-t5-base. It is built once at import time.
reviewer = pipeline("text2text-generation", model="google/flan-t5-base")

# Global storage for review UI: module-level list shared by the UI handlers.
review_boxes = []
 
 
15
 
16
# OCR from image
def extract_text_from_image_url(img_url):
    """Download the image at ``img_url`` and return the text Tesseract reads from it.

    Args:
        img_url: HTTP(S) URL of the image to run OCR on.

    Returns:
        The recognized text, or a string starting with "❌ OCR Error:" when
        the download, image decoding, or OCR step fails. The function never
        raises, so the result can be shown directly in the UI.
    """
    try:
        # A timeout keeps a stalled server from hanging the UI forever.
        response = requests.get(img_url, timeout=10)
        # Surface HTTP errors (404, 500, ...) as such instead of handing an
        # HTML error page to PIL and reporting "cannot identify image file".
        response.raise_for_status()
        img = Image.open(BytesIO(response.content))
        return pytesseract.image_to_string(img)
    except Exception as e:
        return f"❌ OCR Error: {e}"
25
 
26
# Blog content from URL
def extract_text_from_url(url):
    """Fetch ``url`` with trafilatura and return its main article text.

    Args:
        url: Address of the blog post to download.

    Returns:
        The extracted text, or a string starting with "❌ Blog Error:" when
        the page could not be fetched or no article text could be extracted.
        A string is always returned so callers can concatenate the result.
    """
    downloaded = trafilatura.fetch_url(url)
    if not downloaded:
        return "❌ Blog Error: Could not fetch content from the URL."

    # trafilatura.extract returns None when it finds no main content; turn
    # that into a message so callers never receive None.
    text = trafilatura.extract(downloaded)
    if not text:
        return "❌ Blog Error: No readable article text found at the URL."
    return text
33
-
34
# Highlight word-level differences
def highlight_diffs(orig, suggestion):
    """Return a Markdown rendering of the word-level diff between two strings.

    Words only in ``orig`` are wrapped in ``~~strikethrough~~``, words only in
    ``suggestion`` are wrapped in ``**bold**``, and shared words are emitted
    unchanged. The "? " hint lines that difflib.ndiff may produce are dropped.
    """
    def render(token):
        # Every ndiff token is a two-character marker followed by the word.
        marker, word = token[:2], token[2:]
        if marker == "- ":
            return f"~~{word}~~"
        if marker == "+ ":
            return f"**{word}**"
        return word

    tokens = difflib.ndiff(orig.split(), suggestion.split())
    kept = (tok for tok in tokens if tok.startswith(("- ", "+ ", " ")))
    return " ".join(render(tok) for tok in kept)
46
-
47
# Process each line
def review_lines(text):
    """Ask the model for an improved version of every non-blank line of ``text``.

    Returns:
        A list of ``(original, highlighted, suggestion)`` tuples, one per
        non-blank line: the stripped original line, the Markdown diff produced
        by ``highlight_diffs``, and the model's stripped suggestion.
    """
    results = []
    for raw_line in text.strip().split('\n'):
        original = raw_line.strip()
        if not original:
            continue  # blank lines are not sent to the model
        # The prompt carries the line exactly as it appeared in the input.
        request = f"Fix grammar, tone, and clarity:\n\n{raw_line}"
        generated = reviewer(request, max_new_tokens=100)
        suggestion = generated[0]['generated_text'].strip()
        results.append((original, highlight_diffs(original, suggestion), suggestion))
    return results
59
-
60
# Finalize accepted suggestions
def finalize_text(originals, suggestions, decisions):
    """Build the final text by picking, per line, the suggestion or the original.

    Args:
        originals: Original lines, in order.
        suggestions: Suggested replacement for each original line.
        decisions: Truthy where the suggestion for that line was accepted.

    Returns:
        The chosen lines joined with newlines. Every original line appears in
        the output: a line with no matching decision or suggestion keeps its
        original text instead of being silently dropped.
    """
    suggestions = list(suggestions)
    decisions = list(decisions)
    output = []
    for index, orig in enumerate(originals):
        accepted = index < len(decisions) and decisions[index]
        if accepted and index < len(suggestions):
            output.append(suggestions[index])
        else:
            output.append(orig)
    return "\n".join(output)
66
-
67
# Build UI
# Gradio fixes the component tree when the Blocks context closes, so review
# rows cannot be created (or attached via ``.children``) inside an event
# handler, and a component's ``.value`` attribute does not reflect what the
# user did in the browser. A fixed pool of rows is therefore built up front:
# handlers fill/show them through their outputs and read the checkboxes
# through their inputs.
MAX_REVIEW_LINES = 50

with gr.Blocks(theme=gr.themes.Soft()) as app:
    gr.Markdown("## ✨ BlogChecker AI\nSmart blog reviewer with OCR + AI suggestions")

    with gr.Row():
        blog_url = gr.Textbox(label="📎 Blog URL")
        image_url = gr.Textbox(label="🖼️ Image URL (optional)")
        extract_btn = gr.Button("🔍 Extract")

    combined_text = gr.Textbox(label="📝 Combined Blog + OCR Text", lines=10)

    with gr.Row():
        review_btn = gr.Button("🧠 Review Content")
        finalize_btn = gr.Button("✅ Finalize Clean Blog")

    # Per-session list of [original, clean_suggestion] pairs for ALL reviewed
    # lines, including any beyond the visible row pool.
    review_state = gr.State([])

    # review_boxes holds one (orig_box, markdown_sugg, accept) tuple per
    # pre-built row.
    review_boxes.clear()
    with gr.Column(visible=False) as review_area:
        for idx in range(MAX_REVIEW_LINES):
            orig_box = gr.Textbox(label=f"Original Line {idx+1}", interactive=False, visible=False)
            markdown_sugg = gr.Markdown(visible=False)
            accept = gr.Checkbox(label="✅ Accept Suggestion", value=False, visible=False)
            review_boxes.append((orig_box, markdown_sugg, accept))

    final_output = gr.Textbox(label="📦 Final Clean Blog", lines=10)

    # Combine blog + OCR
    def extract_both(url, img_url):
        """Return the blog text, followed by the OCR text when an image URL is given."""
        blog = extract_text_from_url(url) or ""  # guard against a None extraction
        ocr = extract_text_from_image_url(img_url) if img_url else ""
        return blog + ("\n" + ocr if ocr else "")

    # Generate suggestions and show UI
    def do_review(text):
        """Review ``text`` and return updates for the review area, state and rows.

        The returned list matches the ``outputs`` of ``review_btn.click``:
        the column update, the list of [original, suggestion] pairs, then
        three updates (textbox, markdown, checkbox) per pre-built row.
        """
        results = review_lines(text)
        updates = []
        for idx in range(MAX_REVIEW_LINES):
            if idx < len(results):
                orig, highlighted, _clean = results[idx]
                updates.append(gr.update(value=orig, visible=True))
                updates.append(gr.update(value=highlighted, visible=True))
                updates.append(gr.update(value=False, visible=True))
            else:
                # Unused rows are emptied and hidden so stale content from a
                # previous review never shows up.
                updates.append(gr.update(value="", visible=False))
                updates.append(gr.update(value="", visible=False))
                updates.append(gr.update(value=False, visible=False))
        pairs = [[orig, clean] for orig, _highlighted, clean in results]
        return [gr.update(visible=True), pairs] + updates

    # Collect accepted decisions
    def collect_results(pairs, *accepts):
        """Assemble the final text from the stored pairs and the checkbox values.

        Lines beyond the visible row pool have no checkbox and therefore keep
        their original text.
        """
        pairs = pairs or []
        originals = [pair[0] for pair in pairs]
        suggestions = [pair[1] for pair in pairs]
        decisions = list(accepts[:len(pairs)])
        decisions += [False] * (len(pairs) - len(decisions))
        return finalize_text(originals, suggestions, decisions)

    # Wire events
    row_components = [component for row in review_boxes for component in row]
    accept_boxes = [row[2] for row in review_boxes]

    extract_btn.click(fn=extract_both, inputs=[blog_url, image_url], outputs=combined_text)
    review_btn.click(fn=do_review, inputs=combined_text,
                     outputs=[review_area, review_state] + row_components)
    finalize_btn.click(fn=collect_results, inputs=[review_state] + accept_boxes,
                       outputs=final_output)

app.launch()
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
 
 
 
 
2
  import requests
3
+ from PIL import Image
4
+ import pytesseract
5
  import difflib
6
+ from io import BytesIO
7
+ from transformers import pipeline
8
+ import trafilatura
9
+ from nltk.tokenize import sent_tokenize
10
+ import nltk
11
 
12
# sent_tokenize needs NLTK's Punkt sentence data. Newer NLTK releases look it
# up under the name "punkt_tab" while older ones use "punkt", so both are
# requested here.
nltk.download("punkt")
nltk.download("punkt_tab")

# === Load AI model ===
reviewer = pipeline("text-generation", model="HuggingFaceH4/zephyr-7b-beta", max_new_tokens=200)
# NOTE(review): `device` is only printed; it is not passed to pipeline(), so
# it does not control where the model actually runs — confirm the intent.
device = "cpu"
print(f"Device set to use {device}")
18
 
19
# === Utility: Highlight diffs ===
def highlight_diff(original, suggestion):
    """Return an HTML rendering of the word-level diff between two strings.

    Args:
        original: Text before the edit.
        suggestion: Text after the edit.

    Returns:
        A single-line HTML string. Words only in ``original`` are red and
        struck through, words only in ``suggestion`` are green and bold, and
        shared words are plain. Every word is HTML-escaped, because the text
        comes from fetched pages, OCR and model output and must not be able
        to inject markup into the page.
    """
    import html  # local import: not part of the module's import block

    parts = []
    for token in difflib.ndiff(original.split(), suggestion.split()):
        # Each ndiff token is a two-character marker followed by the word;
        # "? " hint lines match no branch below and are dropped.
        marker, word = token[:2], html.escape(token[2:])
        if marker == "- ":
            parts.append(f"<span style='color:red;text-decoration:line-through'>{word}</span>")
        elif marker == "+ ":
            parts.append(f"<span style='color:green;font-weight:bold'>{word}</span>")
        elif marker == "  ":
            parts.append(word)
    return " ".join(parts)
31
+
32
# === Extract blog content from URL ===
def extract_text_from_url(url):
    """Download ``url`` and return the main article text found on the page.

    Args:
        url: Address of the blog post.

    Returns:
        The extracted text, or a string starting with "❌ Blog Error:" when
        the URL is empty, the request fails, the server answers with a
        non-200 status, or no article text can be extracted. The function
        never raises and never returns None.
    """
    if not url or not url.strip():
        return "❌ Blog Error: No URL provided."
    try:
        headers = {"User-Agent": "Mozilla/5.0"}
        response = requests.get(url.strip(), headers=headers, timeout=10)
        if response.status_code != 200:
            return f"❌ Blog Error: HTTP {response.status_code} on URL {url}"
        # trafilatura.extract returns None when it finds no main content.
        text = trafilatura.extract(response.text)
        if not text:
            return "❌ Blog Error: No readable article text found at the URL."
        return text
    except Exception as e:
        return f"❌ Blog Error: {e}"
43
+
44
# === Extract text from image URL (OCR) ===
def extract_text_from_image(image_url):
    """Download the image at ``image_url`` and return the text OCR finds in it.

    Args:
        image_url: HTTP(S) URL of the image.

    Returns:
        The recognized text, or a string starting with "❌ OCR Error:" when
        the URL is empty, the download or decoding fails, or no text is
        recognized. The function never raises.
    """
    if not image_url or not image_url.strip():
        return "❌ OCR Error: No image URL provided."
    try:
        # Same request settings as the blog fetcher: a browser-like
        # User-Agent and a timeout so a stalled server cannot hang the UI.
        headers = {"User-Agent": "Mozilla/5.0"}
        response = requests.get(image_url.strip(), headers=headers, timeout=10)
        # Report HTTP errors as such instead of handing an error page to PIL.
        response.raise_for_status()
        # Convert to 8-bit grayscale ("L") before running Tesseract.
        image = Image.open(BytesIO(response.content)).convert("L")
        text = pytesseract.image_to_string(image)
        return text if text.strip() else "❌ OCR Error: No readable text found."
    except Exception as e:
        return f"❌ OCR Error: {e}"
53
 
54
# === Suggestion generator ===
def generate_suggestions(text):
    """Split ``text`` into sentences and ask the model to improve each one.

    Args:
        text: Blog content to review; may be empty or None.

    Returns:
        A ``(sentences, suggestions)`` pair of equal-length lists. A sentence
        for which the model produces no text keeps itself as its suggestion.
        Empty, whitespace-only or None input yields ``([], [])`` without
        calling the tokenizer or the model.
    """
    if not text or not text.strip():
        return [], []

    sentences = sent_tokenize(text)
    suggestions = []
    for sent in sentences:
        prompt = f"Improve the tone, grammar, clarity and flag any sensitive content:\n\n{sent}"
        # return_full_text=False asks the text-generation pipeline for the
        # continuation only; the replace() below stays as a fallback in case
        # the prompt is echoed anyway.
        output = reviewer(prompt, max_new_tokens=200, return_full_text=False)[0]["generated_text"]
        cleaned = output.replace(prompt, "").strip()
        suggestions.append(cleaned or sent)
    return sentences, suggestions
64
+
65
# === Final approval handler ===
def collect_decisions(originals, suggestions, *choices):
    """Build the final text from per-sentence Accept/Reject choices.

    Args:
        originals: Original sentences, in order.
        suggestions: Suggested replacement for each sentence.
        *choices: One value per sentence; "Accept" selects the suggestion,
            anything else keeps the original.

    Returns:
        The chosen sentences joined with newlines. Every original sentence
        appears in the output: one with no matching choice or suggestion
        keeps its original text instead of being silently dropped.
    """
    suggestions = list(suggestions)
    results = []
    for index, orig in enumerate(originals):
        accepted = index < len(choices) and choices[index] == "Accept"
        if accepted and index < len(suggestions):
            results.append(suggestions[index])
        else:
            results.append(orig)
    return "\n".join(results)
71
+
72
# === Gradio UI ===
# Gradio fixes the component tree when the Blocks context closes, so
# components created inside an event handler are never rendered, and
# input/output lists are captured when an event is wired. A fixed pool of
# review rows is therefore built up front; handlers only fill, show and hide
# those rows through their outputs.
MAX_REVIEW_ROWS = 50

with gr.Blocks() as demo:
    gr.Markdown("# ✨ Blog Reviewer AI")
    gr.Markdown("Detect tone issues, errors, and sensitive content — and clean them interactively!")

    with gr.Tab("🔗 From Blog URL"):
        blog_url = gr.Textbox(label="Enter blog URL")
        fetch_btn = gr.Button("Fetch & Review")

    with gr.Tab("🖼️ From Image URL (OCR)"):
        image_url = gr.Textbox(label="Enter Image URL")
        image_btn = gr.Button("Extract & Review")

    with gr.Tab("📝 Paste Text"):
        pasted_text = gr.Textbox(label="Paste blog content here", lines=10)
        paste_btn = gr.Button("Review Text")

    # Per-session copies of the reviewed sentences and their suggestions.
    originals = gr.State([])
    suggestions = gr.State([])
    decision_radios = []   # one Accept/Reject radio per pre-built row
    sentence_blocks = []   # one (orig_md, sugg_md, diff_md) tuple per row

    view_mode = gr.Radio(["Original", "Suggestion", "Side-by-Side"], value="Side-by-Side", label="Choose View")

    with gr.Column(visible=False) as output_section:
        for i in range(MAX_REVIEW_ROWS):
            orig_md = gr.Markdown(visible=False)
            sugg_md = gr.Markdown(visible=False)
            diff_md = gr.Markdown(visible=False)
            radio = gr.Radio(["Accept", "Reject"], value="Accept", label=f"Suggestion {i+1}", visible=False)
            decision_radios.append(radio)
            sentence_blocks.append((orig_md, sugg_md, diff_md))

    final_output = gr.Textbox(label="✅ Final Output", lines=12)
    finalize_btn = gr.Button("Generate Clean Version")

    def _view_flags(view):
        """Return (show_original, show_suggestion, show_diff) for a view mode."""
        if view == "Original":
            return True, False, False
        if view == "Suggestion":
            return False, True, False
        return False, False, True  # Side-by-side shows the highlighted diff

    # === Populate sentence review rows ===
    def populate_review_ui(origs, suggs, view="Side-by-Side"):
        """Return four updates (orig, sugg, diff, radio) for every pre-built row.

        Rows with a sentence are filled and shown according to ``view``; the
        remaining rows are emptied and hidden. Sentences beyond
        MAX_REVIEW_ROWS get no row.
        """
        import html  # local import: not part of the module's import block

        show_orig, show_sugg, show_diff = _view_flags(view)
        pairs = list(zip(origs, suggs))[:MAX_REVIEW_ROWS]
        updates = []
        for i in range(MAX_REVIEW_ROWS):
            if i < len(pairs):
                orig, sugg = pairs[i]
                # Escape fetched/OCR/model text before embedding it in HTML.
                updates.append(gr.update(value=f"<b>{html.escape(orig)}</b>", visible=show_orig))
                updates.append(gr.update(value=f"<b>{html.escape(sugg)}</b>", visible=show_sugg))
                updates.append(gr.update(value=highlight_diff(orig, sugg), visible=show_diff))
                updates.append(gr.update(value="Accept", visible=True))
            else:
                updates.append(gr.update(value="", visible=False))
                updates.append(gr.update(value="", visible=False))
                updates.append(gr.update(value="", visible=False))
                updates.append(gr.update(value="Accept", visible=False))
        return updates

    # === Show suggestions UI ===
    def show_review(text, view="Side-by-Side"):
        """Generate suggestions for ``text`` and return all UI updates.

        The returned list matches the ``outputs`` of ``paste_btn.click``: the
        two state values, the column update, then the per-row updates.
        """
        origs, suggs = generate_suggestions(text or "")
        return [origs, suggs, gr.update(visible=True)] + populate_review_ui(origs, suggs, view)

    # === Toggle view mode ===
    def toggle_view(view, origs=()):
        """Return visibility updates for the three Markdown views of every row.

        Only rows that currently hold a sentence are shown; unused rows stay
        hidden whatever the view mode.
        """
        flags = _view_flags(view)
        active = min(len(origs or ()), MAX_REVIEW_ROWS)
        updates = []
        for i in range(MAX_REVIEW_ROWS):
            in_use = i < active
            updates.extend(gr.update(visible=in_use and flag) for flag in flags)
        return updates

    # === Final output handler ===
    def finalize_output(origs, suggs, *choices):
        """Build the final text from the stored sentences and the radio values.

        Sentences beyond the row pool have no radio and keep their original
        text; radios of unused rows are ignored.
        """
        origs = list(origs or [])
        suggs = list(suggs or [])
        picks = list(choices[:len(origs)])
        picks += ["Reject"] * (len(origs) - len(picks))
        return collect_decisions(origs, suggs, *picks)

    # Flattened in the same per-row order that populate_review_ui emits.
    review_components = [
        component
        for block, radio in zip(sentence_blocks, decision_radios)
        for component in (*block, radio)
    ]

    # Button click handlers
    fetch_btn.click(fn=extract_text_from_url, inputs=blog_url, outputs=pasted_text)
    image_btn.click(fn=extract_text_from_image, inputs=image_url, outputs=pasted_text)

    paste_btn.click(fn=show_review, inputs=[pasted_text, view_mode],
                    outputs=[originals, suggestions, output_section] + review_components)

    view_mode.change(fn=toggle_view, inputs=[view_mode, originals],
                     outputs=[item for block in sentence_blocks for item in block])

    finalize_btn.click(fn=finalize_output, inputs=[originals, suggestions] + decision_radios, outputs=final_output)

demo.launch()