JaishnaCodz committed on
Commit
b63764b
·
verified ·
1 Parent(s): 5873b0f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +49 -50
app.py CHANGED
@@ -10,7 +10,7 @@ import difflib
# Load model
# Built once at module import: a "text2text-generation" pipeline backed by the
# "google/flan-t5-base" checkpoint. review_lines() calls this object once for
# every non-empty input line.
# NOTE(review): `pipeline` is presumably transformers.pipeline; the import is
# outside the visible part of the file, so confirm.
reviewer = pipeline("text2text-generation", model="google/flan-t5-base")
12
 
13
- # OCR
14
  def extract_text_from_image_url(img_url):
15
  try:
16
  response = requests.get(img_url)
@@ -20,7 +20,7 @@ def extract_text_from_image_url(img_url):
20
  except Exception as e:
21
  return f"❌ OCR Error: {e}"
22
 
23
- # Extract blog
24
  def extract_text_from_url(url):
25
  downloaded = trafilatura.fetch_url(url)
26
  if downloaded:
@@ -28,42 +28,42 @@ def extract_text_from_url(url):
28
  else:
29
  return "❌ Blog Error: Could not fetch content from the URL."
30
 
def highlight_diffs(original, suggestion):
    """Render a word-level diff of two strings as Markdown.

    Both strings are split on whitespace and compared with ``difflib.ndiff``.
    Words only in *original* are wrapped in ``~~...~~``, words only in
    *suggestion* are wrapped in ``**...**``, and shared words are emitted
    unchanged. The pieces are joined with single spaces.
    """
    # Markdown wrapper for each ndiff prefix that marks a changed word.
    wrappers = {"- ": "~~{}~~", "+ ": "**{}**"}

    pieces = []
    for entry in difflib.ndiff(original.split(), suggestion.split()):
        marker, word = entry[:2], entry[2:]
        if marker in wrappers:
            pieces.append(wrappers[marker].format(word))
        elif marker[:1] == " ":
            pieces.append(word)
        # Anything else (ndiff's "? " hint lines) is intentionally dropped.
    return " ".join(pieces)
43
 
def review_lines(text):
    """Ask the model for a rewrite of every non-blank line of *text*.

    Args:
        text: Multi-line string to review. ``None`` or blank input produces
            an empty result instead of raising.

    Returns:
        A list of ``(original, highlighted, suggestion)`` tuples, one per
        non-blank line: the stripped input line, the Markdown diff produced
        by ``highlight_diffs``, and the stripped model output.
    """
    if not text:
        return []

    suggestions = []
    for raw_line in text.strip().split('\n'):
        # Strip once and use the same text for the prompt, the diff and the
        # stored original, so all three describe the same string.
        line = raw_line.strip()
        if not line:
            continue
        prompt = f"Rewrite this to fix grammar, tone, and remove any offensive language:\n\n{line}"
        suggestion = reviewer(prompt, max_new_tokens=100)[0]['generated_text'].strip()
        highlighted = highlight_diffs(line, suggestion)
        suggestions.append((line, highlighted, suggestion))
    return suggestions
56
 
def finalize_text(originals, suggestions, decisions):
    """Merge originals and suggestions into the final text.

    The three sequences are walked in lockstep. For each position the
    suggestion is used when the matching decision is truthy, otherwise the
    original is kept. The chosen lines are joined with newlines.
    """
    chosen = [
        suggested if accepted else original
        for original, suggested, accepted in zip(originals, suggestions, decisions)
    ]
    return "\n".join(chosen)
63
 
64
- # Gradio app
65
- with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as app:
66
- gr.Markdown("## ✨ BlogChecker AI\nSmart AI reviewer for blog content, with interactive approval and OCR image support.")
67
 
68
  with gr.Row():
69
  blog_url = gr.Textbox(label="πŸ“Ž Blog URL")
@@ -76,41 +76,40 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as app:
76
  review_btn = gr.Button("🧠 Review Content")
77
  finalize_btn = gr.Button("βœ… Finalize Clean Blog")
78
 
79
- review_section = gr.Column(visible=False)
80
- review_boxes = [] # Will store tuples: (original_box, highlighted_markdown_box, accept_checkbox, clean_suggestion)
81
-
82
  final_output = gr.Textbox(label="πŸ“¦ Final Clean Blog", lines=10)
83
 
def extract_both(url, img_url):
    """Return the blog text, followed by OCR text when an image URL is given.

    The blog text always comes from ``extract_text_from_url(url)``. OCR runs
    only when *img_url* is truthy, and a non-empty OCR result is appended on
    its own line.
    """
    article_text = extract_text_from_url(url)

    image_text = ""
    if img_url:
        image_text = extract_text_from_image_url(img_url)

    suffix = "\n" + image_text if image_text else ""
    return article_text + suffix
89
 
# Review processing with diffs
def process_review(text):
    """Review *text* and rebuild the per-line review widgets.

    Runs ``review_lines`` on *text*, empties ``review_section.children`` and
    ``review_boxes``, then creates one read-only Textbox (original line), one
    Markdown (highlighted diff) and one Checkbox (accept flag) per result
    inside ``review_section``. Each row is recorded in ``review_boxes`` as
    ``(orig_box, markdown_sugg, accept_box, clean_sugg)``.

    Returns:
        ``gr.update(visible=True)``, applied to the output component this
        handler is wired to.
    """
    results = review_lines(text)
    # NOTE(review): this mutates the layout from inside an event handler.
    # Gradio presumably fixes the component tree when the Blocks context is
    # built, so components created here may never reach the browser. Confirm,
    # and consider gr.render or a fixed pool of pre-created rows.
    review_section.children.clear()
    review_boxes.clear()

    for i, (orig, highlighted, clean_sugg) in enumerate(results):
        with review_section:
            orig_box = gr.Textbox(value=orig, label=f"Original Line {i+1}", interactive=False)
            markdown_sugg = gr.Markdown(value=highlighted, label=f"Suggested Edit {i+1}")
            accept_box = gr.Checkbox(label="βœ… Accept Suggestion", value=False)
            # Keep the plain suggestion text with the widgets so finalization
            # does not have to parse the Markdown diff.
            review_boxes.append((orig_box, markdown_sugg, accept_box, clean_sugg))
    return gr.update(visible=True)
103
 
# Finalization logic
def collect_dynamic_decisions():
    """Build the final text from the rows stored in ``review_boxes``.

    Reads the original text and accept flag from each row's component
    objects, takes the plain suggestion stored at index 3, and passes the
    three lists to ``finalize_text``.
    """
    # NOTE(review): `.value` is read from the server-side component objects.
    # That is presumably the value they were constructed with (checkboxes
    # start as False), not what the user ticked in the browser. Confirm;
    # live values normally arrive through the event's `inputs`.
    originals_vals = [box[0].value for box in review_boxes]
    clean_suggestions = [box[3] for box in review_boxes]
    accepts_vals = [box[2].value for box in review_boxes]
    return finalize_text(originals_vals, clean_suggestions, accepts_vals)
110
 
111
- # Wire actions
112
  extract_btn.click(fn=extract_both, inputs=[blog_url, image_url], outputs=combined_text)
113
- review_btn.click(fn=process_review, inputs=combined_text, outputs=review_section)
114
- finalize_btn.click(fn=collect_dynamic_decisions, outputs=final_output)
115
 
116
  app.launch()
 
# Load model
# Built once at module import: a "text2text-generation" pipeline backed by the
# "google/flan-t5-base" checkpoint. review_lines() calls this object once for
# every non-empty input line.
# NOTE(review): `pipeline` is presumably transformers.pipeline; the import is
# outside the visible part of the file, so confirm.
reviewer = pipeline("text2text-generation", model="google/flan-t5-base")
12
 
13
+ # OCR from image URL
14
  def extract_text_from_image_url(img_url):
15
  try:
16
  response = requests.get(img_url)
 
20
  except Exception as e:
21
  return f"❌ OCR Error: {e}"
22
 
23
+ # Extract main blog content from URL
24
  def extract_text_from_url(url):
25
  downloaded = trafilatura.fetch_url(url)
26
  if downloaded:
 
28
  else:
29
  return "❌ Blog Error: Could not fetch content from the URL."
30
 
def highlight_diffs(orig, suggestion):
    """Return a Markdown word diff between *orig* and *suggestion*.

    The inputs are split on whitespace and fed to ``difflib.ndiff``. Removed
    words are rendered as ``~~word~~``, added words as ``**word**``, and
    common words are passed through. The output is joined with single spaces.
    """
    # Markdown template for each ndiff prefix that marks a changed word.
    markup = {"- ": "~~{}~~", "+ ": "**{}**"}

    rendered = []
    for entry in difflib.ndiff(orig.split(), suggestion.split()):
        prefix, payload = entry[:2], entry[2:]
        template = markup.get(prefix)
        if template is not None:
            rendered.append(template.format(payload))
        elif prefix[:1] == " ":
            rendered.append(payload)
        # ndiff's "? " hint lines match neither case and are skipped.
    return " ".join(rendered)
43
 
def review_lines(text):
    """Ask the model for a rewrite of every non-blank line of *text*.

    Args:
        text: Multi-line string to review. ``None`` or blank input produces
            an empty result instead of raising.

    Returns:
        A list of ``(original, highlighted, suggestion)`` tuples, one per
        non-blank line: the stripped input line, the Markdown diff produced
        by ``highlight_diffs``, and the stripped model output.
    """
    if not text:
        return []

    reviewed = []
    for raw_line in text.strip().split('\n'):
        # Strip once and use the same text for the prompt, the diff and the
        # stored original, so all three describe the same string.
        line = raw_line.strip()
        if not line:
            continue
        prompt = f"Fix grammar, tone, and clarity:\n\n{line}"
        response = reviewer(prompt, max_new_tokens=100)[0]['generated_text'].strip()
        reviewed.append((line, highlight_diffs(line, response), response))
    return reviewed
56
 
def finalize_text(originals, suggestions, decisions):
    """Merge originals and suggestions into the final text.

    The three sequences are walked in lockstep. For each position the
    suggestion is used when the matching decision is truthy, otherwise the
    original is kept. The chosen lines are joined with newlines.
    """
    picked = (
        suggestion if keep else original
        for original, suggestion, keep in zip(originals, suggestions, decisions)
    )
    return "\n".join(picked)
63
 
64
+ # Build Gradio UI
65
+ with gr.Blocks(theme=gr.themes.Soft()) as app:
66
+ gr.Markdown("## ✨ BlogChecker AI\nSmart blog reviewer with OCR + AI suggestions")
67
 
68
  with gr.Row():
69
  blog_url = gr.Textbox(label="πŸ“Ž Blog URL")
 
76
  review_btn = gr.Button("🧠 Review Content")
77
  finalize_btn = gr.Button("βœ… Finalize Clean Blog")
78
 
79
+ review_area = gr.Column(visible=False)
 
 
80
  final_output = gr.Textbox(label="πŸ“¦ Final Clean Blog", lines=10)
81
 
82
+ review_boxes = []
83
+
def extract_both(url, img_url):
    """Return the blog text, followed by OCR text when an image URL is given.

    The blog text always comes from ``extract_text_from_url(url)``. OCR runs
    only when *img_url* is truthy, and a non-empty OCR result is appended on
    its own line.
    """
    article_text = extract_text_from_url(url)

    image_text = ""
    if img_url:
        image_text = extract_text_from_image_url(img_url)

    suffix = "\n" + image_text if image_text else ""
    return article_text + suffix
89
 
# Review and build UI dynamically
def do_review(text):
    """Review *text* and rebuild the per-line review widgets.

    Runs ``review_lines`` on *text*, empties ``review_area.children`` and
    ``review_boxes``, then creates one read-only Textbox (original line), one
    Markdown (highlighted diff) and one Checkbox (accept flag) per result
    inside ``review_area``. Each row is recorded in ``review_boxes`` as
    ``(orig_box, clean, accept)``.

    Returns:
        ``gr.update(visible=True)``, applied to the output component this
        handler is wired to.
    """
    results = review_lines(text)
    # NOTE(review): this mutates the layout from inside an event handler.
    # Gradio presumably fixes the component tree when the Blocks context is
    # built, so components created here may never reach the browser. Confirm,
    # and consider gr.render or a fixed pool of pre-created rows.
    review_area.children.clear()
    review_boxes.clear()
    for idx, (orig, highlighted, clean) in enumerate(results):
        with review_area:
            orig_box = gr.Textbox(value=orig, label=f"Original Line {idx+1}", interactive=False)
            markdown_sugg = gr.Markdown(value=highlighted, label=f"Suggested Edit {idx+1}")
            accept = gr.Checkbox(label="βœ… Accept Suggestion", value=False)
            # Only the original box, the plain suggestion and the checkbox are
            # kept; the Markdown diff is display-only.
            review_boxes.append((orig_box, clean, accept))

    return gr.update(visible=True)
102
 
# Compile final clean version
def collect_results():
    """Build the final text from the rows stored in ``review_boxes``.

    Reads the original text and accept flag from each row's component
    objects, takes the plain suggestion stored at index 1, and passes the
    three lists to ``finalize_text``.
    """
    # NOTE(review): `.value` is read from the server-side component objects.
    # That is presumably the value they were constructed with (checkboxes
    # start as False), not what the user ticked in the browser. Confirm;
    # live values normally arrive through the event's `inputs`.
    originals = [box[0].value for box in review_boxes]
    suggestions = [box[1] for box in review_boxes]
    accepts = [box[2].value for box in review_boxes]
    return finalize_text(originals, suggestions, accepts)
109
 
110
+ # Wire buttons
111
  extract_btn.click(fn=extract_both, inputs=[blog_url, image_url], outputs=combined_text)
112
+ review_btn.click(fn=do_review, inputs=combined_text, outputs=review_area)
113
+ finalize_btn.click(fn=collect_results, outputs=final_output)
114
 
115
  app.launch()