JaishnaCodz committed on
Commit
b8a6b71
·
verified ·
1 Parent(s): d478d45

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +65 -62
app.py CHANGED
@@ -1,81 +1,84 @@
1
  import gradio as gr
2
  from transformers import pipeline
3
- from newspaper import Article
4
- import nltk
5
- from nltk.tokenize import sent_tokenize
6
  import re
7
 
# Fetch the NLTK sentence-tokenizer data used by sent_tokenize.
# 'punkt_tab' is requested as well: the later revision of this file adds it
# to fix a PunktTokenizer lookup error.
nltk.download('punkt')
nltk.download('punkt_tab')

# Load grammar correction and toxicity detection models
grammar_corrector = pipeline("text2text-generation", model="vennify/t5-base-grammar-correction")
toxicity_classifier = pipeline("text-classification", model="unitary/toxic-bert")
13
 
14
- # Functions
15
-
def extract_text(input_type, text_input, url_input):
    """Return the blog text that should be reviewed.

    When ``input_type`` is ``"URL"`` and a non-blank URL is supplied, the page
    is downloaded and parsed with ``Article`` and its text is returned.
    In every other case ``text_input`` is returned unchanged.
    """
    # Strip first so a whitespace-only (or missing) URL is treated as absent
    # instead of being handed to the downloader.
    url = (url_input or "").strip()
    if input_type == "URL" and url:
        article = Article(url)
        article.download()
        article.parse()
        return article.text
    return text_input
23
-
def check_grammar(text):
    """Return a grammar-corrected version of ``text``.

    Blank input is returned as-is (``None`` becomes ``""``) without calling
    the model. Otherwise the text is passed to ``grammar_corrector`` with
    ``max_length=512`` and ``do_sample=False`` and the first generated text
    is returned.
    """
    if not text or not text.strip():
        return text or ""
    result = grammar_corrector(text, max_length=512, do_sample=False)
    return result[0]['generated_text']
27
 
def detect_sensitive_content(text, threshold=0.7):
    """Return the sentences of ``text`` that the toxicity model flags.

    The text is split with ``sent_tokenize`` and each sentence is classified
    on its own. A sentence is reported when the top label is ``'toxic'`` and
    its score is greater than ``threshold``.

    Returns a list of dicts with keys ``"sentence"``, ``"score"`` and
    ``"index"`` (the sentence's position in the tokenized text).
    """
    if not text or not text.strip():
        return []

    sensitive = []
    for i, sentence in enumerate(sent_tokenize(text)):
        # truncation=True is forwarded to the tokenizer so an over-long
        # sentence is cut to the model's limit instead of failing.
        top = toxicity_classifier(sentence, truncation=True)[0]
        if top['label'] == 'toxic' and top['score'] > threshold:
            sensitive.append({"sentence": sentence, "score": top['score'], "index": i})
    return sensitive
36
-
def highlight_text(original, corrected, sensitive_issues):
    """Render ``corrected`` as HTML with grammar and toxicity highlights.

    Words of ``corrected`` that were replaced or inserted relative to
    ``original`` get a yellow background. Each sentence listed in
    ``sensitive_issues`` (dicts with a ``'sentence'`` key) gets a red
    background. Where both apply, the changed word is shown in yellow.
    All text is HTML-escaped before markup is added.
    """
    import difflib
    import html
    import itertools

    # Align the two word sequences so one inserted or deleted word does not
    # make every following word look changed.
    words = list(re.finditer(r"\S+", corrected))
    matcher = difflib.SequenceMatcher(
        None,
        re.findall(r"\S+", original),
        [w.group() for w in words],
        autojunk=False,
    )

    # One colour slot per character of `corrected`. Painting positions rather
    # than calling str.replace keeps the markup well-formed and stops short
    # words from matching inside other words or inside inserted tags.
    colours = [None] * len(corrected)

    cursor = 0
    for issue in sensitive_issues:
        sent = issue['sentence']
        if not sent:
            continue
        # Search forward first so repeated sentences map to successive hits.
        start = corrected.find(sent, cursor)
        if start == -1:
            start = corrected.find(sent)
        if start == -1:
            continue
        end = start + len(sent)
        colours[start:end] = ['red'] * (end - start)
        cursor = end

    # Yellow is painted last so it wins inside a red sentence.
    for tag, _i1, _i2, j1, j2 in matcher.get_opcodes():
        if tag in ('replace', 'insert'):
            for w in words[j1:j2]:
                colours[w.start():w.end()] = ['yellow'] * (w.end() - w.start())

    pieces = []
    pos = 0
    for colour, run in itertools.groupby(colours):
        length = len(list(run))
        chunk = html.escape(corrected[pos:pos + length])
        pos += length
        if colour is None:
            pieces.append(chunk)
        else:
            pieces.append(f"<span style='background-color: {colour}'>{chunk}</span>")
    return "".join(pieces)
46
-
def review_blog(input_type, text_input, url_input):
    """Run the full review and return ``(highlighted_html, corrected_text)``.

    The text is obtained with ``extract_text``, corrected with
    ``check_grammar``, scanned with ``detect_sensitive_content`` and rendered
    with ``highlight_text``. When there is nothing to review, a message is
    returned in the first slot and ``""`` in the second.
    """
    # Treat whitespace-only fields as empty so they never reach the models.
    if not (text_input or "").strip() and not (url_input or "").strip():
        return "Please provide input text or a URL.", ""
    raw_text = extract_text(input_type, text_input, url_input)
    # The selected source can still yield nothing, e.g. a page with no
    # extractable text or an empty text box while a URL is filled in.
    if not raw_text or not raw_text.strip():
        return "No text could be extracted from the provided input.", ""
    corrected = check_grammar(raw_text)
    sensitive = detect_sensitive_content(corrected)
    highlighted = highlight_text(raw_text, corrected, sensitive)
    return highlighted, corrected
55
 
# Gradio UI
# Layout: a legend, an input-type selector that switches between a text box
# and a URL box, and a button that sends the inputs to review_blog.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🖊️ AI Blog Reviewer")
    gr.Markdown("""Highlights:
- <span style='background-color: yellow'>**Yellow:** Grammar corrections</span><br>
- <span style='background-color: red'>**Red:** Sensitive or toxic content</span>""", elem_id="legend")

    input_type = gr.Radio(["Text", "URL"], label="Input Type", value="Text")
    text_input = gr.Textbox(label="Blog Text", lines=10, visible=True)
    url_input = gr.Textbox(label="Blog URL", visible=False)

    def toggle_input(choice):
        """Show only the input box that matches the selected input type."""
        return {
            text_input: gr.update(visible=choice == "Text"),
            url_input: gr.update(visible=choice == "URL")
        }

    input_type.change(fn=toggle_input, inputs=input_type, outputs=[text_input, url_input])

    review_btn = gr.Button("Review Blog")
    html_output = gr.HTML(label="Highlighted Output")
    final_output = gr.Textbox(label="Corrected Blog", lines=10)

    # review_blog returns (highlighted_html, corrected_text), in that order.
    review_btn.click(fn=review_blog, inputs=[input_type, text_input, url_input], outputs=[html_output, final_output])

demo.launch()
 
1
  import gradio as gr
2
  from transformers import pipeline
3
+ from nltk import download, sent_tokenize
 
 
4
  import re
5
 
# Download necessary NLTK models
download('punkt')
download('punkt_tab')  # Fixes the recent error with PunktTokenizer

# Load the grammar correction model (T5 based)
grammar_corrector = pipeline("text2text-generation", model="vennify/t5-base-grammar-correction")

# Define sensitive/toxic keyword patterns (you can extend this list).
# Each entry is a regular expression; detect_sensitive_content searches every
# sentence for them case-insensitively.
sensitive_keywords = [
    r"\bhate\b", r"\bstupid\b", r"\bidiot\b", r"\btrash\b", r"\bkill\b",
    r"\bnot allowed\b", r"\bnobody cares\b", r"\bterrorist\b", r"\bgo back\b",
    r"\bimmigrants\b", r"\bslur\b", r"\bdisgusting\b"
]
 
 
 
 
 
 
 
19
 
def detect_sensitive_content(text):
    """Return ``text`` as HTML with keyword-flagged sentences coloured red.

    The text is split with ``sent_tokenize``. A sentence that matches any
    pattern in ``sensitive_keywords`` (case-insensitive) is wrapped in a red
    ``<span>``. Sentences are HTML-escaped and joined with single spaces.
    Blank input yields ``""``.
    """
    import html

    if not text or not text.strip():
        return ""

    rendered = []
    # Tokenize into sentences
    for sentence in sent_tokenize(text):
        flagged = any(
            re.search(pattern, sentence, re.IGNORECASE)
            for pattern in sensitive_keywords
        )
        # Escape before wrapping so user text cannot inject markup into the
        # HTML output component.
        safe = html.escape(sentence)
        if flagged:
            safe = f"<span style='color: red'>{safe}</span>"
        rendered.append(safe)
    return " ".join(rendered)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
 
def highlight_grammar(original, corrected):
    """Return ``corrected`` as HTML with changed words coloured yellow.

    Both strings are split on whitespace and aligned with
    ``difflib.SequenceMatcher``. Words of ``corrected`` that were replaced or
    inserted relative to ``original`` are wrapped in a yellow ``<span>``;
    unchanged words are emitted as-is. Words that exist only in ``original``
    do not appear in the output. Every word is HTML-escaped and the result is
    joined with single spaces.
    """
    import difflib
    import html

    original_words = original.split()
    corrected_words = corrected.split()

    # A sequence alignment (rather than position-by-position comparison)
    # keeps one inserted or dropped word from marking the whole tail changed.
    matcher = difflib.SequenceMatcher(
        None, original_words, corrected_words, autojunk=False
    )

    highlighted = []
    for tag, _i1, _i2, j1, j2 in matcher.get_opcodes():
        # 'delete' opcodes cover an empty slice of corrected_words.
        for word in corrected_words[j1:j2]:
            safe = html.escape(word)
            if tag == 'equal':
                highlighted.append(safe)
            else:
                highlighted.append(f"<span style='color: yellow'>{safe}</span>")

    return " ".join(highlighted)
52
+
def review_blog(input_text, input_type):
    """Review ``input_text`` and return updates for the two HTML outputs.

    Returns a pair of ``gr.update`` objects: the first carries the
    grammar-highlighted HTML (or a status message), the second the
    sensitive-content HTML. Both early-exit paths return the same two-value
    shape because the click handler is wired to two output components.
    """
    if input_type == "URL":
        return gr.update(value="URL support is under development."), gr.update(value="")

    # Nothing to review: skip the model call entirely.
    if not input_text or not input_text.strip():
        return gr.update(value="Please provide blog text to review."), gr.update(value="")

    # Step 1: Grammar correction
    # NOTE(review): the whole text goes to the model in one call with
    # max_length=512; long posts may come back shortened. Confirm whether
    # per-sentence correction is wanted.
    corrected_output = grammar_corrector(input_text, max_length=512, do_sample=False)[0]["generated_text"]

    # Step 2: Highlight grammar issues
    grammar_highlighted = highlight_grammar(input_text, corrected_output)

    # Step 3: Highlight sensitive content
    sensitive_highlighted = detect_sensitive_content(corrected_output)

    return gr.update(value=grammar_highlighted), gr.update(value=sensitive_highlighted)
67
+
# Gradio UI
# Layout: a legend, an input-type selector, one text box, and a button that
# sends (blog_input, input_type) to review_blog and fills the two HTML panes.
with gr.Blocks(theme=gr.themes.Monochrome()) as demo:
    gr.Markdown("📝 **AI Blog Reviewer**")
    gr.Markdown("#### Highlights:\n- <span style='color: yellow'>Yellow</span>: Grammar corrections\n- <span style='color: red'>Red</span>: Sensitive or toxic content")

    input_type = gr.Radio(["Text", "URL"], label="Input Type", value="Text")
    blog_input = gr.Textbox(lines=8, label="Blog Text", placeholder="Paste your blog content here...")
    review_btn = gr.Button("Review Blog")

    gr.Markdown("#### 🔧 Grammar Corrections:")
    grammar_output = gr.HTML()
    gr.Markdown("#### 🚨 Sensitive/Toxic Content:")
    sensitive_output = gr.HTML()

    # The input order here must match review_blog's (input_text, input_type).
    review_btn.click(review_blog, inputs=[blog_input, input_type], outputs=[grammar_output, sensitive_output])

demo.launch()