Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -1,81 +1,84 @@
|
|
1 |
import gradio as gr
|
2 |
from transformers import pipeline
|
3 |
-
from
|
4 |
-
import nltk
|
5 |
-
from nltk.tokenize import sent_tokenize
|
6 |
import re
|
7 |
|
8 |
-
|
|
|
|
|
9 |
|
10 |
-
# Load grammar correction
|
11 |
grammar_corrector = pipeline("text2text-generation", model="vennify/t5-base-grammar-correction")
|
12 |
-
toxicity_classifier = pipeline("text-classification", model="unitary/toxic-bert")
|
13 |
|
14 |
-
#
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
article.parse()
|
21 |
-
return article.text
|
22 |
-
return text_input
|
23 |
-
|
24 |
-
def check_grammar(text):
|
25 |
-
result = grammar_corrector(text, max_length=512, do_sample=False)
|
26 |
-
return result[0]['generated_text']
|
27 |
|
28 |
def detect_sensitive_content(text):
|
|
|
29 |
sentences = sent_tokenize(text)
|
30 |
-
|
31 |
-
for
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
highlighted = highlighted.replace(sent, f"<span style='background-color: red'>{sent}</span>")
|
42 |
-
diff_words = [(o, c) for o, c in zip(original.split(), corrected.split()) if o != c]
|
43 |
-
for o, c in diff_words:
|
44 |
-
highlighted = highlighted.replace(c, f"<span style='background-color: yellow'>{c}</span>")
|
45 |
-
return highlighted
|
46 |
-
|
47 |
-
def review_blog(input_type, text_input, url_input):
|
48 |
-
if not text_input and not url_input:
|
49 |
-
return "Please provide input text or a URL.", ""
|
50 |
-
raw_text = extract_text(input_type, text_input, url_input)
|
51 |
-
corrected = check_grammar(raw_text)
|
52 |
-
sensitive = detect_sensitive_content(corrected)
|
53 |
-
highlighted = highlight_text(raw_text, corrected, sensitive)
|
54 |
-
return highlighted, corrected
|
55 |
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
- <span style='background-color: yellow'>**Yellow:** Grammar corrections</span><br>
|
61 |
-
- <span style='background-color: red'>**Red:** Sensitive or toxic content</span>""", elem_id="legend")
|
62 |
|
63 |
-
|
64 |
-
|
65 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
66 |
|
67 |
-
|
68 |
-
|
69 |
-
text_input: gr.update(visible=choice == "Text"),
|
70 |
-
url_input: gr.update(visible=choice == "URL")
|
71 |
-
}
|
72 |
|
73 |
-
|
|
|
74 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
75 |
review_btn = gr.Button("Review Blog")
|
76 |
-
html_output = gr.HTML(label="Highlighted Output")
|
77 |
-
final_output = gr.Textbox(label="Corrected Blog", lines=10)
|
78 |
|
79 |
-
|
|
|
|
|
|
|
|
|
|
|
80 |
|
81 |
demo.launch()
|
|
|
1 |
import gradio as gr
|
2 |
from transformers import pipeline
|
3 |
+
from nltk import download, sent_tokenize
|
|
|
|
|
4 |
import re
|
5 |
|
6 |
+
# NLTK tokenizer data used by sent_tokenize; fetched once at import time.
download("punkt")
# Requested in addition to 'punkt': the original author added it to fix an
# error raised by PunktTokenizer.
download("punkt_tab")

# T5-based grammar correction model, loaded once at startup.
grammar_corrector = pipeline(
    "text2text-generation",
    model="vennify/t5-base-grammar-correction",
)

# Regex patterns (whole-word / whole-phrase) that mark a sentence as
# sensitive or toxic. Kept as plain strings; extend this list as needed.
sensitive_keywords = [
    r"\bhate\b",
    r"\bstupid\b",
    r"\bidiot\b",
    r"\btrash\b",
    r"\bkill\b",
    r"\bnot allowed\b",
    r"\bnobody cares\b",
    r"\bterrorist\b",
    r"\bgo back\b",
    r"\bimmigrants\b",
    r"\bslur\b",
    r"\bdisgusting\b",
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
19 |
|
20 |
def detect_sensitive_content(text):
    """Return *text* as HTML with sensitive sentences wrapped in a red span.

    The text is split into sentences with ``sent_tokenize``. A sentence is
    flagged when any pattern in ``sensitive_keywords`` matches it
    (case-insensitively). Sentences are re-joined with single spaces.

    Args:
        text: Plain text to scan. ``None`` or an empty string yields ``""``.

    Returns:
        An HTML string. Sentence text is HTML-escaped so that markup in the
        input is displayed literally instead of being interpreted.
    """
    import html  # local import keeps this block self-contained

    if not text:
        return ""

    rendered = []
    for sentence in sent_tokenize(text):
        # Match against the raw sentence; escape only what gets displayed.
        is_flagged = any(
            re.search(pattern, sentence, re.IGNORECASE)
            for pattern in sensitive_keywords
        )
        safe_sentence = html.escape(sentence, quote=False)
        if is_flagged:
            safe_sentence = f"<span style='color: red'>{safe_sentence}</span>"
        rendered.append(safe_sentence)
    return " ".join(rendered)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
34 |
|
35 |
+
def highlight_grammar(original, corrected):
    """Return *corrected* as HTML with the changed words highlighted in yellow.

    Both texts are split on whitespace and aligned as word sequences. Words
    of *corrected* that replace words of *original*, or that were inserted,
    are wrapped in a yellow ``<span>``; unchanged words are emitted as-is.
    Words that exist only in *original* (deletions) produce no output.

    A positional word-by-word comparison is deliberately avoided: a single
    inserted or dropped word would shift every later pair and mark the rest
    of the text as changed.

    Args:
        original: The text as entered by the user.
        corrected: The grammar-corrected text.

    Returns:
        An HTML string of the corrected words joined by single spaces. Word
        text is HTML-escaped so input markup is displayed literally.
    """
    import difflib
    import html

    original_words = original.split()
    corrected_words = corrected.split()

    # autojunk=False: the "popular element" heuristic would otherwise ignore
    # frequent words (e.g. "the") once the text reaches 200+ words.
    matcher = difflib.SequenceMatcher(
        None, original_words, corrected_words, autojunk=False
    )

    rendered = []
    for tag, _, _, start, end in matcher.get_opcodes():
        is_changed = tag in ("replace", "insert")
        for word in corrected_words[start:end]:
            safe_word = html.escape(word, quote=False)
            if is_changed:
                safe_word = f"<span style='color: yellow'>{safe_word}</span>"
            rendered.append(safe_word)
    return " ".join(rendered)
|
52 |
+
|
53 |
+
def review_blog(input_text, input_type):
    """Run grammar correction and sensitive-content detection on a blog text.

    This is the click handler for the "Review Blog" button, which is wired
    to two output components, so every code path returns exactly two values.

    Args:
        input_text: The blog text entered by the user.
        input_type: Either ``"Text"`` or ``"URL"`` (the radio selection).

    Returns:
        A 2-tuple ``(grammar_html, sensitive_html)``. For the unsupported URL
        mode or for empty input, the first element is a short message and
        the second is an empty string.
    """
    if input_type == "URL":
        # Two values are required here too: one per wired output component.
        return "URL support is under development.", ""

    if not input_text or not input_text.strip():
        # Avoid sending empty input to the model.
        return "Please provide some blog text to review.", ""

    # Step 1: grammar correction.
    # NOTE(review): max_length=512 caps the generated output, so long posts
    # are presumably truncated - confirm, and consider correcting the text
    # sentence by sentence.
    generated = grammar_corrector(input_text, max_length=512, do_sample=False)
    corrected_output = generated[0]["generated_text"]

    # Step 2: highlight the words the correction changed.
    grammar_highlighted = highlight_grammar(input_text, corrected_output)

    # Step 3: highlight sensitive sentences in the corrected text.
    sensitive_highlighted = detect_sensitive_content(corrected_output)

    return (
        gr.update(value=grammar_highlighted),
        gr.update(value=sensitive_highlighted),
    )
|
67 |
+
|
68 |
+
# Gradio UI
|
69 |
+
# Gradio UI: a text input, a review button, and two HTML panels that show
# the outputs of review_blog.
with gr.Blocks(theme=gr.themes.Monochrome()) as demo:
    gr.Markdown("📝 **AI Blog Reviewer**")
    # Legend for the two highlight colours used in the output panels.
    gr.Markdown(
        "#### Highlights:\n"
        "- <span style='color: yellow'>Yellow</span>: Grammar corrections\n"
        "- <span style='color: red'>Red</span>: Sensitive or toxic content"
    )

    input_type = gr.Radio(
        ["Text", "URL"],
        label="Input Type",
        value="Text",
    )
    blog_input = gr.Textbox(
        lines=8,
        label="Blog Text",
        placeholder="Paste your blog content here...",
    )
    review_btn = gr.Button("Review Blog")

    gr.Markdown("#### 🔧 Grammar Corrections:")
    grammar_output = gr.HTML()
    gr.Markdown("#### 🚨 Sensitive/Toxic Content:")
    sensitive_output = gr.HTML()

    # review_blog receives (blog text, input type) and fills both panels.
    review_btn.click(
        fn=review_blog,
        inputs=[blog_input, input_type],
        outputs=[grammar_output, sensitive_output],
    )

demo.launch()
|