Spaces:

JaishnaCodz
/

AI-Blog-Reviewer

Running

App Files Files Community

JaishnaCodz committed on Jul 16

Commit

b8a6b71

verified ·

1 Parent(s): d478d45

Update app.py

Browse files

Files changed (1) hide show

app.py +65 -62

app.py CHANGED Viewed

@@ -1,81 +1,84 @@
 import gradio as gr
 from transformers import pipeline
-from newspaper import Article
-import nltk
-from nltk.tokenize import sent_tokenize
 import re
-nltk.download('punkt')
-# Load grammar correction and toxicity detection models
 grammar_corrector = pipeline("text2text-generation", model="vennify/t5-base-grammar-correction")
-toxicity_classifier = pipeline("text-classification", model="unitary/toxic-bert")
-# Functions
-def extract_text(input_type, text_input, url_input):
-    if input_type == "URL" and url_input:
-        article = Article(url_input)
-        article.download()
-        article.parse()
-        return article.text
-    return text_input
-def check_grammar(text):
-    result = grammar_corrector(text, max_length=512, do_sample=False)
-    return result[0]['generated_text']
 def detect_sensitive_content(text):
     sentences = sent_tokenize(text)
-    sensitive = []
-    for i, sentence in enumerate(sentences):
-        result = toxicity_classifier(sentence)
-        if result[0]['label'] == 'toxic' and result[0]['score'] > 0.7:
-            sensitive.append({"sentence": sentence, "score": result[0]['score'], "index": i})
-    return sensitive
-def highlight_text(original, corrected, sensitive_issues):
-    highlighted = corrected
-    for issue in sensitive_issues:
-        sent = issue['sentence']
-        highlighted = highlighted.replace(sent, f"<span style='background-color: red'>{sent}</span>")
-    diff_words = [(o, c) for o, c in zip(original.split(), corrected.split()) if o != c]
-    for o, c in diff_words:
-        highlighted = highlighted.replace(c, f"<span style='background-color: yellow'>{c}</span>")
-    return highlighted
-def review_blog(input_type, text_input, url_input):
-    if not text_input and not url_input:
-        return "Please provide input text or a URL.", ""
-    raw_text = extract_text(input_type, text_input, url_input)
-    corrected = check_grammar(raw_text)
-    sensitive = detect_sensitive_content(corrected)
-    highlighted = highlight_text(raw_text, corrected, sensitive)
-    return highlighted, corrected
-# Gradio UI
-with gr.Blocks(theme=gr.themes.Soft()) as demo:
-    gr.Markdown("# 🖊️ AI Blog Reviewer")
-    gr.Markdown("""Highlights:
-    - <span style='background-color: yellow'>**Yellow:** Grammar corrections</span><br>
-    - <span style='background-color: red'>**Red:** Sensitive or toxic content</span>""", elem_id="legend")
-    input_type = gr.Radio(["Text", "URL"], label="Input Type", value="Text")
-    text_input = gr.Textbox(label="Blog Text", lines=10, visible=True)
-    url_input = gr.Textbox(label="Blog URL", visible=False)
-    def toggle_input(choice):
-        return {
-            text_input: gr.update(visible=choice == "Text"),
-            url_input: gr.update(visible=choice == "URL")
-        }
-    input_type.change(fn=toggle_input, inputs=input_type, outputs=[text_input, url_input])
     review_btn = gr.Button("Review Blog")
-    html_output = gr.HTML(label="Highlighted Output")
-    final_output = gr.Textbox(label="Corrected Blog", lines=10)
-    review_btn.click(fn=review_blog, inputs=[input_type, text_input, url_input], outputs=[html_output, final_output])
 demo.launch()

 import gradio as gr
 from transformers import pipeline
+from nltk import download, sent_tokenize
 import re
+# Download necessary NLTK models
+download('punkt')
+download('punkt_tab')  # Fixes the recent error with PunktTokenizer
+# Load the grammar correction model (T5 based)
 grammar_corrector = pipeline("text2text-generation", model="vennify/t5-base-grammar-correction")
+# Define sensitive/toxic keyword patterns (you can extend this list)
+sensitive_keywords = [
+    r"\bhate\b", r"\bstupid\b", r"\bidiot\b", r"\btrash\b", r"\bkill\b",
+    r"\bnot allowed\b", r"\bnobody cares\b", r"\bterrorist\b", r"\bgo back\b",
+    r"\bimmigrants\b", r"\bslur\b", r"\bdisgusting\b"
+]
 def detect_sensitive_content(text):
+    # Tokenize into sentences
     sentences = sent_tokenize(text)
+    highlighted = []
+    for sentence in sentences:
+        flagged = False
+        for pattern in sensitive_keywords:
+            if re.search(pattern, sentence, re.IGNORECASE):
+                flagged = True
+                break
+        if flagged:
+            sentence = f"<span style='color: red'>{sentence}</span>"
+        highlighted.append(sentence)
+    return " ".join(highlighted)
+def highlight_grammar(original, corrected):
+    original_words = original.split()
+    corrected_words = corrected.split()
+    highlighted = []
+    for orig, corr in zip(original_words, corrected_words):
+        if orig != corr:
+            highlighted.append(f"<span style='color: yellow'>{corr}</span>")
+        else:
+            highlighted.append(corr)
+    # Add remaining corrected words
+    if len(corrected_words) > len(original_words):
+        for word in corrected_words[len(original_words):]:
+            highlighted.append(f"<span style='color: yellow'>{word}</span>")
+    return " ".join(highlighted)
+def review_blog(input_text, input_type):
+    if input_type == "URL":
+        return "URL support is under development."
+    # Step 1: Grammar correction
+    corrected_output = grammar_corrector(input_text, max_length=512, do_sample=False)[0]["generated_text"]
+    # Step 2: Highlight grammar issues
+    grammar_highlighted = highlight_grammar(input_text, corrected_output)
+    # Step 3: Highlight sensitive content
+    sensitive_highlighted = detect_sensitive_content(corrected_output)
+    return gr.update(value=grammar_highlighted), gr.update(value=sensitive_highlighted)
+# Gradio UI
+with gr.Blocks(theme=gr.themes.Monochrome()) as demo:
+    gr.Markdown("📝 **AI Blog Reviewer**")
+    gr.Markdown("#### Highlights:\n- <span style='color: yellow'>Yellow</span>: Grammar corrections\n- <span style='color: red'>Red</span>: Sensitive or toxic content")
+    input_type = gr.Radio(["Text", "URL"], label="Input Type", value="Text")
+    blog_input = gr.Textbox(lines=8, label="Blog Text", placeholder="Paste your blog content here...")
     review_btn = gr.Button("Review Blog")
+    gr.Markdown("#### 🔧 Grammar Corrections:")
+    grammar_output = gr.HTML()
+    gr.Markdown("#### 🚨 Sensitive/Toxic Content:")
+    sensitive_output = gr.HTML()
+    review_btn.click(review_blog, inputs=[blog_input, input_type], outputs=[grammar_output, sensitive_output])
 demo.launch()