Spaces:

JaishnaCodz
/

AI-Blog-Reviewer

Running

App Files Files Community

JaishnaCodz committed on Jul 16

Commit

6784902

verified ·

1 Parent(s): 3b87bdd

Update app.py

Browse files

Files changed (1) hide show

app.py +51 -44

app.py CHANGED Viewed

@@ -3,72 +3,79 @@ from transformers import pipeline
 from newspaper import Article
 import nltk
 from nltk.tokenize import sent_tokenize
-nltk.download("punkt")
-# Load models
 grammar_corrector = pipeline("text2text-generation", model="vennify/t5-base-grammar-correction")
 toxicity_classifier = pipeline("text-classification", model="unitary/toxic-bert")
-# Extract text from blog or URL
 def extract_text(input_type, text_input, url_input):
-    if input_type == "Text":
-        return text_input
-    try:
         article = Article(url_input)
         article.download()
         article.parse()
         return article.text
-    except Exception as e:
-        return f"Error fetching URL: {str(e)}"
-# Highlight grammar and toxic issues
-def review_blog(input_type, text_input, url_input):
-    text = extract_text(input_type, text_input, url_input)
-    if text.startswith("Error"):
-        return text, "", []
-    # Grammar correction
-    grammar_output = grammar_corrector(text, max_length=512)[0]["generated_text"]
-    # Toxic content detection
     sentences = sent_tokenize(text)
-    toxic_sentences = []
-    for sent in sentences:
-        result = toxicity_classifier(sent)[0]
-        if result["label"] == "toxic" and result["score"] > 0.7:
-            toxic_sentences.append(sent)
-    # Highlight toxic sentences
-    highlighted = text
-    for sent in toxic_sentences:
-        highlighted = highlighted.replace(sent, f"<span style='background-color:red'>{sent}</span>")
-    return highlighted, grammar_output, toxic_sentences
 # Gradio UI
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
-    gr.Markdown("## 📝 Blog Review AI")
-    gr.Markdown("Checks for grammar & sensitive content (toxicity) in blog text or URL.")
-    input_type = gr.Radio(["Text", "URL"], value="Text", label="Input Type")
-    text_input = gr.Textbox(label="Enter blog text", lines=10, visible=True)
-    url_input = gr.Textbox(label="Enter blog URL", visible=False)
-    def toggle_input(t):
-        return {text_input: gr.update(visible=t == "Text"), url_input: gr.update(visible=t == "URL")}
-    input_type.change(toggle_input, input_type, [text_input, url_input])
-    review_btn = gr.Button("Review")
-    highlight_output = gr.HTML(label="Toxic Highlighted Text")
-    corrected_text = gr.Textbox(label="Grammar Corrected Text", lines=10)
-    toxic_list = gr.Textbox(label="Toxic Sentences Detected", lines=5)
-    review_btn.click(
-        review_blog,
-        inputs=[input_type, text_input, url_input],
-        outputs=[highlight_output, corrected_text, toxic_list]
-    )
 demo.launch()

 from newspaper import Article
 import nltk
 from nltk.tokenize import sent_tokenize
+import re
+nltk.download('punkt')
+# Load grammar correction and toxicity detection models
 grammar_corrector = pipeline("text2text-generation", model="vennify/t5-base-grammar-correction")
 toxicity_classifier = pipeline("text-classification", model="unitary/toxic-bert")
+# Functions
 def extract_text(input_type, text_input, url_input):
+    if input_type == "URL" and url_input:
         article = Article(url_input)
         article.download()
         article.parse()
         return article.text
+    return text_input
def check_grammar(text):
    """Return a grammar-corrected version of ``text``.

    Args:
        text: The text to correct (may be empty or None).

    Returns:
        The text generated by ``grammar_corrector``. Blank input is returned
        unchanged (None becomes "") without calling the model.

    Note:
        The whole text is sent in a single call with ``max_length=512``, so
        the corrected output of a long post may be cut short.
    """
    # Nothing to correct: skip the model call instead of generating from "".
    if not text or not text.strip():
        return text or ""
    # The model card for this checkpoint shows inputs prefixed with the task
    # name "grammar: "; without it the model is not prompted as trained.
    result = grammar_corrector(f"grammar: {text}", max_length=512, do_sample=False)
    return result[0]['generated_text']
def detect_sensitive_content(text, threshold=0.7):
    """Find sentences that the toxicity classifier flags as toxic.

    Args:
        text: The text to scan; it is split into sentences with
            ``sent_tokenize``.
        threshold: A sentence is reported only when its top label is
            "toxic" and the score is strictly greater than this value.

    Returns:
        A list of dicts, one per flagged sentence, with the keys
        ``"sentence"``, ``"score"`` and ``"index"`` (position of the
        sentence in the tokenised text). Empty for blank input.
    """
    if not text or not text.strip():
        return []
    sensitive = []
    for index, sentence in enumerate(sent_tokenize(text)):
        # truncation=True keeps an overlong sentence from exceeding the
        # classifier's maximum input length.
        top = toxicity_classifier(sentence, truncation=True)[0]
        if top['label'] == 'toxic' and top['score'] > threshold:
            sensitive.append({"sentence": sentence, "score": top['score'], "index": index})
    return sensitive
def highlight_text(original, corrected, sensitive_issues):
    """Render ``corrected`` as HTML with grammar and toxicity highlights.

    Words that differ from ``original`` are wrapped in a yellow span and
    every occurrence of a flagged sentence is wrapped in a red span. A
    changed word inside a flagged sentence gets a yellow span nested in the
    red one.

    Args:
        original: Text before grammar correction (may be empty or None).
        corrected: Text after grammar correction; this is what is rendered.
        sensitive_issues: Iterable of dicts with a ``"sentence"`` key whose
            value is searched for verbatim in ``corrected``.

    Returns:
        An HTML string. All text taken from ``corrected`` is HTML-escaped,
        so markup in the input is displayed rather than interpreted.
    """
    import difflib
    import html
    import itertools
    import re

    if not corrected:
        return ""

    red_open = "<span style='background-color: red'>"
    yellow_open = "<span style='background-color: yellow'>"
    close = "</span>"
    size = len(corrected)

    # Mark characters instead of editing the string in place, so highlights
    # can never touch the markup that is added later.
    is_red = [False] * size
    for issue in sensitive_issues or ():
        sentence = issue['sentence']
        if not sentence:
            continue
        start = corrected.find(sentence)
        while start != -1:
            stop = start + len(sentence)
            is_red[start:stop] = [True] * len(sentence)
            start = corrected.find(sentence, stop)

    # Align the two word sequences so that one inserted or deleted word does
    # not make every following word look changed.
    word_spans = [match.span() for match in re.finditer(r"\S+", corrected)]
    corrected_words = [corrected[begin:end] for begin, end in word_spans]
    matcher = difflib.SequenceMatcher(
        None, (original or "").split(), corrected_words, autojunk=False
    )
    is_yellow = [False] * size
    for tag, _, _, j1, j2 in matcher.get_opcodes():
        if tag in ("replace", "insert"):
            for begin, end in word_spans[j1:j2]:
                is_yellow[begin:end] = [True] * (end - begin)

    # Emit runs of characters sharing the same (red, yellow) state.
    pieces = []
    inside_red = False
    runs = itertools.groupby(range(size), key=lambda i: (is_red[i], is_yellow[i]))
    for (red, yellow), run in runs:
        positions = list(run)
        chunk = html.escape(corrected[positions[0]:positions[-1] + 1])
        if red and not inside_red:
            pieces.append(red_open)
        elif inside_red and not red:
            pieces.append(close)
        inside_red = red
        pieces.append(f"{yellow_open}{chunk}{close}" if yellow else chunk)
    if inside_red:
        pieces.append(close)
    return "".join(pieces)
def review_blog(input_type, text_input, url_input):
    """Run the full review and return the values for the two UI outputs.

    Args:
        input_type: Selected input mode, "Text" or "URL".
        text_input: Contents of the blog text box (may be empty or None).
        url_input: Contents of the blog URL box (may be empty or None).

    Returns:
        A ``(highlighted_html, corrected_text)`` tuple. When there is
        nothing to review or the URL cannot be fetched, the first element
        is a message for the user and the second is an empty string.
    """
    import html

    # Treat whitespace-only boxes as empty.
    has_text = bool(text_input and text_input.strip())
    has_url = bool(url_input and url_input.strip())
    if not has_text and not has_url:
        return "Please provide input text or a URL.", ""

    # This is the UI boundary: report a failed fetch as a message instead of
    # letting the exception surface as an opaque error in the interface.
    try:
        raw_text = extract_text(input_type, text_input, url_input)
    except Exception as exc:
        # The message is rendered in an HTML component, so escape it.
        return f"Error fetching URL: {html.escape(str(exc))}", ""

    # E.g. "Text" mode with an empty text box, or an article with no body.
    if not raw_text or not raw_text.strip():
        return "No text was found to review.", ""

    corrected = check_grammar(raw_text)
    sensitive = detect_sensitive_content(corrected)
    highlighted = highlight_text(raw_text, corrected, sensitive)
    return highlighted, corrected
 # Gradio UI
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
+    gr.Markdown("# 🖊️ AI Blog Reviewer")
+    gr.Markdown("""Highlights:
+    - <span style='background-color: yellow'>**Yellow:** Grammar corrections</span><br>
+    - <span style='background-color: red'>**Red:** Sensitive or toxic content</span>""", elem_id="legend")
+    input_type = gr.Radio(["Text", "URL"], label="Input Type", value="Text")
+    text_input = gr.Textbox(label="Blog Text", lines=10, visible=True)
+    url_input = gr.Textbox(label="Blog URL", visible=False)
+    def toggle_input(choice):
+        return {
+            text_input: gr.update(visible=choice == "Text"),
+            url_input: gr.update(visible=choice == "URL")
+        }
+    input_type.change(fn=toggle_input, inputs=input_type, outputs=[text_input, url_input])
+    review_btn = gr.Button("Review Blog")
+    html_output = gr.HTML(label="Highlighted Output")
+    final_output = gr.Textbox(label="Corrected Blog", lines=10)
+    review_btn.click(fn=review_blog, inputs=[input_type, text_input, url_input], outputs=[html_output, final_output])
 demo.launch()