JaishnaCodz committed
Commit 56f7cbb · verified · 1 Parent(s): 3ee9037

Update app.py

Files changed (1):
  1. app.py +120 -220
app.py CHANGED
@@ -1,252 +1,152 @@
  import gradio as gr
- import language_tool_python
- import requests
- from newspaper import Article
  from transformers import pipeline
- import re
  import nltk
  from nltk.tokenize import sent_tokenize
- from autogen import AssistantAgent, UserProxyAgent
-
- # Download required NLTK data at startup
- try:
-     nltk.download('punkt')
-     nltk.download('punkt_tab')  # Ensure language-specific data is downloaded
-     print("NLTK data (punkt and punkt_tab) downloaded successfully.")
- except Exception as e:
-     print(f"Error downloading NLTK data: {str(e)}")
-
- # Initialize Hugging Face models
- try:
-     toxicity_classifier = pipeline("text-classification", model="unitary/toxic-bert")
-     print("Toxicity classifier loaded successfully.")
- except Exception as e:
-     print(f"Error loading toxicity classifier: {str(e)}")
-
- try:
-     summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-6-6")
-     print("Summarizer loaded successfully.")
- except Exception as e:
-     print(f"Error loading summarizer: {str(e)}")
-
- # Define AutoGen Agents (for modularity, but we'll call functions directly)
- user_proxy = UserProxyAgent(
-     name="UserProxy",
-     system_message="Coordinates tasks and passes inputs to other agents.",
-     human_input_mode="NEVER",
-     code_execution_config={"work_dir": "autogen_workdir", "use_docker": False}
- )
-
- text_extraction_agent = AssistantAgent(
-     name="TextExtractor",
-     system_message="Extracts text from URLs or processes raw text."
- )
-
- grammar_check_agent = AssistantAgent(
-     name="GrammarChecker",
-     system_message="Identifies spelling and grammar errors using LanguageTool."
- )
-
- sensitive_content_agent = AssistantAgent(
-     name="SensitiveContentDetector",
-     system_message="Detects toxic or sensitive content (e.g., racism, gender bias)."
- )
-
- suggestion_agent = AssistantAgent(
-     name="SuggestionGenerator",
-     system_message="Generates suggestions to fix grammar and rephrase sensitive content."
- )
-
- coordinator_agent = AssistantAgent(
-     name="Coordinator",
-     system_message="Combines results, highlights issues, and formats outputs."
- )
-
- # Task functions
  def extract_text(input_type, text_input, url_input):
-     try:
-         if input_type == "URL" and url_input:
-             article = Article(url_input)
-             article.download()
-             article.parse()
-             print(f"Extracted text from URL: {url_input[:50]}...")  # Debug
-             return article.text
-         print(f"Using input text: {text_input[:50]}...")  # Debug
-         return text_input
-     except Exception as e:
-         print(f"Error in extract_text: {str(e)}")
-         return f"Error fetching URL or processing text: {str(e)}"

  def check_grammar(text):
-     try:
-         grammar_tool = language_tool_python.LanguageToolPublicAPI('en-US')
-         print("Checking grammar...")  # Debug
-         matches = grammar_tool.check(text)
-         return [
-             {
-                 "text": match.context,
-                 "error": match.message,
-                 "suggestions": match.replacements,
-                 "offset": match.offset,
-                 "length": match.errorLength
-             } for match in matches
-         ]
-     except Exception as e:
-         print(f"Grammar check failed: {str(e)}")
-         return [{"text": "", "error": f"Grammar check failed: {str(e)}", "suggestions": [], "offset": 0, "length": 0}]

  def detect_sensitive_content(text):
-     try:
-         sentences = sent_tokenize(text)
-         sensitive_issues = []
-         for i, sentence in enumerate(sentences):
-             result = toxicity_classifier(sentence)
-             if result[0]['label'] == 'toxic' and result[0]['score'] > 0.7:
-                 sensitive_issues.append({
-                     "sentence": sentence,
-                     "score": result[0]['score'],
-                     "index": i
-                 })
-         print(f"Detected {len(sensitive_issues)} sensitive issues.")  # Debug
-         return sensitive_issues
-     except Exception as e:
-         print(f"Error in detect_sensitive_content: {str(e)}")
-         return []

  def generate_suggestions(text, grammar_issues, sensitive_issues):
-     try:
-         suggestions = []
-         for issue in grammar_issues:
-             if issue['suggestions']:
-                 suggestions.append(f"Replace '{issue['text']}' with '{issue['suggestions'][0]}' ({issue['error']})")
-         for issue in sensitive_issues:
-             summary = summarizer(issue['sentence'], max_length=50, min_length=10, do_sample=False)[0]['summary_text']
-             suggestions.append(f"Rephrase sensitive content '{issue['sentence']}' to: '{summary}' (Toxicity score: {issue['score']:.2f})")
-         print(f"Generated {len(suggestions)} suggestions.")  # Debug
-         return suggestions
-     except Exception as e:
-         print(f"Error in generate_suggestions: {str(e)}")
-         return []

  def highlight_text(text, grammar_issues, sensitive_issues):
-     try:
-         highlighted = text
-         offset_adjust = 0
-         for issue in grammar_issues:
-             start = issue['offset'] + offset_adjust
-             end = start + issue['length']
-             error_text = highlighted[start:end]
-             highlighted = highlighted[:start] + f"<span style='background-color: yellow'>{error_text}</span>" + highlighted[end:]
-             offset_adjust += len("<span style='background-color: yellow'>") + len("</span>")
-         sentences = sent_tokenize(text)
-         offset_adjust = 0
-         for issue in sensitive_issues:
-             sentence = issue['sentence']
-             start = highlighted.find(sentence, offset_adjust)
-             if start != -1:
-                 end = start + len(sentence)
-                 highlighted = highlighted[:start] + f"<span style='background-color: red'>{sentence}</span>" + highlighted[end:]
-                 offset_adjust = end
-         return highlighted
-     except Exception as e:
-         print(f"Error in highlight_text: {str(e)}")
-         return text
-
- # Main function to process input
- def review_blog(input_type, text_input, url_input):
-     try:
-         if not text_input and not url_input:
-             return "Please provide text or a URL.", "", []
-
-         # Step 1: Text Extraction
-         text = extract_text(input_type, text_input, url_input)
-         print(f"Processed text: {text[:50]}...")  # Debug
-         if text.startswith("Error"):
-             return text, "", []
-
-         # Step 2: Grammar Check
-         grammar_issues = check_grammar(text)
-
-         # Step 3: Sensitive Content Detection
-         sensitive_issues = detect_sensitive_content(text)
-
-         # Step 4: Generate Suggestions
-         suggestions = generate_suggestions(text, grammar_issues, sensitive_issues)
-
-         # Step 5: Coordinate Output
-         highlighted_text = highlight_text(text, grammar_issues, sensitive_issues)
-         suggestions_text = "\n".join([f"{i+1}. {sug}" for i, sug in enumerate(suggestions)])
-
-         return highlighted_text, suggestions_text, suggestions
-     except Exception as e:
-         print(f"Error in review_blog: {str(e)}")
-         return f"Error processing input: {str(e)}", "", []

  def apply_changes(text, suggestions, approved_indices):
-     try:
-         sentences = sent_tokenize(text)
-         for idx in approved_indices.split(','):
-             try:
-                 idx = int(idx.strip()) - 1
-                 if idx < len(suggestions):
-                     suggestion = suggestions[idx]
-                     match = re.search(r"'([^']+)'$", suggestion)
-                     if match:
-                         new_text = match.group(1)
-                         if "Rephrase sensitive content" in suggestion:
-                             orig_match = re.search(r"'([^']+)'\s+to:", suggestion)
-                             if orig_match:
-                                 orig_sentence = orig_match.group(1)
-                                 text = text.replace(orig_sentence, new_text)
-                         else:
-                             orig_match = re.search(r"Replace '([^']+)'\s+with\s+'([^']+)'", suggestion)
-                             if orig_match:
-                                 orig_text = orig_match.group(1)
-                                 text = text.replace(orig_text, new_text)
-             except ValueError:
-                 continue  # Skip invalid indices
-         return text
-     except Exception as e:
-         print(f"Error in apply_changes: {str(e)}")
-         return text
-
- # Gradio interface
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
-     gr.Markdown("# AI Blog Reviewer with AutoGen")
-     gr.Markdown("Enter blog text or a URL to review for spelling, grammar, and sensitive content. Approve suggested changes to update the text.")

      input_type = gr.Radio(["Text", "URL"], label="Input Type", value="Text")
-     text_input = gr.Textbox(label="Blog Text", placeholder="Enter your blog text here...", lines=10, visible=True)
-     url_input = gr.Textbox(label="Blog URL", placeholder="Enter the blog URL here...", visible=False)

-     def toggle_input(input_type):
          return {
-             text_input: gr.update(visible=input_type == "Text"),
-             url_input: gr.update(visible=input_type == "URL")
          }

      input_type.change(fn=toggle_input, inputs=input_type, outputs=[text_input, url_input])

-     review_button = gr.Button("Review Content")
-     highlighted_output = gr.HTML(label="Highlighted Issues (Yellow: Grammar, Red: Sensitive)")
-     suggestions_output = gr.Textbox(label="Suggestions", lines=10)
-     approve_indices = gr.Textbox(label="Approve Suggestions (Enter numbers, e.g., '1,2,3')")
-     apply_button = gr.Button("Apply Approved Changes")
-     final_text = gr.Textbox(label="Final Text", lines=10)

      suggestions_state = gr.State()

-     review_button.click(
-         fn=review_blog,
-         inputs=[input_type, text_input, url_input],
-         outputs=[highlighted_output, suggestions_output, suggestions_state]
-     )

-     apply_button.click(
-         fn=apply_changes,
-         inputs=[text_input, suggestions_state, approve_indices],
-         outputs=final_text
-     )

- # Launch Gradio interface
- demo.launch()
 
  import gradio as gr
  from transformers import pipeline
+ from newspaper import Article
+ import language_tool_python
  import nltk
+ import re
  from nltk.tokenize import sent_tokenize
+
+ # Download punkt for sentence tokenization (punkt_tab is also required by newer NLTK releases)
+ nltk.download("punkt")
+ nltk.download("punkt_tab")
+
+ # Load the grammar tool (LanguageTool's public API; no local Java server needed)
+ grammar_tool = language_tool_python.LanguageToolPublicAPI('en-US')
+
+ # Load models
+ summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-6-6")
+ toxicity_classifier = pipeline("text-classification", model="Hate-speech-CNERG/bert-base-uncased-hatexplain")

  def extract_text(input_type, text_input, url_input):
+     if input_type == "URL" and url_input:
+         article = Article(url_input)
+         article.download()
+         article.parse()
+         return article.text
+     return text_input
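+ # Note: Article.download() fetches the page and Article.parse() extracts the
+ # body text; both can raise (e.g. newspaper's ArticleException) on network or
+ # parsing failures, so a try/except around extract_text is a sensible guard.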

  def check_grammar(text):
+     matches = grammar_tool.check(text)
+     return [
+         {
+             "text": match.context,
+             "error": match.message,
+             "suggestions": match.replacements,
+             "offset": match.offset,
+             "length": match.errorLength
+         } for match in matches
+     ]
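+ # Each returned dict is shaped like (illustrative values, not real output):
+ # {"text": "...I beleive this is...", "error": "Possible spelling mistake found.",
+ #  "suggestions": ["believe"], "offset": 2, "length": 7}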

  def detect_sensitive_content(text):
+     sentences = sent_tokenize(text)
+     sensitive_issues = []
+     for i, sentence in enumerate(sentences):
+         result = toxicity_classifier(sentence)
+         label = result[0]['label'].lower()
+         if any(term in label for term in ['toxic', 'hate', 'offensive']):
+             sensitive_issues.append({
+                 "sentence": sentence,
+                 "score": result[0]['score'],
+                 "label": label,
+                 "index": i
+             })
+     return sensitive_issues
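+ # The pipeline returns one [{'label': ..., 'score': ...}] list per call; the
+ # exact label names of the hatexplain checkpoint are an assumption here, which
+ # is why the code substring-matches 'toxic'/'hate'/'offensive' against the
+ # lowercased label rather than comparing to a single fixed string.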

  def generate_suggestions(text, grammar_issues, sensitive_issues):
+     suggestions = []
+     for issue in grammar_issues:
+         if issue['suggestions']:
+             suggestions.append(f"Replace '{issue['text']}' with '{issue['suggestions'][0]}' ({issue['error']})")
+     for issue in sensitive_issues:
+         summary = summarizer(issue['sentence'], max_length=50, min_length=10, do_sample=False)[0]['summary_text']
+         suggestions.append(f"Rephrase sensitive content '{issue['sentence']}' to: '{summary}' (Toxicity score: {issue['score']:.2f})")
+     return suggestions
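+ # Design note: the summarizer doubles as a crude rephraser for flagged
+ # sentences; max_length/min_length are measured in tokens, so very short
+ # inputs may trigger warnings or come back nearly unchanged.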

  def highlight_text(text, grammar_issues, sensitive_issues):
+     highlighted = text
+     offset_adjust = 0
+     for issue in grammar_issues:
+         start = issue['offset'] + offset_adjust
+         end = start + issue['length']
+         error_text = highlighted[start:end]
+         span = f"<span style='background-color: yellow'>{error_text}</span>"
+         highlighted = highlighted[:start] + span + highlighted[end:]
+         offset_adjust += len(span) - len(error_text)
+
+     for issue in sensitive_issues:
+         sentence = issue['sentence']
+         highlighted = highlighted.replace(sentence, f"<span style='background-color: red'>{sentence}</span>")
+
+     return highlighted
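+ # Offset bookkeeping: each yellow span grows the string by
+ # len(span) - len(error_text) characters (a constant 46 for this markup), so
+ # every later LanguageTool offset is shifted by the running total kept in
+ # offset_adjust.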

+ def review_blog(input_type, text_input, url_input):
+     if not text_input and not url_input:
+         return "Please provide text or a URL.", "", []
+
+     text = extract_text(input_type, text_input, url_input)
+     grammar_issues = check_grammar(text)
+     sensitive_issues = detect_sensitive_content(text)
+     suggestions = generate_suggestions(text, grammar_issues, sensitive_issues)
+     highlighted_text = highlight_text(text, grammar_issues, sensitive_issues)
+     suggestions_text = "\n".join([f"{i+1}. {sug}" for i, sug in enumerate(suggestions)])
+
+     return highlighted_text, suggestions_text, suggestions

  def apply_changes(text, suggestions, approved_indices):
+     for idx in approved_indices.split(','):
+         try:
+             idx = int(idx.strip()) - 1
+             if 0 <= idx < len(suggestions):
+                 suggestion = suggestions[idx]
+                 if "Rephrase sensitive content" in suggestion:
+                     # "Rephrase sensitive content '<orig>' to: '<new>' (...)"
+                     match = re.search(r"Rephrase sensitive content '([^']+)' to: '([^']+)'", suggestion)
+                 else:
+                     # "Replace '<orig>' with '<new>' (...)"
+                     match = re.search(r"Replace '([^']+)' with '([^']+)'", suggestion)
+                 if match:
+                     text = text.replace(match.group(1), match.group(2))
+         except ValueError:
+             continue  # skip non-numeric indices
+     return text
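+ # Worked example (hypothetical suggestion string):
+ #   "Replace 'teh cat' with 'the cat' (Possible spelling mistake found.)"
+ # The regex captures group(1) == 'teh cat' and group(2) == 'the cat', and
+ # text.replace() swaps every occurrence. group(1) comes from LanguageTool's
+ # context snippet, so the replacement is best-effort.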

  with gr.Blocks(theme=gr.themes.Soft()) as demo:
+     gr.Markdown("## 🧠 AI Blog Reviewer")
+     gr.Markdown("Analyze blog text or a URL for grammar issues and sensitive content (bias, toxicity, etc.).")

      input_type = gr.Radio(["Text", "URL"], label="Input Type", value="Text")
+     text_input = gr.Textbox(label="Blog Text", lines=10, visible=True)
+     url_input = gr.Textbox(label="Blog URL", visible=False)

+     def toggle_input(choice):
          return {
+             text_input: gr.update(visible=choice == "Text"),
+             url_input: gr.update(visible=choice == "URL")
          }

      input_type.change(fn=toggle_input, inputs=input_type, outputs=[text_input, url_input])
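+     # toggle_input returns a dict keyed by components; Gradio matches those
+     # keys against the components listed in `outputs` and applies each
+     # gr.update (here, toggling which textbox is visible).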

+     review_btn = gr.Button("🔍 Review Blog")
+     highlighted_output = gr.HTML(label="Highlighted Output")
+     suggestions_output = gr.Textbox(label="Suggestions", lines=8)
+     approve_indices = gr.Textbox(label="Approve Suggestions (e.g., 1,2)")
+     apply_btn = gr.Button("Apply Suggestions")
+     final_output = gr.Textbox(label="Updated Text", lines=10)

      suggestions_state = gr.State()

+     review_btn.click(fn=review_blog,
+                      inputs=[input_type, text_input, url_input],
+                      outputs=[highlighted_output, suggestions_output, suggestions_state])

+     apply_btn.click(fn=apply_changes,
+                     inputs=[text_input, suggestions_state, approve_indices],
+                     outputs=final_output)

+ demo.launch()