Spaces:

JaishnaCodz
/

AI-Blog-Reviewer

Running

App Files Files Community

JaishnaCodz committed on Jul 16

Commit

768c740

verified ·

1 Parent(s): d5e169a

Update app.py

Browse files

Files changed (1) hide show

app.py +224 -224

app.py CHANGED Viewed

@@ -1,225 +1,225 @@
-import gradio as gr
-import language_tool_python
-import requests
-from newspaper import Article
-from transformers import pipeline
-import re
-import nltk
-from nltk.tokenize import sent_tokenize
-from autogen import AssistantAgent, UserProxyAgent, config_list_from_json
-nltk.download('punkt')
-# Initialize Hugging Face models
-toxicity_classifier = pipeline("text-classification", model="unitary/toxic-bert")
-summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-6-6")
-# AutoGen configuration
-config_list = [
-    {
-        "model": "local",
-        "api_key": "none"
-    }
-]
-# Define AutoGen Agents (for modularity, but we'll call functions directly)
-user_proxy = UserProxyAgent(
-    name="UserProxy",
-    system_message="Coordinates tasks and passes inputs to other agents.",
-    human_input_mode="NEVER",
-    code_execution_config={"work_dir": "autogen_workdir", "use_docker": False}
-)
-text_extraction_agent = AssistantAgent(
-    name="TextExtractor",
-    system_message="Extracts text from URLs or processes raw text.",
-    llm_config={"config_list": config_list}
-)
-grammar_check_agent = AssistantAgent(
-    name="GrammarChecker",
-    system_message="Identifies spelling and grammar errors using LanguageTool.",
-    llm_config={"config_list": config_list}
-)
-sensitive_content_agent = AssistantAgent(
-    name="SensitiveContentDetector",
-    system_message="Detects toxic or sensitive content (e.g., racism, gender bias).",
-    llm_config={"config_list": config_list}
-)
-suggestion_agent = AssistantAgent(
-    name="SuggestionGenerator",
-    system_message="Generates suggestions to fix grammar and rephrase sensitive content.",
-    llm_config={"config_list": config_list}
-)
-coordinator_agent = AssistantAgent(
-    name="Coordinator",
-    system_message="Combines results, highlights issues, and formats outputs.",
-    llm_config={"config_list": config_list}
-)
-# Task functions
-def extract_text(input_type, text_input, url_input):
-    if input_type == "URL" and url_input:
-        try:
-            article = Article(url_input)
-            article.download()
-            article.parse()
-            return article.text
-        except Exception as e:
-            return f"Error fetching URL: {str(e)}"
-    return text_input
-def check_grammar(text):
-    try:
-        grammar_tool = language_tool_python.LanguageTool('en-US')
-        matches = grammar_tool.check(text)
-        return [
-            {
-                "text": match.context,
-                "error": match.message,
-                "suggestions": match.replacements,
-                "offset": match.offset,
-                "length": match.errorLength
-            } for match in matches
-        ]
-    except Exception as e:
-        return [{"text": "", "error": f"Grammar check failed: {str(e)}", "suggestions": [], "offset": 0, "length": 0}]
-def detect_sensitive_content(text):
-    sentences = sent_tokenize(text)
-    sensitive_issues = []
-    for i, sentence in enumerate(sentences):
-        result = toxicity_classifier(sentence)
-        if result[0]['label'] == 'toxic' and result[0]['score'] > 0.7:
-            sensitive_issues.append({
-                "sentence": sentence,
-                "score": result[0]['score'],
-                "index": i
-            })
-    return sensitive_issues
-def generate_suggestions(text, grammar_issues, sensitive_issues):
-    suggestions = []
-    for issue in grammar_issues:
-        if issue['suggestions']:
-            suggestions.append(f"Replace '{issue['text']}' with '{issue['suggestions'][0]}' ({issue['error']})")
-    for issue in sensitive_issues:
-        try:
-            summary = summarizer(issue['sentence'], max_length=50, min_length=10, do_sample=False)[0]['summary_text']
-            suggestions.append(f"Rephrase sensitive content '{issue['sentence']}' to: '{summary}' (Toxicity score: {issue['score']:.2f})")
-        except Exception as e:
-            suggestions.append(f"Failed to rephrase '{issue['sentence']}': {str(e)}")
-    return suggestions
-def highlight_text(text, grammar_issues, sensitive_issues):
-    highlighted = text
-    offset_adjust = 0
-    for issue in grammar_issues:
-        start = issue['offset'] + offset_adjust
-        end = start + issue['length']
-        error_text = highlighted[start:end]
-        highlighted = highlighted[:start] + f"<span style='background-color: yellow'>{error_text}</span>" + highlighted[end:]
-        offset_adjust += len("<span style='background-color: yellow'>") + len("</span>")
-    sentences = sent_tokenize(text)
-    offset_adjust = 0
-    for issue in sensitive_issues:
-        sentence = issue['sentence']
-        start = highlighted.find(sentence, offset_adjust)
-        if start != -1:
-            end = start + len(sentence)
-            highlighted = highlighted[:start] + f"<span style='background-color: red'>{sentence}</span>" + highlighted[end:]
-            offset_adjust = end
-    return highlighted
-# Main function to process input
-def review_blog(input_type, text_input, url_input):
-    if not text_input and not url_input:
-        return "Please provide text or a URL.", "", []
-    # Step 1: Text Extraction
-    text = extract_text(input_type, text_input, url_input)
-    if text.startswith("Error"):
-        return text, "", []
-    # Step 2: Grammar Check
-    grammar_issues = check_grammar(text)
-    # Step 3: Sensitive Content Detection
-    sensitive_issues = detect_sensitive_content(text)
-    # Step 4: Generate Suggestions
-    suggestions = generate_suggestions(text, grammar_issues, sensitive_issues)
-    # Step 5: Coordinate Output
-    highlighted_text = highlight_text(text, grammar_issues, sensitive_issues)
-    suggestions_text = "\n".join([f"{i+1}. {sug}" for i, sug in enumerate(suggestions)])
-    return highlighted_text, suggestions_text, suggestions
-def apply_changes(text, suggestions, approved_indices):
-    sentences = sent_tokenize(text)
-    for idx in approved_indices.split(','):
-        try:
-            idx = int(idx.strip()) - 1
-            if idx < len(suggestions):
-                suggestion = suggestions[idx]
-                match = re.search(r"'([^']+)'$", suggestion)
-                if match:
-                    new_text = match.group(1)
-                    if "Rephrase sensitive content" in suggestion:
-                        orig_match = re.search(r"'([^']+)'\s+to:", suggestion)
-                        if orig_match:
-                            orig_sentence = orig_match.group(1)
-                            text = text.replace(orig_sentence, new_text)
-                    else:
-                        orig_match = re.search(r"Replace '([^']+)'\s+with\s+'([^']+)'", suggestion)
-                        if orig_match:
-                            orig_text = orig_match.group(1)
-                            text = text.replace(orig_text, new_text)
-        except ValueError:
-            continue  # Skip invalid indices
-    return text
-# Gradio interface
-with gr.Blocks(theme=gr.themes.Soft()) as demo:
-    gr.Markdown("# AI Blog Reviewer with AutoGen")
-    gr.Markdown("Enter blog text or a URL to review for spelling, grammar, and sensitive content. Approve suggested changes to update the text.")
-    input_type = gr.Radio(["Text", "URL"], label="Input Type", value="Text")
-    text_input = gr.Textbox(label="Blog Text", placeholder="Enter your blog text here...", lines=10, visible=True)
-    url_input = gr.Textbox(label="Blog URL", placeholder="Enter the blog URL here...", visible=False)
-    def toggle_input(input_type):
-        return {
-            text_input: gr.update(visible=input_type == "Text"),
-            url_input: gr.update(visible=input_type == "URL")
-        }
-    input_type.change(fn=toggle_input, inputs=input_type, outputs=[text_input, url_input])
-    review_button = gr.Button("Review Content")
-    highlighted_output = gr.HTML(label="Highlighted Issues (Yellow: Grammar, Red: Sensitive)")
-    suggestions_output = gr.Textbox(label="Suggestions", lines=10)
-    approve_indices = gr.Textbox(label="Approve Suggestions (Enter numbers, e.g., '1,2,3')")
-    apply_button = gr.Button("Apply Approved Changes")
-    final_text = gr.Textbox(label="Final Text", lines=10)
-    suggestions_state = gr.State()
-    review_button.click(
-        fn=review_blog,
-        inputs=[input_type, text_input, url_input],
-        outputs=[highlighted_output, suggestions_output, suggestions_state]
-    )
-    apply_button.click(
-        fn=apply_changes,
-        inputs=[text_input, suggestions_state, approve_indices],
-        outputs=final_text
-    )
-# Launch Gradio interface
 demo.launch()

+import gradio as gr
+import language_tool_python
+import requests
+from newspaper import Article
+from transformers import pipeline
+import re
+import nltk
+from nltk.tokenize import sent_tokenize
+from autogen import AssistantAgent, UserProxyAgent, config_list_from_json
+nltk.download('punkt')
+# Initialize Hugging Face models
+toxicity_classifier = pipeline("text-classification", model="unitary/toxic-bert")
+summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-6-6")
+# AutoGen configuration
+config_list = [
+    {
+        "model": "local",
+        "api_key": "none"
+    }
+]
+# Define AutoGen Agents (for modularity, but we'll call functions directly)
+user_proxy = UserProxyAgent(
+    name="UserProxy",
+    system_message="Coordinates tasks and passes inputs to other agents.",
+    human_input_mode="NEVER",
+    code_execution_config={"work_dir": "autogen_workdir", "use_docker": False}
+)
+text_extraction_agent = AssistantAgent(
+    name="TextExtractor",
+    system_message="Extracts text from URLs or processes raw text.",
+    llm_config={"config_list": config_list}
+)
+grammar_check_agent = AssistantAgent(
+    name="GrammarChecker",
+    system_message="Identifies spelling and grammar errors using LanguageTool.",
+    llm_config={"config_list": config_list}
+)
+sensitive_content_agent = AssistantAgent(
+    name="SensitiveContentDetector",
+    system_message="Detects toxic or sensitive content (e.g., racism, gender bias).",
+    llm_config={"config_list": config_list}
+)
+suggestion_agent = AssistantAgent(
+    name="SuggestionGenerator",
+    system_message="Generates suggestions to fix grammar and rephrase sensitive content.",
+    llm_config={"config_list": config_list}
+)
+coordinator_agent = AssistantAgent(
+    name="Coordinator",
+    system_message="Combines results, highlights issues, and formats outputs.",
+    llm_config={"config_list": config_list}
+)
+# Task functions
+def extract_text(input_type, text_input, url_input):
+    if input_type == "URL" and url_input:
+        try:
+            article = Article(url_input)
+            article.download()
+            article.parse()
+            return article.text
+        except Exception as e:
+            return f"Error fetching URL: {str(e)}"
+    return text_input
+def check_grammar(text):
+    try:
+        grammar_tool = language_tool_python.LanguageToolPublicAPI('en-US')
+        matches = grammar_tool.check(text)
+        return [
+            {
+                "text": match.context,
+                "error": match.message,
+                "suggestions": match.replacements,
+                "offset": match.offset,
+                "length": match.errorLength
+            } for match in matches
+        ]
+    except Exception as e:
+        return [{"text": "", "error": f"Grammar check failed: {str(e)}", "suggestions": [], "offset": 0, "length": 0}]
+def detect_sensitive_content(text):
+    sentences = sent_tokenize(text)
+    sensitive_issues = []
+    for i, sentence in enumerate(sentences):
+        result = toxicity_classifier(sentence)
+        if result[0]['label'] == 'toxic' and result[0]['score'] > 0.7:
+            sensitive_issues.append({
+                "sentence": sentence,
+                "score": result[0]['score'],
+                "index": i
+            })
+    return sensitive_issues
+def generate_suggestions(text, grammar_issues, sensitive_issues):
+    suggestions = []
+    for issue in grammar_issues:
+        if issue['suggestions']:
+            suggestions.append(f"Replace '{issue['text']}' with '{issue['suggestions'][0]}' ({issue['error']})")
+    for issue in sensitive_issues:
+        try:
+            summary = summarizer(issue['sentence'], max_length=50, min_length=10, do_sample=False)[0]['summary_text']
+            suggestions.append(f"Rephrase sensitive content '{issue['sentence']}' to: '{summary}' (Toxicity score: {issue['score']:.2f})")
+        except Exception as e:
+            suggestions.append(f"Failed to rephrase '{issue['sentence']}': {str(e)}")
+    return suggestions
+def highlight_text(text, grammar_issues, sensitive_issues):
+    highlighted = text
+    offset_adjust = 0
+    for issue in grammar_issues:
+        start = issue['offset'] + offset_adjust
+        end = start + issue['length']
+        error_text = highlighted[start:end]
+        highlighted = highlighted[:start] + f"<span style='background-color: yellow'>{error_text}</span>" + highlighted[end:]
+        offset_adjust += len("<span style='background-color: yellow'>") + len("</span>")
+    sentences = sent_tokenize(text)
+    offset_adjust = 0
+    for issue in sensitive_issues:
+        sentence = issue['sentence']
+        start = highlighted.find(sentence, offset_adjust)
+        if start != -1:
+            end = start + len(sentence)
+            highlighted = highlighted[:start] + f"<span style='background-color: red'>{sentence}</span>" + highlighted[end:]
+            offset_adjust = end
+    return highlighted
+# Main function to process input
+def review_blog(input_type, text_input, url_input):
+    if not text_input and not url_input:
+        return "Please provide text or a URL.", "", []
+    # Step 1: Text Extraction
+    text = extract_text(input_type, text_input, url_input)
+    if text.startswith("Error"):
+        return text, "", []
+    # Step 2: Grammar Check
+    grammar_issues = check_grammar(text)
+    # Step 3: Sensitive Content Detection
+    sensitive_issues = detect_sensitive_content(text)
+    # Step 4: Generate Suggestions
+    suggestions = generate_suggestions(text, grammar_issues, sensitive_issues)
+    # Step 5: Coordinate Output
+    highlighted_text = highlight_text(text, grammar_issues, sensitive_issues)
+    suggestions_text = "\n".join([f"{i+1}. {sug}" for i, sug in enumerate(suggestions)])
+    return highlighted_text, suggestions_text, suggestions
+def apply_changes(text, suggestions, approved_indices):
+    sentences = sent_tokenize(text)
+    for idx in approved_indices.split(','):
+        try:
+            idx = int(idx.strip()) - 1
+            if idx < len(suggestions):
+                suggestion = suggestions[idx]
+                match = re.search(r"'([^']+)'$", suggestion)
+                if match:
+                    new_text = match.group(1)
+                    if "Rephrase sensitive content" in suggestion:
+                        orig_match = re.search(r"'([^']+)'\s+to:", suggestion)
+                        if orig_match:
+                            orig_sentence = orig_match.group(1)
+                            text = text.replace(orig_sentence, new_text)
+                    else:
+                        orig_match = re.search(r"Replace '([^']+)'\s+with\s+'([^']+)'", suggestion)
+                        if orig_match:
+                            orig_text = orig_match.group(1)
+                            text = text.replace(orig_text, new_text)
+        except ValueError:
+            continue  # Skip invalid indices
+    return text
+# Gradio interface
+with gr.Blocks(theme=gr.themes.Soft()) as demo:
+    gr.Markdown("# AI Blog Reviewer with AutoGen")
+    gr.Markdown("Enter blog text or a URL to review for spelling, grammar, and sensitive content. Approve suggested changes to update the text.")
+    input_type = gr.Radio(["Text", "URL"], label="Input Type", value="Text")
+    text_input = gr.Textbox(label="Blog Text", placeholder="Enter your blog text here...", lines=10, visible=True)
+    url_input = gr.Textbox(label="Blog URL", placeholder="Enter the blog URL here...", visible=False)
+    def toggle_input(input_type):
+        return {
+            text_input: gr.update(visible=input_type == "Text"),
+            url_input: gr.update(visible=input_type == "URL")
+        }
+    input_type.change(fn=toggle_input, inputs=input_type, outputs=[text_input, url_input])
+    review_button = gr.Button("Review Content")
+    highlighted_output = gr.HTML(label="Highlighted Issues (Yellow: Grammar, Red: Sensitive)")
+    suggestions_output = gr.Textbox(label="Suggestions", lines=10)
+    approve_indices = gr.Textbox(label="Approve Suggestions (Enter numbers, e.g., '1,2,3')")
+    apply_button = gr.Button("Apply Approved Changes")
+    final_text = gr.Textbox(label="Final Text", lines=10)
+    suggestions_state = gr.State()
+    review_button.click(
+        fn=review_blog,
+        inputs=[input_type, text_input, url_input],
+        outputs=[highlighted_output, suggestions_output, suggestions_state]
+    )
+    apply_button.click(
+        fn=apply_changes,
+        inputs=[text_input, suggestions_state, approve_indices],
+        outputs=final_text
+    )
+# Launch Gradio interface
 demo.launch()