Spaces:

JaishnaCodz
/

AI-Blog-Reviewer

Running

App Files Community

JaishnaCodz committed on Jul 16

Commit

3ee9037

verified ·

1 Parent(s): 4f4b697

Update app.py

Browse files

Files changed (1) hide show

app.py +124 -85

app.py CHANGED Viewed

@@ -8,11 +8,26 @@ import nltk
 from nltk.tokenize import sent_tokenize
 from autogen import AssistantAgent, UserProxyAgent
-nltk.download('punkt')
 # Initialize Hugging Face models
-toxicity_classifier = pipeline("text-classification", model="unitary/toxic-bert")
-summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-6-6")
 # Define AutoGen Agents (for modularity, but we'll call functions directly)
 user_proxy = UserProxyAgent(
@@ -49,19 +64,23 @@ coordinator_agent = AssistantAgent(
 # Task functions
 def extract_text(input_type, text_input, url_input):
-    if input_type == "URL" and url_input:
-        try:
             article = Article(url_input)
             article.download()
             article.parse()
             return article.text
-        except Exception as e:
-            return f"Error fetching URL: {str(e)}"
-    return text_input
 def check_grammar(text):
     try:
         grammar_tool = language_tool_python.LanguageToolPublicAPI('en-US')
         matches = grammar_tool.check(text)
         return [
             {
@@ -73,103 +92,123 @@ def check_grammar(text):
             } for match in matches
         ]
     except Exception as e:
         return [{"text": "", "error": f"Grammar check failed: {str(e)}", "suggestions": [], "offset": 0, "length": 0}]
 def detect_sensitive_content(text):
-    sentences = sent_tokenize(text)
-    sensitive_issues = []
-    for i, sentence in enumerate(sentences):
-        result = toxicity_classifier(sentence)
-        if result[0]['label'] == 'toxic' and result[0]['score'] > 0.7:
-            sensitive_issues.append({
-                "sentence": sentence,
-                "score": result[0]['score'],
-                "index": i
-            })
-    return sensitive_issues
 def generate_suggestions(text, grammar_issues, sensitive_issues):
-    suggestions = []
-    for issue in grammar_issues:
-        if issue['suggestions']:
-            suggestions.append(f"Replace '{issue['text']}' with '{issue['suggestions'][0]}' ({issue['error']})")
-    for issue in sensitive_issues:
-        try:
             summary = summarizer(issue['sentence'], max_length=50, min_length=10, do_sample=False)[0]['summary_text']
             suggestions.append(f"Rephrase sensitive content '{issue['sentence']}' to: '{summary}' (Toxicity score: {issue['score']:.2f})")
-        except Exception as e:
-            suggestions.append(f"Failed to rephrase '{issue['sentence']}': {str(e)}")
-    return suggestions
 def highlight_text(text, grammar_issues, sensitive_issues):
-    highlighted = text
-    offset_adjust = 0
-    for issue in grammar_issues:
-        start = issue['offset'] + offset_adjust
-        end = start + issue['length']
-        error_text = highlighted[start:end]
-        highlighted = highlighted[:start] + f"<span style='background-color: yellow'>{error_text}</span>" + highlighted[end:]
-        offset_adjust += len("<span style='background-color: yellow'>") + len("</span>")
-    sentences = sent_tokenize(text)
-    offset_adjust = 0
-    for issue in sensitive_issues:
-        sentence = issue['sentence']
-        start = highlighted.find(sentence, offset_adjust)
-        if start != -1:
-            end = start + len(sentence)
-            highlighted = highlighted[:start] + f"<span style='background-color: red'>{sentence}</span>" + highlighted[end:]
-            offset_adjust = end
-    return highlighted
 # Main function to process input
 def review_blog(input_type, text_input, url_input):
-    if not text_input and not url_input:
-        return "Please provide text or a URL.", "", []
-    # Step 1: Text Extraction
-    text = extract_text(input_type, text_input, url_input)
-    print(f"Processed text: {text}")  # Debug print to check text processing
-    if text.startswith("Error"):
-        return text, "", []
-    # Step 2: Grammar Check
-    grammar_issues = check_grammar(text)
-    # Step 3: Sensitive Content Detection
-    sensitive_issues = detect_sensitive_content(text)
-    # Step 4: Generate Suggestions
-    suggestions = generate_suggestions(text, grammar_issues, sensitive_issues)
-    # Step 5: Coordinate Output
-    highlighted_text = highlight_text(text, grammar_issues, sensitive_issues)
-    suggestions_text = "\n".join([f"{i+1}. {sug}" for i, sug in enumerate(suggestions)])
-    return highlighted_text, suggestions_text, suggestions
 def apply_changes(text, suggestions, approved_indices):
-    sentences = sent_tokenize(text)
-    for idx in approved_indices.split(','):
-        try:
-            idx = int(idx.strip()) - 1
-            if idx < len(suggestions):
-                suggestion = suggestions[idx]
-                match = re.search(r"'([^']+)'$", suggestion)
-                if match:
-                    new_text = match.group(1)
-                    if "Rephrase sensitive content" in suggestion:
-                        orig_match = re.search(r"'([^']+)'\s+to:", suggestion)
-                        if orig_match:
-                            orig_sentence = orig_match.group(1)
-                            text = text.replace(orig_sentence, new_text)
-                    else:
-                        orig_match = re.search(r"Replace '([^']+)'\s+with\s+'([^']+)'", suggestion)
-                        if orig_match:
-                            orig_text = orig_match.group(1)
-                            text = text.replace(orig_text, new_text)
-        except ValueError:
-            continue  # Skip invalid indices
-    return text
 # Gradio interface
 with gr.Blocks(theme=gr.themes.Soft()) as demo:

 from nltk.tokenize import sent_tokenize
 from autogen import AssistantAgent, UserProxyAgent
+# Download required NLTK data at startup
+try:
+    nltk.download('punkt')
+    nltk.download('punkt_tab')  # Ensure language-specific data is downloaded
+    print("NLTK data (punkt and punkt_tab) downloaded successfully.")
+except Exception as e:
+    print(f"Error downloading NLTK data: {str(e)}")
 # Initialize Hugging Face models
+try:
+    toxicity_classifier = pipeline("text-classification", model="unitary/toxic-bert")
+    print("Toxicity classifier loaded successfully.")
+except Exception as e:
+    print(f"Error loading toxicity classifier: {str(e)}")
+try:
+    summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-6-6")
+    print("Summarizer loaded successfully.")
+except Exception as e:
+    print(f"Error loading summarizer: {str(e)}")
 # Define AutoGen Agents (for modularity, but we'll call functions directly)
 user_proxy = UserProxyAgent(
 # Task functions
 def extract_text(input_type, text_input, url_input):
+    try:
+        if input_type == "URL" and url_input:
             article = Article(url_input)
             article.download()
             article.parse()
+            print(f"Extracted text from URL: {url_input[:50]}...")  # Debug
             return article.text
+        print(f"Using input text: {text_input[:50]}...")  # Debug
+        return text_input
+    except Exception as e:
+        print(f"Error in extract_text: {str(e)}")
+        return f"Error fetching URL or processing text: {str(e)}"
 def check_grammar(text):
     try:
         grammar_tool = language_tool_python.LanguageToolPublicAPI('en-US')
+        print("Checking grammar...")  # Debug
         matches = grammar_tool.check(text)
         return [
             {
             } for match in matches
         ]
     except Exception as e:
+        print(f"Grammar check failed: {str(e)}")
         return [{"text": "", "error": f"Grammar check failed: {str(e)}", "suggestions": [], "offset": 0, "length": 0}]
 def detect_sensitive_content(text):
+    try:
+        sentences = sent_tokenize(text)
+        sensitive_issues = []
+        for i, sentence in enumerate(sentences):
+            result = toxicity_classifier(sentence)
+            if result[0]['label'] == 'toxic' and result[0]['score'] > 0.7:
+                sensitive_issues.append({
+                    "sentence": sentence,
+                    "score": result[0]['score'],
+                    "index": i
+                })
+        print(f"Detected {len(sensitive_issues)} sensitive issues.")  # Debug
+        return sensitive_issues
+    except Exception as e:
+        print(f"Error in detect_sensitive_content: {str(e)}")
+        return []
 def generate_suggestions(text, grammar_issues, sensitive_issues):
+    try:
+        suggestions = []
+        for issue in grammar_issues:
+            if issue['suggestions']:
+                suggestions.append(f"Replace '{issue['text']}' with '{issue['suggestions'][0]}' ({issue['error']})")
+        for issue in sensitive_issues:
             summary = summarizer(issue['sentence'], max_length=50, min_length=10, do_sample=False)[0]['summary_text']
             suggestions.append(f"Rephrase sensitive content '{issue['sentence']}' to: '{summary}' (Toxicity score: {issue['score']:.2f})")
+        print(f"Generated {len(suggestions)} suggestions.")  # Debug
+        return suggestions
+    except Exception as e:
+        print(f"Error in generate_suggestions: {str(e)}")
+        return []
 def highlight_text(text, grammar_issues, sensitive_issues):
+    try:
+        highlighted = text
+        offset_adjust = 0
+        for issue in grammar_issues:
+            start = issue['offset'] + offset_adjust
+            end = start + issue['length']
+            error_text = highlighted[start:end]
+            highlighted = highlighted[:start] + f"<span style='background-color: yellow'>{error_text}</span>" + highlighted[end:]
+            offset_adjust += len("<span style='background-color: yellow'>") + len("</span>")
+        sentences = sent_tokenize(text)
+        offset_adjust = 0
+        for issue in sensitive_issues:
+            sentence = issue['sentence']
+            start = highlighted.find(sentence, offset_adjust)
+            if start != -1:
+                end = start + len(sentence)
+                highlighted = highlighted[:start] + f"<span style='background-color: red'>{sentence}</span>" + highlighted[end:]
+                offset_adjust = end
+        return highlighted
+    except Exception as e:
+        print(f"Error in highlight_text: {str(e)}")
+        return text
 # Main function to process input
 def review_blog(input_type, text_input, url_input):
+    try:
+        if not text_input and not url_input:
+            return "Please provide text or a URL.", "", []
+        # Step 1: Text Extraction
+        text = extract_text(input_type, text_input, url_input)
+        print(f"Processed text: {text[:50]}...")  # Debug
+        if text.startswith("Error"):
+            return text, "", []
+        # Step 2: Grammar Check
+        grammar_issues = check_grammar(text)
+        # Step 3: Sensitive Content Detection
+        sensitive_issues = detect_sensitive_content(text)
+        # Step 4: Generate Suggestions
+        suggestions = generate_suggestions(text, grammar_issues, sensitive_issues)
+        # Step 5: Coordinate Output
+        highlighted_text = highlight_text(text, grammar_issues, sensitive_issues)
+        suggestions_text = "\n".join([f"{i+1}. {sug}" for i, sug in enumerate(suggestions)])
+        return highlighted_text, suggestions_text, suggestions
+    except Exception as e:
+        print(f"Error in review_blog: {str(e)}")
+        return f"Error processing input: {str(e)}", "", []
 def apply_changes(text, suggestions, approved_indices):
+    try:
+        sentences = sent_tokenize(text)
+        for idx in approved_indices.split(','):
+            try:
+                idx = int(idx.strip()) - 1
+                if idx < len(suggestions):
+                    suggestion = suggestions[idx]
+                    match = re.search(r"'([^']+)'$", suggestion)
+                    if match:
+                        new_text = match.group(1)
+                        if "Rephrase sensitive content" in suggestion:
+                            orig_match = re.search(r"'([^']+)'\s+to:", suggestion)
+                            if orig_match:
+                                orig_sentence = orig_match.group(1)
+                                text = text.replace(orig_sentence, new_text)
+                        else:
+                            orig_match = re.search(r"Replace '([^']+)'\s+with\s+'([^']+)'", suggestion)
+                            if orig_match:
+                                orig_text = orig_match.group(1)
+                                text = text.replace(orig_text, new_text)
+            except ValueError:
+                continue  # Skip invalid indices
+        return text
+    except Exception as e:
+        print(f"Error in apply_changes: {str(e)}")
+        return text
 # Gradio interface
 with gr.Blocks(theme=gr.themes.Soft()) as demo: