JaishnaCodz committed on
Commit
7d19342
·
verified ·
1 Parent(s): b8a6b71

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +85 -64
app.py CHANGED
@@ -1,84 +1,105 @@
1
  import gradio as gr
2
- from transformers import pipeline
 
3
  from nltk import download, sent_tokenize
 
 
4
  import re
5
 
6
- # Download necessary NLTK models
7
  download('punkt')
8
- download('punkt_tab') # Fixes the recent error with PunktTokenizer
9
-
10
- # Load the grammar correction model (T5 based)
11
- grammar_corrector = pipeline("text2text-generation", model="vennify/t5-base-grammar-correction")
12
-
13
- # Define sensitive/toxic keyword patterns (you can extend this list)
14
- sensitive_keywords = [
15
- r"\bhate\b", r"\bstupid\b", r"\bidiot\b", r"\btrash\b", r"\bkill\b",
16
- r"\bnot allowed\b", r"\bnobody cares\b", r"\bterrorist\b", r"\bgo back\b",
17
- r"\bimmigrants\b", r"\bslur\b", r"\bdisgusting\b"
18
- ]
19
-
20
- def detect_sensitive_content(text):
21
- # Tokenize into sentences
22
- sentences = sent_tokenize(text)
23
- highlighted = []
24
- for sentence in sentences:
25
- flagged = False
26
- for pattern in sensitive_keywords:
27
- if re.search(pattern, sentence, re.IGNORECASE):
28
- flagged = True
29
- break
30
- if flagged:
31
- sentence = f"<span style='color: red'>{sentence}</span>"
32
- highlighted.append(sentence)
33
- return " ".join(highlighted)
34
-
35
- def highlight_grammar(original, corrected):
36
- original_words = original.split()
37
- corrected_words = corrected.split()
38
- highlighted = []
39
-
40
- for orig, corr in zip(original_words, corrected_words):
41
- if orig != corr:
42
- highlighted.append(f"<span style='color: yellow'>{corr}</span>")
43
- else:
44
- highlighted.append(corr)
45
-
46
- # Add remaining corrected words
47
- if len(corrected_words) > len(original_words):
48
- for word in corrected_words[len(original_words):]:
49
- highlighted.append(f"<span style='color: yellow'>{word}</span>")
50
-
51
- return " ".join(highlighted)
 
 
 
52
 
53
  def review_blog(input_text, input_type):
 
54
  if input_type == "URL":
55
- return "URL support is under development."
 
 
56
 
57
- # Step 1: Grammar correction
58
- corrected_output = grammar_corrector(input_text, max_length=512, do_sample=False)[0]["generated_text"]
 
59
 
60
- # Step 2: Highlight grammar issues
61
- grammar_highlighted = highlight_grammar(input_text, corrected_output)
 
 
 
 
 
 
62
 
63
- # Step 3: Highlight sensitive content
64
- sensitive_highlighted = detect_sensitive_content(corrected_output)
65
-
66
- return gr.update(value=grammar_highlighted), gr.update(value=sensitive_highlighted)
67
 
68
  # Gradio UI
69
  with gr.Blocks(theme=gr.themes.Monochrome()) as demo:
70
- gr.Markdown("πŸ“ **AI Blog Reviewer**")
71
- gr.Markdown("#### Highlights:\n- <span style='color: yellow'>Yellow</span>: Grammar corrections\n- <span style='color: red'>Red</span>: Sensitive or toxic content")
72
 
73
  input_type = gr.Radio(["Text", "URL"], label="Input Type", value="Text")
74
- blog_input = gr.Textbox(lines=8, label="Blog Text", placeholder="Paste your blog content here...")
75
  review_btn = gr.Button("Review Blog")
76
 
77
- gr.Markdown("#### πŸ”§ Grammar Corrections:")
78
- grammar_output = gr.HTML()
79
- gr.Markdown("#### 🚨 Sensitive/Toxic Content:")
80
- sensitive_output = gr.HTML()
 
 
 
 
81
 
82
- review_btn.click(review_blog, inputs=[blog_input, input_type], outputs=[grammar_output, sensitive_output])
 
 
 
 
83
 
84
- demo.launch()
 
1
import os
import re
import tempfile

import gradio as gr
import requests
from bs4 import BeautifulSoup
from nltk import download, sent_tokenize
import google.generativeai as genai
8
 
9
# Download NLTK data (punkt_tab is required by recent NLTK sentence tokenizers)
download('punkt')
download('punkt_tab')

# Configure Gemini API using Hugging Face Spaces secrets.
# Fail fast at startup if the key is absent rather than at first request.
api_key = os.environ.get("GEMINI_API_KEY")
if not api_key:
    raise ValueError("GEMINI_API_KEY not found in environment variables. Please set it in Hugging Face Spaces secrets.")
genai.configure(api_key=api_key)
# BUG FIX: 'gemini-2.5' is not a published Gemini model identifier and the
# API rejects it; a concrete model id such as 'gemini-2.5-flash' is required.
model = genai.GenerativeModel('gemini-2.5-flash')
19
+
20
# Prompt for Gemini to analyze text.
# The model is instructed to emit a fixed four-section markdown report;
# any ```markdown fences it wraps around the output are stripped later
# before rendering. NOTE: this string is sent to the model verbatim — do
# not reword it without re-checking the report parsing.
PROMPT = """
You are an AI content reviewer. Analyze the provided text for the following:
1. **Grammar Issues**: Identify and suggest corrections for grammatical errors.
2. **Legal Policy Violations**: Flag content that may violate common legal policies (e.g., copyright infringement, defamation, incitement to violence).
3. **Crude/Abusive Language**: Detect crude, offensive, or abusive language.
4. **Sensitive Topics**: Identify content related to sensitive topics such as racism, gender bias, or other forms of discrimination.

Return the results in the following markdown format:
```markdown
# Blog Review Report

## Grammar Corrections
- [List grammar issues and suggested corrections]

## Legal Policy Violations
- [List any potential legal violations or "None detected"]

## Crude/Abusive Language
- [List instances of crude or abusive language or "None detected"]

## Sensitive Topics
- [List instances of sensitive topics or "None detected"]
```

For each issue, provide the original sentence, the issue, and the suggested correction or explanation. Be precise and concise.
"""
47
+
48
def fetch_url_content(url):
    """Fetch a web page and return its readable text.

    Returns the extracted text on success, the sentinel
    "No readable content found on the page." when nothing is extracted,
    or a string starting with "Error" on failure — callers test that
    prefix, so it must be preserved.
    """
    try:
        response = requests.get(url, timeout=10)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        # BUG FIX: find_all(['p', 'article', 'div']) duplicated content,
        # because text of <p> tags nested inside a matched <div>/<article>
        # was collected once per enclosing match. Extract paragraphs only,
        # falling back to the whole page's text when there are no <p> tags.
        paragraphs = [p.get_text(strip=True) for p in soup.find_all('p')]
        content = ' '.join(part for part in paragraphs if part)
        if not content:
            content = soup.get_text(separator=' ', strip=True)
        return content if content else "No readable content found on the page."
    except Exception as e:
        # Best-effort fetch: report the failure as a string (checked by prefix)
        # instead of raising, so the UI can surface it.
        return f"Error fetching URL: {str(e)}"
58
 
59
def review_blog(input_text, input_type):
    """Analyze blog text (or the content behind a URL) with Gemini.

    Returns a ``(report_markdown, report_file_path)`` pair matching the
    ``[report_output, download_btn]`` outputs of the Review button. The
    second value is a path to a temporary ``.md`` file, or None when the
    analysis failed.
    """
    # Resolve URL input to page text first; fetch errors come back as
    # strings starting with "Error" (contract of fetch_url_content).
    if input_type == "URL":
        input_text = fetch_url_content(input_text)
        if input_text.startswith("Error"):
            return input_text, None

    # Normalize the text to one sentence per line before sending it to the model.
    sentences = sent_tokenize(input_text)
    analysis_text = "\n".join(sentences)

    # Query Gemini with the review prompt.
    try:
        response = model.generate_content(PROMPT + "\n\nText to analyze:\n" + analysis_text)
        report = response.text.strip()
        # Strip any ```markdown fences the model wraps around the report.
        report = re.sub(r'^```markdown\n|```$', '', report, flags=re.MULTILINE)
    except Exception as e:
        # BUG FIX: the original fed the error string into the gr.File output too;
        # return None so the file component stays empty on failure.
        return f"Error analyzing content with Gemini: {str(e)}", None

    # BUG FIX: the original returned the raw report text as the second value,
    # but that output feeds a gr.File component, which requires a file path.
    # Persist the report to a temp .md file and hand back its path.
    with tempfile.NamedTemporaryFile(
        mode="w", suffix=".md", delete=False, prefix="blog_review_report_"
    ) as fh:
        fh.write(report)
    return report, fh.name
 
 
 
80
 
81
# Gradio UI
with gr.Blocks(theme=gr.themes.Monochrome()) as demo:
    # NOTE(review): emoji literals repaired from mojibake ("πŸ“" was UTF-8
    # "📝" mis-decoded); visible text is otherwise unchanged.
    gr.Markdown("# 📝 AI Blog Reviewer")
    gr.Markdown("Enter blog text or a URL to review for grammar, legal issues, crude language, and sensitive topics. The report is generated in markdown format.")

    input_type = gr.Radio(["Text", "URL"], label="Input Type", value="Text")
    blog_input = gr.Textbox(lines=8, label="Blog Content or URL", placeholder="Paste your blog text or URL here...")
    review_btn = gr.Button("Review Blog")

    gr.Markdown("### 📄 Review Report")
    report_output = gr.Markdown()
    download_btn = gr.File(label="Download Report", visible=False)

    # BUG FIX: the original defined a handle_review_output() helper that was
    # never wired to any event (dead code), and whose
    # gr.update(value={"content": ..., "filename": ...}) passed a dict where
    # gr.File expects a file path. The click handler feeds
    # [report_output, download_btn] directly, so the helper is removed.
    review_btn.click(
        review_blog,
        inputs=[blog_input, input_type],
        outputs=[report_output, download_btn]
    )

demo.launch()