import gradio as gr
from transformers import pipeline
from newspaper import Article
import language_tool_python
import nltk
import re
from nltk.tokenize import sent_tokenize

# Download the sentence tokenizer data used by sent_tokenize
nltk.download("punkt")
nltk.download("punkt_tab")  # required by newer NLTK releases; fails gracefully on older ones

# Connect to the local LanguageTool server started via setup.sh
# (LanguageToolPublicAPI always targets the public API, so point LanguageTool at the local server instead)
grammar_tool = language_tool_python.LanguageTool('en-US', remote_server='http://localhost:8081')

# Load transformers pipelines: DistilBART for generating rephrasings,
# HateXplain for sentence-level toxicity classification
summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-6-6")
toxicity_classifier = pipeline("text-classification", model="Hate-speech-CNERG/bert-base-uncased-hatexplain")

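# Pull the text to review either from the textbox or, for URL input, by
# downloading and parsing the page with newspaper3k; download/parse
# failures come back as an "Error fetching URL: ..." string.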
def extract_text(input_type, text_input, url_input):
    if input_type == "URL" and url_input:
        try:
            article = Article(url_input)
            article.download()
            article.parse()
            return article.text
        except Exception as e:
            return f"Error fetching URL: {e}"
    return text_input

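# Run LanguageTool over the whole text and flatten each match into a dict:
# context snippet, error message, suggested replacements, and the
# character offset/length of the error span.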
def check_grammar(text):
    try:
        matches = grammar_tool.check(text)
        return [
            {
                "text": match.context,
                "error": match.message,
                "suggestions": match.replacements,
                "offset": match.offset,
                "length": match.errorLength
            } for match in matches
        ]
    except Exception as e:
        return [{"text": "", "error": f"Grammar check failed: {str(e)}", "suggestions": [], "offset": 0, "length": 0}]

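# Score each sentence with the HateXplain classifier and keep those whose
# predicted label mentions toxic/hate/offensive. Very long sentences may
# exceed the model's 512-token input limit.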
def detect_sensitive_content(text):
    sentences = sent_tokenize(text)
    sensitive_issues = []
    for i, sentence in enumerate(sentences):
        result = toxicity_classifier(sentence)
        label = result[0]['label'].lower()
        if any(term in label for term in ['toxic', 'hate', 'offensive']):
            sensitive_issues.append({
                "sentence": sentence,
                "score": result[0]['score'],
                "label": label,
                "index": i
            })
    return sensitive_issues

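# Build human-readable suggestion strings: grammar matches become
# "Replace '...' with '...'" lines, and sensitive sentences are rephrased
# via the summarizer. apply_changes() parses these exact templates back
# out, so the two formats must stay in sync.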
def generate_suggestions(text, grammar_issues, sensitive_issues):
    suggestions = []
    for issue in grammar_issues:
        if issue['suggestions']:
            suggestions.append(f"Replace '{issue['text']}' with '{issue['suggestions'][0]}' ({issue['error']})")
    for issue in sensitive_issues:
        summary = summarizer(issue['sentence'], max_length=50, min_length=10, do_sample=False)[0]['summary_text']
        suggestions.append(f"Rephrase sensitive content '{issue['sentence']}' to: '{summary}' (Toxicity score: {issue['score']:.2f})")
    return suggestions

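# Mark up the text for the gr.HTML output: grammar error spans get a
# yellow background, sensitive sentences a red one.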
def highlight_text(text, grammar_issues, sensitive_issues):
    highlighted = text
    offset_adjust = 0
    # Apply grammar highlights in ascending offset order so offset_adjust
    # (which tracks the HTML markup added so far) stays correct
    for issue in sorted(grammar_issues, key=lambda i: i['offset']):
        start = issue['offset'] + offset_adjust
        end = start + issue['length']
        error_text = highlighted[start:end]
        span = f"<span style='background-color: yellow'>{error_text}</span>"
        highlighted = highlighted[:start] + span + highlighted[end:]
        offset_adjust += len(span) - len(error_text)

    for issue in sensitive_issues:
        sentence = issue['sentence']
        highlighted = highlighted.replace(sentence, f"<span style='background-color: red'>{sentence}</span>")

    return highlighted

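# Full review pipeline: extract text, check grammar, scan for sensitive
# content, then return highlighted HTML, a numbered suggestion list, and
# the raw suggestions for the gr.State holder.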
def review_blog(input_type, text_input, url_input):
    if not text_input and not url_input:
        return "Please provide text or a URL.", "", []

    text = extract_text(input_type, text_input, url_input)
    if text.startswith("Error"):
        return text, "", []

    grammar_issues = check_grammar(text)
    sensitive_issues = detect_sensitive_content(text)
    suggestions = generate_suggestions(text, grammar_issues, sensitive_issues)
    highlighted_text = highlight_text(text, grammar_issues, sensitive_issues)
    suggestions_text = "\n".join([f"{i+1}. {sug}" for i, sug in enumerate(suggestions)])

    return highlighted_text, suggestions_text, suggestions

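# Apply the approved suggestions (1-based, comma-separated indices) by
# parsing the original/replacement strings back out of each suggestion.
# Note: this edits the textbox contents, so in URL mode (empty textbox)
# there is nothing to update.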
def apply_changes(text, suggestions, approved_indices):
    if not suggestions:
        return text
    for raw_idx in approved_indices.split(','):
        try:
            idx = int(raw_idx.strip()) - 1
        except ValueError:
            continue
        if not 0 <= idx < len(suggestions):
            continue
        suggestion = suggestions[idx]
        # Match the exact templates produced by generate_suggestions
        # (parsing assumes the quoted spans contain no apostrophes)
        if suggestion.startswith("Rephrase sensitive content"):
            match = re.search(r"Rephrase sensitive content '([^']+)' to: '([^']+)'", suggestion)
        else:
            match = re.search(r"Replace '([^']+)' with '([^']+)'", suggestion)
        if match:
            text = text.replace(match.group(1), match.group(2))
    return text

# Gradio UI
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("## 🧠 AI Blog Reviewer with Grammar & Bias Detection")
    gr.Markdown("Enter blog content or a URL. Detect grammar issues and sensitive (toxic, biased) content.")

    input_type = gr.Radio(["Text", "URL"], label="Input Type", value="Text")
    text_input = gr.Textbox(label="Blog Text", lines=10, visible=True)
    url_input = gr.Textbox(label="Blog URL", visible=False)

    def toggle_input(choice):
        return {
            text_input: gr.update(visible=choice == "Text"),
            url_input: gr.update(visible=choice == "URL")
        }

    input_type.change(fn=toggle_input, inputs=input_type, outputs=[text_input, url_input])

    review_btn = gr.Button("🔍 Review Blog")
    highlighted_output = gr.HTML(label="Highlighted Output")
    suggestions_output = gr.Textbox(label="Suggestions", lines=8)
    approve_indices = gr.Textbox(label="Approve Suggestions (e.g., 1,2)")
    apply_btn = gr.Button("✅ Apply Suggestions")
    final_output = gr.Textbox(label="Updated Text", lines=10)

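    # Holds the raw suggestion list between the Review and Apply clicks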
    suggestions_state = gr.State()

    review_btn.click(fn=review_blog,
                     inputs=[input_type, text_input, url_input],
                     outputs=[highlighted_output, suggestions_output, suggestions_state])

    apply_btn.click(fn=apply_changes,
                    inputs=[text_input, suggestions_state, approve_indices],
                    outputs=final_output)

demo.launch()