File size: 6,351 Bytes
768c740
 
56f7cbb
 
768c740
56f7cbb
768c740
56f7cbb
 
 
 
0425ea5
 
56f7cbb
0425ea5
56f7cbb
 
 
768c740
56f7cbb
 
 
 
 
 
768c740
 
0425ea5
 
 
 
 
 
 
 
 
 
 
 
 
 
768c740
 
56f7cbb
 
 
 
 
 
 
 
 
 
 
 
 
768c740
 
56f7cbb
 
 
 
 
 
 
 
768c740
 
56f7cbb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
768c740
56f7cbb
 
 
768c740
56f7cbb
 
 
 
 
 
768c740
56f7cbb
768c740
 
56f7cbb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0425ea5
768c740
0425ea5
 
768c740
 
56f7cbb
 
768c740
56f7cbb
768c740
56f7cbb
 
768c740
 
 
 
56f7cbb
 
 
 
 
 
768c740
 
 
56f7cbb
 
 
768c740
56f7cbb
 
 
768c740
56f7cbb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
import gradio as gr
from transformers import pipeline
from newspaper import Article
import language_tool_python
import nltk
import re
from nltk.tokenize import sent_tokenize

# Download punkt for sentence tokenization (no-op if already cached locally).
nltk.download("punkt")

# Use local LanguageTool server (runs via setup.sh); falls back to the
# public API if no local server is reachable.
grammar_tool = language_tool_python.LanguageTool('en-US')

# Load summarizer and hate-speech detection model.
# NOTE: both pipelines download model weights on first run, so module import
# can take a while; the summarizer doubles as a "rephrasing" engine below.
summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-6-6")
toxicity_classifier = pipeline("text-classification", model="Hate-speech-CNERG/bert-base-uncased-hatexplain")

def extract_text(input_type, text_input, url_input):
    """Return the text to review: the raw text box content, or the
    article body scraped from *url_input* when input_type is "URL"."""
    if input_type != "URL" or not url_input:
        return text_input
    # Fetch and parse the page with newspaper3k.
    article = Article(url_input)
    article.download()
    article.parse()
    return article.text

def check_grammar(text):
    """Run LanguageTool over *text* and return a list of issue dicts.

    Each dict carries: "text" (context snippet), "error" (message),
    "suggestions" (replacement candidates), "offset" and "length"
    (position of the error within *text*). If the grammar server fails
    for any reason, a single placeholder entry is returned instead so
    the rest of the pipeline keeps working.
    """
    try:
        issues = []
        for match in grammar_tool.check(text):
            issues.append({
                "text": match.context,
                "error": match.message,
                "suggestions": match.replacements,
                "offset": match.offset,
                "length": match.errorLength,
            })
        return issues
    except Exception as exc:
        # Deliberate best-effort: a grammar outage must not kill the review.
        print(f"[Grammar Check Error] {exc}")
        return [{"text": "", "error": "Grammar check skipped (server error)", "suggestions": [], "offset": 0, "length": 0}]

def detect_sensitive_content(text):
    """Classify each sentence of *text* and collect the ones whose model
    label reads as toxic/hateful/offensive.

    Returns a list of dicts with the flagged sentence, its confidence
    score, the lowercased label, and the sentence index in the text.
    """
    flagged = []
    for idx, sentence in enumerate(sent_tokenize(text)):
        prediction = toxicity_classifier(sentence)[0]
        label = prediction['label'].lower()
        # Substring match against the model's label — presumably HateXplain
        # emits labels containing these terms; verify against the model card.
        if 'toxic' in label or 'hate' in label or 'offensive' in label:
            flagged.append({
                "sentence": sentence,
                "score": prediction['score'],
                "label": label,
                "index": idx,
            })
    return flagged

def generate_suggestions(text, grammar_issues, sensitive_issues):
    """Build human-readable suggestion strings for every detected issue.

    Grammar issues with at least one replacement candidate become
    "Replace ... with ..." lines; each sensitive sentence is run through
    the summarizer as a crude rephrasing engine.
    """
    suggestions = [
        f"Replace '{issue['text']}' with '{issue['suggestions'][0]}' ({issue['error']})"
        for issue in grammar_issues
        if issue['suggestions']
    ]
    for issue in sensitive_issues:
        rewrite = summarizer(issue['sentence'], max_length=50, min_length=10, do_sample=False)[0]['summary_text']
        suggestions.append(f"Rephrase sensitive content '{issue['sentence']}' to: '{rewrite}' (Toxicity score: {issue['score']:.2f})")
    return suggestions

def highlight_text(text, grammar_issues, sensitive_issues):
    """Return *text* as HTML: grammar errors wrapped in yellow spans,
    sensitive sentences wrapped in red spans."""
    result = text
    shift = 0  # cumulative growth of the string as span tags are inserted
    # Grammar offsets index into the ORIGINAL text; LanguageTool reports
    # matches in ascending offset order, so one running shift is enough.
    for issue in grammar_issues:
        begin = issue['offset'] + shift
        finish = begin + issue['length']
        fragment = result[begin:finish]
        wrapped = f"<span style='background-color: yellow'>{fragment}</span>"
        result = result[:begin] + wrapped + result[finish:]
        shift += len(wrapped) - len(fragment)

    for issue in sensitive_issues:
        target = issue['sentence']
        result = result.replace(target, f"<span style='background-color: red'>{target}</span>")

    return result

def review_blog(input_type, text_input, url_input):
    """Full review pipeline: extract text, find grammar and sensitive-content
    issues, and return (highlighted HTML, numbered suggestion text,
    raw suggestion list for the apply step)."""
    if not (text_input or url_input):
        return "Please provide text or a URL.", "", []

    text = extract_text(input_type, text_input, url_input)
    grammar_issues = check_grammar(text)
    sensitive_issues = detect_sensitive_content(text)
    suggestions = generate_suggestions(text, grammar_issues, sensitive_issues)
    highlighted = highlight_text(text, grammar_issues, sensitive_issues)
    numbered = "\n".join(f"{pos}. {entry}" for pos, entry in enumerate(suggestions, start=1))
    return highlighted, numbered, suggestions

def apply_changes(text, suggestions, approved_indices):
    """Apply the user-approved suggestions to *text* and return the result.

    *approved_indices* is a comma-separated, 1-based list such as "1,3".
    Grammar suggestions replace the flagged snippet with its correction;
    sensitive-content suggestions replace the flagged sentence with its
    summarizer rephrasing. Non-numeric or out-of-range entries are skipped.

    Fixes vs. the previous version:
    - The old gate ``re.search(r"'([^']+)'$", suggestion)`` never matched,
      because every suggestion string ends with a parenthetical
      ("(message)" / "(Toxicity score: X)"), so NO approved change was
      ever applied. Parsing now uses the specific per-kind patterns only.
    - Negative indices (e.g. "0" -> -1) no longer wrap around and apply
      the last suggestion.
    - Removed the unused ``sent_tokenize(text)`` call (dead code and a
      needless nltk dependency here).
    """
    for raw_idx in approved_indices.split(','):
        try:
            idx = int(raw_idx.strip()) - 1
        except ValueError:
            continue  # empty or non-numeric entry between commas
        if not 0 <= idx < len(suggestions):
            continue
        suggestion = suggestions[idx]
        if "Rephrase sensitive content" in suggestion:
            # Format: "Rephrase sensitive content '<orig>' to: '<new>' (Toxicity score: X)"
            match = re.search(r"Rephrase sensitive content '([^']+)'\s+to:\s+'([^']+)'", suggestion)
        else:
            # Format: "Replace '<orig>' with '<new>' (<message>)"
            match = re.search(r"Replace '([^']+)'\s+with\s+'([^']+)'", suggestion)
        if match:
            text = text.replace(match.group(1), match.group(2))
    return text

# ---------------------------------------------------------------------------
# Gradio interface: review pane, suggestion approval, and apply step.
# ---------------------------------------------------------------------------
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("## 🧠 AI Blog Reviewer with Grammar & Bias Detection")
    gr.Markdown("Enter blog content or a URL. Detect grammar issues and sensitive (toxic, biased) content.")

    input_type = gr.Radio(["Text", "URL"], label="Input Type", value="Text")
    text_input = gr.Textbox(label="Blog Text", lines=10, visible=True)
    url_input = gr.Textbox(label="Blog URL", visible=False)

    # Parameter renamed from ``type`` (shadowed the builtin) to ``choice``.
    def toggle_input(choice):
        """Show the input widget matching the selected mode, hide the other."""
        return {
            text_input: gr.update(visible=choice == "Text"),
            url_input: gr.update(visible=choice == "URL"),
        }

    input_type.change(fn=toggle_input, inputs=input_type, outputs=[text_input, url_input])

    review_btn = gr.Button("🔍 Review Blog")
    highlighted_output = gr.HTML(label="Highlighted Output")
    suggestions_output = gr.Textbox(label="Suggestions", lines=8)
    approve_indices = gr.Textbox(label="Approve Suggestions (e.g., 1,2)")
    apply_btn = gr.Button("✅ Apply Suggestions")
    final_output = gr.Textbox(label="Updated Text", lines=10)

    # Raw suggestion list carried between the review and apply steps.
    suggestions_state = gr.State()

    review_btn.click(fn=review_blog,
                     inputs=[input_type, text_input, url_input],
                     outputs=[highlighted_output, suggestions_output, suggestions_state])

    # NOTE(review): apply_changes reads text_input directly, so applying
    # suggestions to URL-sourced text relies on the user pasting it first.
    apply_btn.click(fn=apply_changes,
                    inputs=[text_input, suggestions_state, approve_indices],
                    outputs=final_output)

demo.launch()