import gradio as gr
from transformers import pipeline
from newspaper import Article
import language_tool_python
import nltk
import re
from nltk.tokenize import sent_tokenize

# Download the sentence tokenizer data used by sent_tokenize
nltk.download("punkt")
nltk.download("punkt_tab")  # required by newer NLTK releases; fails gracefully on older ones

# Connect to the local LanguageTool server started via setup.sh
# (LanguageToolPublicAPI always targets the public API, so point LanguageTool at the local server instead)
grammar_tool = language_tool_python.LanguageTool('en-US', remote_server='http://localhost:8081')

# Load transformers pipelines: DistilBART for generating rephrasings,
# HateXplain for sentence-level toxicity classification
summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-6-6")
toxicity_classifier = pipeline("text-classification", model="Hate-speech-CNERG/bert-base-uncased-hatexplain")

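# Pull the text to review either from the textbox or, for URL input, by
# downloading and parsing the page with newspaper3k; download/parse
# failures come back as an "Error fetching URL: ..." string.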
def extract_text(input_type, text_input, url_input):
    if input_type == "URL" and url_input:
        try:
            article = Article(url_input)
            article.download()
            article.parse()
            return article.text
        except Exception as e:
            return f"Error fetching URL: {e}"
    return text_input

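# Run LanguageTool over the whole text and flatten each match into a dict:
# context snippet, error message, suggested replacements, and the
# character offset/length of the error span.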
def check_grammar(text):
    try:
        matches = grammar_tool.check(text)
        return [
            {
                "text": match.context,
                "error": match.message,
                "suggestions": match.replacements,
                "offset": match.offset,
                "length": match.errorLength
            } for match in matches
        ]
    except Exception as e:
        return [{"text": "", "error": f"Grammar check failed: {str(e)}", "suggestions": [], "offset": 0, "length": 0}]

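# Score each sentence with the HateXplain classifier and keep those whose
# predicted label mentions toxic/hate/offensive. Very long sentences may
# exceed the model's 512-token input limit.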
def detect_sensitive_content(text):
    sentences = sent_tokenize(text)
    sensitive_issues = []
    for i, sentence in enumerate(sentences):
        result = toxicity_classifier(sentence)
        label = result[0]['label'].lower()
        if any(term in label for term in ['toxic', 'hate', 'offensive']):
            sensitive_issues.append({
                "sentence": sentence,
                "score": result[0]['score'],
                "label": label,
                "index": i
            })
    return sensitive_issues

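# Build human-readable suggestion strings: grammar matches become
# "Replace '...' with '...'" lines, and sensitive sentences are rephrased
# via the summarizer. apply_changes() parses these exact templates back
# out, so the two formats must stay in sync.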
def generate_suggestions(text, grammar_issues, sensitive_issues):
    suggestions = []
    for issue in grammar_issues:
        if issue['suggestions']:
            suggestions.append(f"Replace '{issue['text']}' with '{issue['suggestions'][0]}' ({issue['error']})")
    for issue in sensitive_issues:
        summary = summarizer(issue['sentence'], max_length=50, min_length=10, do_sample=False)[0]['summary_text']
        suggestions.append(f"Rephrase sensitive content '{issue['sentence']}' to: '{summary}' (Toxicity score: {issue['score']:.2f})")
    return suggestions

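# Mark up the text for the gr.HTML output: grammar error spans get a
# yellow background, sensitive sentences a red one.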
def highlight_text(text, grammar_issues, sensitive_issues):
    highlighted = text
    offset_adjust = 0
    # Apply grammar highlights in ascending offset order so offset_adjust
    # (which tracks the HTML markup added so far) stays correct
    for issue in sorted(grammar_issues, key=lambda i: i['offset']):
        start = issue['offset'] + offset_adjust
        end = start + issue['length']
        error_text = highlighted[start:end]
        span = f"<span style='background-color: yellow'>{error_text}</span>"
        highlighted = highlighted[:start] + span + highlighted[end:]
        offset_adjust += len(span) - len(error_text)

    for issue in sensitive_issues:
        sentence = issue['sentence']
        highlighted = highlighted.replace(sentence, f"<span style='background-color: red'>{sentence}</span>")

    return highlighted

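# Full review pipeline: extract text, check grammar, scan for sensitive
# content, then return highlighted HTML, a numbered suggestion list, and
# the raw suggestions for the gr.State holder.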
def review_blog(input_type, text_input, url_input):
    if not text_input and not url_input:
        return "Please provide text or a URL.", "", []

    text = extract_text(input_type, text_input, url_input)
    if text.startswith("Error"):
        return text, "", []

    grammar_issues = check_grammar(text)
    sensitive_issues = detect_sensitive_content(text)
    suggestions = generate_suggestions(text, grammar_issues, sensitive_issues)
    highlighted_text = highlight_text(text, grammar_issues, sensitive_issues)
    suggestions_text = "\n".join([f"{i+1}. {sug}" for i, sug in enumerate(suggestions)])

    return highlighted_text, suggestions_text, suggestions

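# Apply the approved suggestions (1-based, comma-separated indices) by
# parsing the original/replacement strings back out of each suggestion.
# Note: this edits the textbox contents, so in URL mode (empty textbox)
# there is nothing to update.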
def apply_changes(text, suggestions, approved_indices):
    if not suggestions:
        return text
    for raw_idx in approved_indices.split(','):
        try:
            idx = int(raw_idx.strip()) - 1
        except ValueError:
            continue
        if not 0 <= idx < len(suggestions):
            continue
        suggestion = suggestions[idx]
        # Match the exact templates produced by generate_suggestions
        # (parsing assumes the quoted spans contain no apostrophes)
        if suggestion.startswith("Rephrase sensitive content"):
            match = re.search(r"Rephrase sensitive content '([^']+)' to: '([^']+)'", suggestion)
        else:
            match = re.search(r"Replace '([^']+)' with '([^']+)'", suggestion)
        if match:
            text = text.replace(match.group(1), match.group(2))
    return text

# Gradio UI
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("## 🧠 AI Blog Reviewer with Grammar & Bias Detection")
    gr.Markdown("Enter blog content or a URL. Detect grammar issues and sensitive (toxic, biased) content.")

    input_type = gr.Radio(["Text", "URL"], label="Input Type", value="Text")
    text_input = gr.Textbox(label="Blog Text", lines=10, visible=True)
    url_input = gr.Textbox(label="Blog URL", visible=False)

    def toggle_input(choice):
        return {
            text_input: gr.update(visible=choice == "Text"),
            url_input: gr.update(visible=choice == "URL")
        }

    input_type.change(fn=toggle_input, inputs=input_type, outputs=[text_input, url_input])

    review_btn = gr.Button("🔍 Review Blog")
    highlighted_output = gr.HTML(label="Highlighted Output")
    suggestions_output = gr.Textbox(label="Suggestions", lines=8)
    approve_indices = gr.Textbox(label="Approve Suggestions (e.g., 1,2)")
    apply_btn = gr.Button("✅ Apply Suggestions")
    final_output = gr.Textbox(label="Updated Text", lines=10)

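    # Holds the raw suggestion list between the Review and Apply clicks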
    suggestions_state = gr.State()

    review_btn.click(fn=review_blog,
                     inputs=[input_type, text_input, url_input],
                     outputs=[highlighted_output, suggestions_output, suggestions_state])

    apply_btn.click(fn=apply_changes,
                    inputs=[text_input, suggestions_state, approve_indices],
                    outputs=final_output)

demo.launch()