File size: 5,769 Bytes
768c740
56f7cbb
768c740
9994894
 
768c740
9994894
56f7cbb
 
 
01e4eed
5f0de1f
9994894
01e4eed
9994894
56f7cbb
01e4eed
9994894
 
56f7cbb
 
768c740
56f7cbb
9994894
 
 
 
56f7cbb
768c740
 
9994894
 
 
 
 
 
 
 
 
 
768c740
 
56f7cbb
 
 
 
9994894
56f7cbb
 
 
 
 
 
768c740
 
56f7cbb
 
 
 
 
 
 
 
768c740
 
56f7cbb
 
 
 
 
 
 
 
 
 
9994894
 
 
 
56f7cbb
768c740
56f7cbb
 
 
 
 
 
 
 
9994894
56f7cbb
768c740
 
56f7cbb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9994894
56f7cbb
9994894
56f7cbb
 
 
 
768c740
9994894
 
768c740
 
56f7cbb
 
768c740
56f7cbb
768c740
56f7cbb
 
768c740
 
 
 
9994894
 
 
 
 
 
768c740
 
 
9994894
 
 
768c740
56f7cbb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
import gradio as gr
import language_tool_python
import nltk
from transformers import pipeline
from newspaper import Article
from nltk.tokenize import sent_tokenize
import re

# One-time setup: fetch the Punkt model that sent_tokenize() relies on below.
nltk.download("punkt")

# Correct way to connect to the locally running LanguageTool server
# NOTE(review): assumes a LanguageTool HTTP server is already listening on
# localhost:8081 — the app will fail at import time otherwise; confirm.
grammar_tool = language_tool_python.LanguageTool(
    language='en-US',
    remote_server='http://localhost:8081/v2/'
)


# Hugging Face pipelines (models are downloaded/loaded at import time)
toxicity_classifier = pipeline("text-classification", model="unitary/toxic-bert")
summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-6-6")

def extract_text(input_type, text_input, url_input):
    """Return the text to review.

    When the URL input mode is selected and a URL was given, download and
    parse the article body with newspaper; otherwise fall back to the raw
    pasted text.
    """
    if input_type != "URL" or not url_input:
        return text_input
    page = Article(url_input)
    page.download()
    page.parse()
    return page.text

def check_grammar(text):
    """Run LanguageTool over *text* and return one issue dict per match.

    Each dict carries the match context, the error message, the suggested
    replacements, and the (offset, length) span relative to *text*.
    """
    issues = []
    for match in grammar_tool.check(text):
        issues.append({
            "text": match.context,
            "error": match.message,
            "suggestions": match.replacements,
            "offset": match.offset,
            "length": match.errorLength,
        })
    return issues

def detect_sensitive_content(text):
    """Flag toxic sentences in *text*.

    Classifies each sentence with toxic-bert and keeps the ones labelled
    'toxic' with confidence above 0.7, recording the sentence, its score,
    and its position in the tokenized sentence list.
    """
    flagged = []
    for position, sentence in enumerate(sent_tokenize(text)):
        prediction = toxicity_classifier(sentence)[0]
        if prediction['label'] == 'toxic' and prediction['score'] > 0.7:
            flagged.append({
                "sentence": sentence,
                "score": prediction['score'],
                "index": position,
            })
    return flagged

def generate_suggestions(text, grammar_issues, sensitive_issues):
    """Turn detected issues into human-readable suggestion strings.

    Grammar issues become "Replace '<old>' with '<new>' (<reason>)" lines
    (only when LanguageTool offered at least one replacement); each toxic
    sentence is rephrased via the summarizer pipeline.
    """
    suggestions = [
        f"Replace '{issue['text']}' with '{issue['suggestions'][0]}' ({issue['error']})"
        for issue in grammar_issues
        if issue['suggestions']
    ]
    for issue in sensitive_issues:
        rewrite = summarizer(
            issue['sentence'], max_length=50, min_length=10, do_sample=False
        )[0]['summary_text']
        suggestions.append(
            f"Rephrase sensitive content '{issue['sentence']}' to: '{rewrite}' "
            f"(Toxicity score: {issue['score']:.2f})"
        )
    return suggestions

def highlight_text(text, grammar_issues, sensitive_issues):
    """Return *text* as HTML with issues wrapped in colored <span>s.

    Grammar spans are highlighted yellow; toxic sentences red.  Grammar
    offsets refer to the original text, so a running shift accounts for how
    much previously inserted markup has lengthened the working string
    (assumes grammar_issues arrive in ascending offset order, as
    LanguageTool returns them).
    """
    result = text
    shift = 0
    for issue in grammar_issues:
        begin = issue['offset'] + shift
        finish = begin + issue['length']
        piece = result[begin:finish]
        wrapped = f"<span style='background-color: yellow'>{piece}</span>"
        result = result[:begin] + wrapped + result[finish:]
        shift += len(wrapped) - len(piece)
    for issue in sensitive_issues:
        marked = f"<span style='background-color: red'>{issue['sentence']}</span>"
        result = result.replace(issue['sentence'], marked)
    return result

def review_blog(input_type, text_input, url_input):
    """Full review pipeline for the UI's "Review Content" button.

    Resolves the input text, collects grammar and toxicity issues, and
    returns a 3-tuple: highlighted HTML, a numbered suggestions string for
    display, and the raw suggestion list (kept in state for apply_changes).
    """
    if not (text_input or url_input):
        return "Please provide text or a URL.", "", []
    text = extract_text(input_type, text_input, url_input)
    grammar_issues = check_grammar(text)
    sensitive_issues = detect_sensitive_content(text)
    suggestions = generate_suggestions(text, grammar_issues, sensitive_issues)
    numbered = "\n".join(f"{n}. {s}" for n, s in enumerate(suggestions, start=1))
    return highlight_text(text, grammar_issues, sensitive_issues), numbered, suggestions

def apply_changes(text, suggestions, approved_indices):
    """Apply the user-approved suggestions to *text*.

    Parameters
    ----------
    text : str
        The blog text to edit.
    suggestions : list[str]
        Suggestion strings produced by generate_suggestions().
    approved_indices : str
        Comma-separated 1-based suggestion numbers, e.g. "1,3".  Blank or
        non-numeric entries are skipped.

    Returns
    -------
    str
        The text with each approved replacement applied.
    """
    # NOTE: the previous version gated every replacement on
    # re.search(r"'([^']+)'$", suggestion), but neither suggestion format
    # ends with a quote (both end with "(...)"), so no approved change was
    # ever applied.  It also computed an unused sent_tokenize(text).
    for raw in approved_indices.split(','):
        try:
            idx = int(raw.strip()) - 1
        except ValueError:
            continue  # ignore blanks / non-numeric entries
        # Guard both bounds: a plain `idx < len(...)` would let "0" or
        # negative input silently select suggestions from the end.
        if not 0 <= idx < len(suggestions):
            continue
        suggestion = suggestions[idx]
        if "Rephrase sensitive content" in suggestion:
            # Format: Rephrase sensitive content '<orig>' to: '<new>' (Toxicity score: X)
            orig_match = re.search(r"'([^']+)'\s+to:", suggestion)
            new_match = re.search(r"to:\s*'([^']+)'", suggestion)
            if orig_match and new_match:
                text = text.replace(orig_match.group(1), new_match.group(1))
        else:
            # Format: Replace '<old>' with '<new>' (<reason>)
            pair = re.search(r"Replace '([^']+)' with '([^']+)'", suggestion)
            if pair:
                text = text.replace(pair.group(1), pair.group(2))
    return text

# --- Gradio UI -------------------------------------------------------------
# Component creation order determines on-screen layout; do not reorder.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# ✨ AI Blog Reviewer")
    gr.Markdown("Highlight grammar & sensitive issues. Rephrase toxic content. Approve and apply changes.")

    # Only one of the two inputs is visible at a time, switched by the radio.
    input_type = gr.Radio(["Text", "URL"], label="Input Type", value="Text")
    text_input = gr.Textbox(label="Blog Text", lines=10, visible=True)
    url_input = gr.Textbox(label="Blog URL", visible=False)

    def toggle_input(type):
        # Show the text box in "Text" mode, the URL box in "URL" mode.
        # NOTE(review): the parameter `type` shadows the builtin of the
        # same name; harmless here but worth renaming.
        return {
            text_input: gr.update(visible=type == "Text"),
            url_input: gr.update(visible=type == "URL")
        }

    input_type.change(fn=toggle_input, inputs=input_type, outputs=[text_input, url_input])

    review_btn = gr.Button("Review Content")
    highlighted = gr.HTML()  # rendered HTML with colored highlight spans
    suggestions = gr.Textbox(label="Suggestions", lines=10)
    approved = gr.Textbox(label="Approve Suggestions (e.g., 1,3)")
    apply_btn = gr.Button("Apply Changes")
    final = gr.Textbox(label="Final Text", lines=10)

    # Raw suggestion list is kept in session state so apply_changes can
    # parse the exact strings review_blog produced.
    suggestions_state = gr.State()

    review_btn.click(fn=review_blog, inputs=[input_type, text_input, url_input],
                     outputs=[highlighted, suggestions, suggestions_state])
    # NOTE(review): applies changes to the pasted text box only — in URL
    # mode the fetched article text is not available here; confirm intended.
    apply_btn.click(fn=apply_changes, inputs=[text_input, suggestions_state, approved], outputs=final)

demo.launch()