# AI Blog Reviewer — Gradio app (Hugging Face Space)
import gradio as gr | |
import language_tool_python | |
import nltk | |
from transformers import pipeline | |
from newspaper import Article | |
from nltk.tokenize import sent_tokenize | |
import re | |
# Sentence tokenizer data required by nltk.sent_tokenize below.
nltk.download("punkt")
# Correct way to connect to the locally running LanguageTool server
# (the server must already be listening on port 8081; no bundled
# LanguageTool instance is spawned).
grammar_tool = language_tool_python.LanguageTool(
    language='en-US',
    remote_server='http://localhost:8081/v2/'
)
# Hugging Face pipelines: toxicity classification for flagging sentences,
# and summarization, whose output is reused as a "rephrase" suggestion.
toxicity_classifier = pipeline("text-classification", model="unitary/toxic-bert")
summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-6-6")
def extract_text(input_type, text_input, url_input):
    """Return the text to review: pasted text, or an article fetched by URL.

    Args:
        input_type: "Text" or "URL" — which input mode the user selected.
        text_input: Raw blog text (used in Text mode and as fallback).
        url_input: Article URL (used only when input_type is "URL").

    Returns:
        The downloaded article body when a URL is given and fetchable,
        otherwise text_input.
    """
    if input_type == "URL" and url_input:
        try:
            article = Article(url_input)
            article.download()
            article.parse()
            return article.text
        except Exception:
            # Network or parse failures fall back to the pasted text
            # instead of crashing the whole Gradio callback.
            return text_input
    return text_input
def check_grammar(text):
    """Run LanguageTool over *text* and describe each match as a dict.

    Each dict carries the match context, the error message, the suggested
    replacements, and the character offset/length used for highlighting.
    """
    issues = []
    for m in grammar_tool.check(text):
        issues.append({
            "text": m.context,
            "error": m.message,
            "suggestions": m.replacements,
            "offset": m.offset,
            "length": m.errorLength,
        })
    return issues
def detect_sensitive_content(text):
    """Flag sentences the toxicity classifier labels 'toxic' above 0.7.

    Returns a list of dicts holding the sentence, its toxicity score,
    and its position in the tokenized sentence sequence.
    """
    flagged = []
    for position, sentence in enumerate(sent_tokenize(text)):
        top = toxicity_classifier(sentence)[0]
        if top['label'] == 'toxic' and top['score'] > 0.7:
            flagged.append({
                "sentence": sentence,
                "score": top['score'],
                "index": position,
            })
    return flagged
def generate_suggestions(text, grammar_issues, sensitive_issues):
    """Build human-readable suggestion strings for every detected issue.

    Grammar issues with at least one replacement become "Replace ..." lines;
    each sensitive sentence is summarized by the summarization model and
    offered as a rephrasing.
    """
    suggestions = []
    for grammar in grammar_issues:
        replacements = grammar['suggestions']
        if not replacements:
            continue
        suggestions.append(
            f"Replace '{grammar['text']}' with '{replacements[0]}' ({grammar['error']})"
        )
    for sensitive in sensitive_issues:
        rewrite = summarizer(
            sensitive['sentence'], max_length=50, min_length=10, do_sample=False
        )[0]['summary_text']
        suggestions.append(
            f"Rephrase sensitive content '{sensitive['sentence']}' to: '{rewrite}'"
            f" (Toxicity score: {sensitive['score']:.2f})"
        )
    return suggestions
def highlight_text(text, grammar_issues, sensitive_issues):
    """Wrap detected issues in colored <span> tags for HTML display.

    Grammar issues are highlighted yellow using their character offsets;
    sensitive sentences are highlighted red via whole-string replacement.

    Args:
        text: The original document text.
        grammar_issues: Dicts with 'offset' and 'length' keys (as produced
            by check_grammar); other keys are ignored here.
        sensitive_issues: Dicts with a 'sentence' key.

    Returns:
        An HTML string with issue spans highlighted.
    """
    highlighted = text
    offset_adjust = 0
    # Process in ascending offset order: the running adjustment is only
    # valid if earlier insertions precede later ones, so don't rely on
    # the matches arriving pre-sorted.
    for issue in sorted(grammar_issues, key=lambda i: i['offset']):
        start = issue['offset'] + offset_adjust
        end = start + issue['length']
        error_text = highlighted[start:end]
        span = f"<span style='background-color: yellow'>{error_text}</span>"
        highlighted = highlighted[:start] + span + highlighted[end:]
        offset_adjust += len(span) - len(error_text)
    for issue in sensitive_issues:
        highlighted = highlighted.replace(
            issue['sentence'],
            f"<span style='background-color: red'>{issue['sentence']}</span>"
        )
    return highlighted
def review_blog(input_type, text_input, url_input):
    """Gradio callback running the full review pipeline on the submission.

    Returns a triple of (highlighted HTML, numbered suggestion text, raw
    suggestion list); the raw list is stored in gr.State for the apply step.
    """
    if not (text_input or url_input):
        return "Please provide text or a URL.", "", []
    text = extract_text(input_type, text_input, url_input)
    grammar_issues = check_grammar(text)
    sensitive_issues = detect_sensitive_content(text)
    suggestions = generate_suggestions(text, grammar_issues, sensitive_issues)
    highlighted_text = highlight_text(text, grammar_issues, sensitive_issues)
    numbered = "\n".join(f"{n}. {s}" for n, s in enumerate(suggestions, start=1))
    return highlighted_text, numbered, suggestions
def apply_changes(text, suggestions, approved_indices):
    """Apply the user-approved suggestions to *text*.

    Args:
        text: The original blog text.
        suggestions: Suggestion strings as produced by generate_suggestions.
        approved_indices: Comma-separated 1-based indices, e.g. "1,3".
            Blank or non-numeric entries are ignored.

    Returns:
        The text with each approved replacement/rephrasing applied.

    Fixes over the original implementation:
    - The old gate `re.search(r"'([^']+)'$", suggestion)` could never match,
      because every suggestion string ends with a parenthesized note — so no
      approved change was ever applied. Each format is now parsed directly.
    - Indices below 1 (e.g. "0" -> -1) are rejected instead of silently
      selecting from the end of the list.
    - Removed an unused sent_tokenize() call.
    """
    for raw in approved_indices.split(','):
        try:
            idx = int(raw.strip()) - 1
        except ValueError:
            continue  # skip blanks and non-numeric entries
        if not 0 <= idx < len(suggestions):
            continue  # out-of-range (including negative) indices
        suggestion = suggestions[idx]
        if "Rephrase sensitive content" in suggestion:
            m = re.search(r"Rephrase sensitive content '([^']+)' to: '([^']+)'",
                          suggestion)
        else:
            m = re.search(r"Replace '([^']+)' with '([^']+)'", suggestion)
        if m:
            text = text.replace(m.group(1), m.group(2))
    return text
# ---------------------------------------------------------------------------
# Gradio UI: text/URL input, review button, suggestion approval, apply step.
# Component creation order defines the page layout.
# ---------------------------------------------------------------------------
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# ✨ AI Blog Reviewer")
    gr.Markdown("Highlight grammar & sensitive issues. Rephrase toxic content. Approve and apply changes.")
    input_type = gr.Radio(["Text", "URL"], label="Input Type", value="Text")
    text_input = gr.Textbox(label="Blog Text", lines=10, visible=True)
    url_input = gr.Textbox(label="Blog URL", visible=False)

    def toggle_input(selected):
        """Show only the input widget matching the chosen input type."""
        # Parameter renamed from 'type' to avoid shadowing the builtin.
        return {
            text_input: gr.update(visible=selected == "Text"),
            url_input: gr.update(visible=selected == "URL")
        }

    input_type.change(fn=toggle_input, inputs=input_type, outputs=[text_input, url_input])
    review_btn = gr.Button("Review Content")
    highlighted = gr.HTML()
    suggestions = gr.Textbox(label="Suggestions", lines=10)
    approved = gr.Textbox(label="Approve Suggestions (e.g., 1,3)")
    apply_btn = gr.Button("Apply Changes")
    final = gr.Textbox(label="Final Text", lines=10)
    # Raw suggestion list carried from the review click to the apply click.
    suggestions_state = gr.State()
    review_btn.click(fn=review_blog, inputs=[input_type, text_input, url_input],
                     outputs=[highlighted, suggestions, suggestions_state])
    # NOTE(review): the apply step reads text_input directly, so in URL mode
    # changes are applied to the (likely empty) text box, not the fetched
    # article — confirm whether that is intended.
    apply_btn.click(fn=apply_changes, inputs=[text_input, suggestions_state, approved], outputs=final)

demo.launch()