Spaces:
Running
Running
import gradio as gr | |
from transformers import pipeline | |
from newspaper import Article | |
import language_tool_python | |
import nltk | |
import re | |
from nltk.tokenize import sent_tokenize | |
# Download the NLTK "punkt" tokenizer data. This runs at import time, before
# any sent_tokenize() call made by the functions below.
nltk.download("punkt")
# Connect to the local LanguageTool server started via setup.sh
# NOTE(review): confirm that the installed language_tool_python version accepts
# the `endpoint` keyword here; some releases only document `remote_server` on
# `LanguageTool` for pointing at a self-hosted server.
grammar_tool = language_tool_python.LanguageToolPublicAPI(language='en-US', endpoint='http://localhost:8081/')
# Load transformers pipelines once at module import so every request reuses them.
# `summarizer` is used to propose rephrasings of flagged sentences;
# `toxicity_classifier` labels individual sentences in detect_sensitive_content().
summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-6-6")
toxicity_classifier = pipeline("text-classification", model="Hate-speech-CNERG/bert-base-uncased-hatexplain")
def extract_text(input_type, text_input, url_input):
    """Return the text to review, either typed in or fetched from a URL.

    When ``input_type`` is ``"URL"`` and ``url_input`` is non-empty, the page
    is downloaded and parsed with ``Article`` and its text is returned. Any
    failure while doing so is reported as a string beginning with
    ``"Error fetching URL: "`` instead of being raised. In every other case
    ``text_input`` is returned unchanged.
    """
    use_url = input_type == "URL" and bool(url_input)
    if not use_url:
        return text_input

    try:
        page = Article(url_input)
        page.download()
        page.parse()
        body = page.text
    except Exception as exc:
        # Errors are surfaced to the caller as text rather than exceptions.
        return f"Error fetching URL: {exc}"
    return body
def check_grammar(text):
    """Run the grammar checker over ``text`` and return a list of issue dicts.

    Each dict has the keys ``text`` (the match context), ``error`` (the
    checker's message), ``suggestions`` (replacement candidates), ``offset``
    and ``length`` (position and size of the flagged span).

    If anything goes wrong while checking, a single placeholder issue is
    returned whose ``error`` starts with ``"Grammar check failed: "`` and
    whose ``suggestions`` list is empty.
    """
    try:
        report = []
        for hit in grammar_tool.check(text):
            entry = {
                "text": hit.context,
                "error": hit.message,
                "suggestions": hit.replacements,
                "offset": hit.offset,
                "length": hit.errorLength,
            }
            report.append(entry)
        return report
    except Exception as exc:
        failure = {
            "text": "",
            "error": f"Grammar check failed: {str(exc)}",
            "suggestions": [],
            "offset": 0,
            "length": 0,
        }
        return [failure]
def detect_sensitive_content(text):
    """Classify each sentence of ``text`` and return the ones flagged as sensitive.

    The text is split with ``sent_tokenize`` and every sentence is passed to
    ``toxicity_classifier``. A sentence is reported when the lower-cased label
    of the top prediction contains ``toxic``, ``hate`` or ``offensive``.

    Returns:
        A list of dicts with the keys ``sentence``, ``score`` (score of the
        top prediction), ``label`` (lower-cased label) and ``index``
        (position of the sentence in the tokenized text).
    """
    flagged_terms = ('toxic', 'hate', 'offensive')
    sensitive_issues = []
    for i, sentence in enumerate(sent_tokenize(text)):
        # truncation=True keeps over-long "sentences" (e.g. scraped text with
        # no punctuation) within the model's maximum input length instead of
        # failing inside the classifier.
        top = toxicity_classifier(sentence, truncation=True)[0]
        label = top['label'].lower()
        if any(term in label for term in flagged_terms):
            sensitive_issues.append({
                "sentence": sentence,
                "score": top['score'],
                "label": label,
                "index": i
            })
    return sensitive_issues
def generate_suggestions(text, grammar_issues, sensitive_issues):
    """Build human-readable suggestion strings for the detected issues.

    Args:
        text: The reviewed text; grammar offsets are resolved against it.
        grammar_issues: Issue dicts as produced by ``check_grammar``.
        sensitive_issues: Issue dicts as produced by ``detect_sensitive_content``.

    Returns:
        A list of strings. Grammar issues that carry at least one replacement
        become ``"Replace '<fragment>' with '<first replacement>' (<error>)"``.
        Each sensitive sentence becomes
        ``"Rephrase sensitive content '<sentence>' to: '<summary>' (Toxicity score: x.xx)"``.
    """
    suggestions = []
    for issue in grammar_issues:
        if not issue['suggestions']:
            continue
        # Quote the exact flagged fragment (text[offset:offset+length]) rather
        # than issue['text'], which holds the surrounding context and would
        # make the "Replace" instruction misleading. Fall back to the context
        # only when the span cannot be resolved.
        start = issue.get('offset', 0)
        span = issue.get('length', 0)
        fragment = text[start:start + span] if text else ''
        fragment = fragment or issue['text']
        suggestions.append(f"Replace '{fragment}' with '{issue['suggestions'][0]}' ({issue['error']})")
    for issue in sensitive_issues:
        summary = summarizer(issue['sentence'], max_length=50, min_length=10, do_sample=False)[0]['summary_text']
        suggestions.append(f"Rephrase sensitive content '{issue['sentence']}' to: '{summary}' (Toxicity score: {issue['score']:.2f})")
    return suggestions
def highlight_text(text, grammar_issues, sensitive_issues):
    """Render ``text`` as HTML with grammar and sensitive spans highlighted.

    Grammar issues (located by ``offset``/``length``) get a yellow background;
    every occurrence of a sensitive ``sentence`` gets a red background. Where
    both apply, the yellow span is nested inside the red one.

    All positions are computed against the original ``text`` before any markup
    is inserted, so unsorted or overlapping grammar matches cannot shift each
    other and a sentence that contains a grammar hit is still found. The text
    itself is HTML-escaped because it may come from user input or a fetched
    web page.

    Returns:
        The HTML string.
    """
    import html
    from itertools import groupby

    size = len(text)

    # Mark every character covered by a grammar issue.
    grammar_mask = [False] * size
    for issue in grammar_issues:
        start = max(issue['offset'], 0)
        end = min(start + issue['length'], size)
        if end > start:
            grammar_mask[start:end] = [True] * (end - start)

    # Mark every character covered by any occurrence of a sensitive sentence.
    sensitive_mask = [False] * size
    for issue in sensitive_issues:
        sentence = issue['sentence']
        if not sentence:
            continue  # an empty needle would match everywhere
        pos = text.find(sentence)
        while pos != -1:
            stop = pos + len(sentence)
            sensitive_mask[pos:stop] = [True] * len(sentence)
            pos = text.find(sentence, stop)

    # Emit runs of equal state: outer runs by sensitivity, inner by grammar.
    pieces = []
    for is_sensitive, outer in groupby(range(size), key=sensitive_mask.__getitem__):
        inner_parts = []
        for is_grammar, inner in groupby(outer, key=grammar_mask.__getitem__):
            indices = list(inner)
            chunk = html.escape(text[indices[0]:indices[-1] + 1])
            if is_grammar:
                chunk = f"<span style='background-color: yellow'>{chunk}</span>"
            inner_parts.append(chunk)
        body = "".join(inner_parts)
        if is_sensitive:
            body = f"<span style='background-color: red'>{body}</span>"
        pieces.append(body)
    return "".join(pieces)
def review_blog(input_type, text_input, url_input):
    """Review blog content and return the values for the three UI outputs.

    Args:
        input_type: ``"Text"`` or ``"URL"``, as selected in the UI.
        text_input: The pasted blog text (may be empty).
        url_input: The blog URL (may be empty).

    Returns:
        A tuple ``(highlighted_html_or_message, numbered_suggestions_text,
        suggestions_list)``. When there is nothing to review, or the URL
        could not be fetched, the first element is a plain message and the
        other two are ``""`` and ``[]``.
    """
    if not text_input and not url_input:
        return "Please provide text or a URL.", "", []
    text = extract_text(input_type, text_input, url_input)
    # Nothing usable came back (e.g. URL mode with an empty article body, or
    # the field for the selected mode was left blank).
    if not text or not text.strip():
        return "No text found to review.", "", []
    # Only treat the text as a fetch failure when a URL fetch was actually
    # attempted and the result carries extract_text's error prefix; pasted
    # text that merely begins with "Error" must still be reviewed.
    fetched_from_url = input_type == "URL" and bool(url_input)
    if fetched_from_url and text.startswith("Error fetching URL:"):
        return text, "", []
    grammar_issues = check_grammar(text)
    sensitive_issues = detect_sensitive_content(text)
    suggestions = generate_suggestions(text, grammar_issues, sensitive_issues)
    highlighted_text = highlight_text(text, grammar_issues, sensitive_issues)
    suggestions_text = "\n".join(f"{i}. {sug}" for i, sug in enumerate(suggestions, start=1))
    return highlighted_text, suggestions_text, suggestions
# Patterns mirroring the two suggestion formats emitted by generate_suggestions.
# They are anchored on the fixed wording around the quoted parts so that
# apostrophes inside the quoted text or the error message do not break parsing.
_GRAMMAR_SUGGESTION_RE = re.compile(
    r"Replace '(?P<old>.+)' with '(?P<new>.*?)' \(.*\)", re.DOTALL
)
_SENSITIVE_SUGGESTION_RE = re.compile(
    r"Rephrase sensitive content '(?P<old>.+)' to: '(?P<new>.*)' \(Toxicity score: [^)]*\)",
    re.DOTALL,
)


def apply_changes(text, suggestions, approved_indices):
    """Apply the approved suggestions to ``text`` and return the result.

    Args:
        text: The text to modify.
        suggestions: Suggestion strings as produced by ``generate_suggestions``
            (may be ``None`` or empty if no review has been run yet).
        approved_indices: Comma-separated, 1-based suggestion numbers,
            e.g. ``"1, 3"``. Entries that are not integers or are out of
            range are ignored.

    Returns:
        ``text`` with every occurrence of each approved suggestion's original
        fragment replaced by its proposed replacement. Suggestions that do
        not match either known format are skipped.
    """
    if not text or not suggestions or not approved_indices:
        return text
    for token in approved_indices.split(','):
        try:
            idx = int(token.strip()) - 1
        except ValueError:
            continue
        # Reject 0 and negatives explicitly: a negative index would otherwise
        # silently select a suggestion counted from the end of the list.
        if not 0 <= idx < len(suggestions):
            continue
        suggestion = suggestions[idx]
        parsed = (_SENSITIVE_SUGGESTION_RE.fullmatch(suggestion)
                  or _GRAMMAR_SUGGESTION_RE.fullmatch(suggestion))
        if parsed:
            text = text.replace(parsed.group('old'), parsed.group('new'))
    return text
# Gradio UI
# Components are created in the order they should appear on the page; the
# event handlers are registered inside the Blocks context, and the app is
# launched once the context has been closed.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("## 🧠 AI Blog Reviewer with Grammar & Bias Detection")
    gr.Markdown("Enter blog content or a URL. Detect grammar issues and sensitive (toxic, biased) content.")
    # Input selection: only the field matching the chosen input type is shown.
    input_type = gr.Radio(["Text", "URL"], label="Input Type", value="Text")
    text_input = gr.Textbox(label="Blog Text", lines=10, visible=True)
    url_input = gr.Textbox(label="Blog URL", visible=False)
    # Show the text box for "Text" and the URL box for "URL".
    # NOTE(review): the parameter name `type` shadows the builtin inside this
    # helper; harmless here, but worth renaming.
    def toggle_input(type):
        return {
            text_input: gr.update(visible=type == "Text"),
            url_input: gr.update(visible=type == "URL")
        }
    input_type.change(fn=toggle_input, inputs=input_type, outputs=[text_input, url_input])
    # Review step: highlighted HTML plus a numbered list of suggestions.
    review_btn = gr.Button("🔍 Review Blog")
    highlighted_output = gr.HTML(label="Highlighted Output")
    suggestions_output = gr.Textbox(label="Suggestions", lines=8)
    # Apply step: the user lists the suggestion numbers to accept.
    approve_indices = gr.Textbox(label="Approve Suggestions (e.g., 1,2)")
    apply_btn = gr.Button("✅ Apply Suggestions")
    final_output = gr.Textbox(label="Updated Text", lines=10)
    # Holds the raw suggestions list returned by review_blog so that
    # apply_changes can look suggestions up by number.
    suggestions_state = gr.State()
    review_btn.click(fn=review_blog,
                     inputs=[input_type, text_input, url_input],
                     outputs=[highlighted_output, suggestions_output, suggestions_state])
    # NOTE(review): apply_changes always receives the "Blog Text" box, so in
    # URL mode the suggestions (computed on the fetched article) are applied
    # to whatever is in that box rather than to the article text — confirm
    # whether the fetched text should be kept in state and used here instead.
    apply_btn.click(fn=apply_changes,
                    inputs=[text_input, suggestions_state, approve_indices],
                    outputs=final_output)
# Start the web server; this runs at import time of the module.
demo.launch()