"""Gradio app that reviews blog text for grammar issues and sensitive content."""

import html
import re

import gradio as gr
import language_tool_python
import nltk
from newspaper import Article
from nltk.tokenize import sent_tokenize
from transformers import pipeline

# Download the sentence tokenizer data. Newer NLTK releases load "punkt_tab"
# instead of "punkt"; requesting both keeps sent_tokenize working on either.
nltk.download("punkt")
nltk.download("punkt_tab")

# Connect to the local LanguageTool server started via setup.sh.
# A self-hosted server is selected with `remote_server`; LanguageToolPublicAPI
# always targets the public languagetool.org endpoint.
grammar_tool = language_tool_python.LanguageTool(
    "en-US", remote_server="http://localhost:8081/"
)

# Load transformers pipelines
summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-6-6")
toxicity_classifier = pipeline(
    "text-classification",
    model="Hate-speech-CNERG/bert-base-uncased-hatexplain",
)

# Inline styles used by highlight_text for the rendered HTML.
_GRAMMAR_STYLE = "background-color: #fff3a3;"
_SENSITIVE_STYLE = "background-color: #ffb3b3;"
_BOTH_STYLE = "background-color: #ffb3b3; border-bottom: 2px solid #d4a000;"

# Patterns that parse the suggestion strings built by generate_suggestions.
_GRAMMAR_SUGGESTION = re.compile(
    r"^Replace '(?P<old>.*?)' with '(?P<new>.*?)' \(.*\)$", re.DOTALL
)
_SENSITIVE_SUGGESTION = re.compile(
    r"^Rephrase sensitive content '(?P<old>.*)' to: '(?P<new>.*)' "
    r"\(Toxicity score: [^()]*\)$",
    re.DOTALL,
)


def _fetch_article_text(url):
    """Download and parse the article at *url*, returning its body text.

    Any exception raised by the download/parse steps propagates to the caller.
    """
    article = Article(url)
    article.download()
    article.parse()
    return article.text


def extract_text(input_type, text_input, url_input):
    """Return the text to review.

    When *input_type* is ``"URL"`` and *url_input* is non-empty, the article
    is fetched; on failure a string starting with ``"Error fetching URL:"`` is
    returned instead of raising. Otherwise *text_input* is returned unchanged.
    """
    if input_type == "URL" and url_input:
        try:
            return _fetch_article_text(url_input)
        except Exception as exc:  # UI boundary: report the failure as text.
            return f"Error fetching URL: {exc}"
    return text_input


def check_grammar(text):
    """Run the grammar checker on *text*.

    Returns a list of dicts with keys ``text`` (context snippet), ``error``,
    ``suggestions``, ``offset`` and ``length``. If the check itself fails, a
    single placeholder entry describing the failure is returned.
    """
    try:
        matches = grammar_tool.check(text)
    except Exception as exc:  # Best effort: the review continues without grammar.
        return [
            {
                "text": "",
                "error": f"Grammar check failed: {exc}",
                "suggestions": [],
                "offset": 0,
                "length": 0,
            }
        ]
    return [
        {
            "text": match.context,
            "error": match.message,
            "suggestions": match.replacements,
            "offset": match.offset,
            "length": match.errorLength,
        }
        for match in matches
    ]


def detect_sensitive_content(text):
    """Classify each sentence of *text* and return the flagged ones.

    A sentence is flagged when the classifier's label contains ``toxic``,
    ``hate`` or ``offensive``. Each entry has keys ``sentence``, ``score``,
    ``label`` and ``index`` (position of the sentence in the text).
    """
    flagged_terms = ("toxic", "hate", "offensive")
    sensitive_issues = []
    for index, sentence in enumerate(sent_tokenize(text)):
        # Truncate so a very long "sentence" cannot exceed the model's input limit.
        top = toxicity_classifier(sentence, truncation=True)[0]
        label = top["label"].lower()
        if any(term in label for term in flagged_terms):
            sensitive_issues.append(
                {
                    "sentence": sentence,
                    "score": top["score"],
                    "label": label,
                    "index": index,
                }
            )
    return sensitive_issues


def generate_suggestions(text, grammar_issues, sensitive_issues):
    """Build human-readable suggestion strings for the detected issues.

    Grammar suggestions quote the exact flagged span of *text* (taken from the
    issue's offset/length) so that apply_changes can locate it again; the
    context snippet is only used as a fallback when the span is empty.
    """
    suggestions = []
    for issue in grammar_issues:
        if not issue["suggestions"]:
            continue
        start = issue["offset"]
        flagged = text[start:start + issue["length"]] or issue["text"]
        suggestions.append(
            f"Replace '{flagged}' with '{issue['suggestions'][0]}' ({issue['error']})"
        )
    for issue in sensitive_issues:
        summary = summarizer(
            issue["sentence"], max_length=50, min_length=10, do_sample=False
        )[0]["summary_text"]
        suggestions.append(
            f"Rephrase sensitive content '{issue['sentence']}' to: '{summary}' "
            f"(Toxicity score: {issue['score']:.2f})"
        )
    return suggestions


def _find_all(haystack, needle):
    """Return (start, end) spans of every non-overlapping *needle* occurrence."""
    spans = []
    if not needle:
        return spans
    start = haystack.find(needle)
    while start != -1:
        end = start + len(needle)
        spans.append((start, end))
        start = haystack.find(needle, end)
    return spans


def highlight_text(text, grammar_issues, sensitive_issues):
    """Return *text* as HTML with grammar and sensitive spans highlighted.

    All text is HTML-escaped because the result is rendered by an HTML
    component. Spans are computed against the original text and emitted as
    flat (non-nested) ``<mark>`` segments, so overlapping grammar and
    sensitive regions cannot corrupt each other's offsets.
    """
    text = text or ""
    grammar_spans = []
    for issue in grammar_issues:
        start = issue["offset"]
        end = start + issue["length"]
        # Skips the zero-length placeholder produced when the check failed.
        if 0 <= start < end <= len(text):
            grammar_spans.append((start, end))

    sensitive_spans = []
    for issue in sensitive_issues:
        sensitive_spans.extend(_find_all(text, issue["sentence"]))

    # Every span boundary becomes a cut point, so each resulting segment is
    # either entirely inside or entirely outside any given span.
    cuts = {0, len(text)}
    for start, end in grammar_spans + sensitive_spans:
        cuts.update((start, end))
    ordered = sorted(cuts)

    pieces = []
    for left, right in zip(ordered, ordered[1:]):
        chunk = html.escape(text[left:right])
        in_grammar = any(s <= left and right <= e for s, e in grammar_spans)
        in_sensitive = any(s <= left and right <= e for s, e in sensitive_spans)
        if in_grammar and in_sensitive:
            style = _BOTH_STYLE
        elif in_grammar:
            style = _GRAMMAR_STYLE
        elif in_sensitive:
            style = _SENSITIVE_STYLE
        else:
            style = ""
        pieces.append(f'<mark style="{style}">{chunk}</mark>' if style else chunk)

    # pre-wrap keeps the author's line breaks visible in the rendered HTML.
    return f'<div style="white-space: pre-wrap;">{"".join(pieces)}</div>'


def review_blog(input_type, text_input, url_input):
    """Review the given text or URL.

    Returns a 3-tuple ``(highlighted_html, suggestions_text, suggestions)``.
    On missing input or a fetch failure the first element carries the message
    and the other two are empty.
    """
    if not text_input and not url_input:
        return "Please provide text or a URL.", "", []

    if input_type == "URL" and url_input:
        # Fetch here rather than via extract_text so a failure is detected by
        # the exception itself, not by the text happening to start with "Error".
        try:
            text = _fetch_article_text(url_input)
        except Exception as exc:  # UI boundary: show the failure to the user.
            return html.escape(f"Error fetching URL: {exc}"), "", []
    else:
        text = text_input

    if not text or not text.strip():
        return "No text found to review.", "", []

    grammar_issues = check_grammar(text)
    sensitive_issues = detect_sensitive_content(text)
    suggestions = generate_suggestions(text, grammar_issues, sensitive_issues)
    highlighted_text = highlight_text(text, grammar_issues, sensitive_issues)
    suggestions_text = "\n".join(
        f"{number}. {suggestion}"
        for number, suggestion in enumerate(suggestions, start=1)
    )
    return highlighted_text, suggestions_text, suggestions


def _replace_first_token(text, old, new):
    """Replace the first standalone occurrence of *old* in *text* with *new*.

    Word-boundary guards are added only on the sides where *old* itself begins
    or ends with a word character, so a short span such as ``a`` is not
    replaced inside other words.
    """
    prefix = r"(?<!\w)" if re.match(r"\w", old[0]) else ""
    suffix = r"(?!\w)" if re.match(r"\w", old[-1]) else ""
    pattern = prefix + re.escape(old) + suffix
    # A function replacement keeps backslashes in *new* literal.
    return re.sub(pattern, lambda _match: new, text, count=1)


def apply_changes(text, suggestions, approved_indices):
    """Apply the approved suggestions to *text* and return the result.

    *approved_indices* is a comma-separated list of 1-based suggestion
    numbers; entries that are not integers or are out of range are ignored.
    *suggestions* is the list of strings produced by generate_suggestions.
    """
    text = text or ""
    if not suggestions or not approved_indices:
        return text

    for token in approved_indices.split(","):
        try:
            position = int(token.strip()) - 1
        except ValueError:
            continue
        # Reject 0 and negatives explicitly: a negative index would otherwise
        # silently select a suggestion from the end of the list.
        if not 0 <= position < len(suggestions):
            continue

        suggestion = suggestions[position]
        sensitive = _SENSITIVE_SUGGESTION.match(suggestion)
        if sensitive:
            text = text.replace(sensitive["old"], sensitive["new"])
            continue
        grammar = _GRAMMAR_SUGGESTION.match(suggestion)
        if grammar and grammar["old"]:
            text = _replace_first_token(text, grammar["old"], grammar["new"])
    return text


# Gradio UI
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("## 🧠 AI Blog Reviewer with Grammar & Bias Detection")
    gr.Markdown(
        "Enter blog content or a URL. Detect grammar issues and sensitive "
        "(toxic, biased) content."
    )

    input_type = gr.Radio(["Text", "URL"], label="Input Type", value="Text")
    text_input = gr.Textbox(label="Blog Text", lines=10, visible=True)
    url_input = gr.Textbox(label="Blog URL", visible=False)

    def toggle_input(selected):
        """Show only the input box that matches the selected input type."""
        return {
            text_input: gr.update(visible=selected == "Text"),
            url_input: gr.update(visible=selected == "URL"),
        }

    input_type.change(
        fn=toggle_input, inputs=input_type, outputs=[text_input, url_input]
    )

    review_btn = gr.Button("🔍 Review Blog")
    highlighted_output = gr.HTML(label="Highlighted Output")
    suggestions_output = gr.Textbox(label="Suggestions", lines=8)
    approve_indices = gr.Textbox(label="Approve Suggestions (e.g., 1,2)")
    apply_btn = gr.Button("✅ Apply Suggestions")
    final_output = gr.Textbox(label="Updated Text", lines=10)
    suggestions_state = gr.State()

    review_btn.click(
        fn=review_blog,
        inputs=[input_type, text_input, url_input],
        outputs=[highlighted_output, suggestions_output, suggestions_state],
    )
    # NOTE(review): suggestions are applied to the "Blog Text" box, so a review
    # run in URL mode has no source text here unless it is pasted in — confirm
    # whether the fetched article text should be kept in state instead.
    apply_btn.click(
        fn=apply_changes,
        inputs=[text_input, suggestions_state, approve_indices],
        outputs=final_output,
    )

demo.launch()