Spaces:
Running
Running
File size: 6,492 Bytes
768c740 56f7cbb 768c740 56f7cbb 768c740 56f7cbb 2867d5b 56f7cbb 51e1dd3 ffcdfc0 56f7cbb 2867d5b 56f7cbb 768c740 56f7cbb 2867d5b 56f7cbb 768c740 0425ea5 2867d5b 768c740 56f7cbb 768c740 56f7cbb 768c740 56f7cbb 768c740 56f7cbb 768c740 56f7cbb 2867d5b 56f7cbb 768c740 56f7cbb 768c740 56f7cbb 2867d5b 768c740 0425ea5 768c740 56f7cbb 768c740 56f7cbb 768c740 56f7cbb 768c740 56f7cbb 768c740 56f7cbb 768c740 56f7cbb 768c740 56f7cbb |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 |
import gradio as gr
from transformers import pipeline
from newspaper import Article
import language_tool_python
import nltk
import re
from nltk.tokenize import sent_tokenize
# Download punkt tokenizer (required by sent_tokenize; no-op if already cached)
nltk.download("punkt")
# Connect to the local LanguageTool server started via setup.sh
# NOTE(review): LanguageToolPublicAPI normally targets the hosted public API;
# for a self-hosted server, language_tool_python.LanguageTool(...,
# remote_server=...) is the documented entry point — confirm that the
# `endpoint` argument is actually honored here.
grammar_tool = language_tool_python.LanguageToolPublicAPI(language='en-US', endpoint='http://localhost:8081/')
# Load transformers pipelines
# summarizer: distilled BART, used to propose rewrites of flagged sentences.
summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-6-6")
# toxicity_classifier: HateXplain-based classifier, applied per sentence.
toxicity_classifier = pipeline("text-classification", model="Hate-speech-CNERG/bert-base-uncased-hatexplain")
def extract_text(input_type, text_input, url_input):
    """Return the blog text to analyse.

    In URL mode (with a non-empty URL) the article is downloaded and parsed
    via newspaper; any failure is reported as an "Error fetching URL: ..."
    string.  In every other case the pasted text is returned unchanged.
    """
    if input_type != "URL" or not url_input:
        return text_input
    try:
        article = Article(url_input)
        article.download()
        article.parse()
    except Exception as e:
        return f"Error fetching URL: {e}"
    return article.text
def check_grammar(text):
    """Run LanguageTool over *text* and normalise each match into a dict.

    Each issue dict carries: context snippet, error message, replacement
    candidates, and the character offset/length of the error.  Any failure
    (server down, network error, ...) yields a single sentinel issue whose
    "error" field describes the problem.
    """
    try:
        issues = []
        for match in grammar_tool.check(text):
            issues.append({
                "text": match.context,
                "error": match.message,
                "suggestions": match.replacements,
                "offset": match.offset,
                "length": match.errorLength,
            })
        return issues
    except Exception as e:
        # Best-effort: surface the failure as a pseudo-issue instead of raising.
        return [{"text": "", "error": f"Grammar check failed: {str(e)}", "suggestions": [], "offset": 0, "length": 0}]
def detect_sensitive_content(text):
    """Scan *text* sentence by sentence for toxic/hateful/offensive content.

    Each sentence is classified independently; a sentence is flagged when the
    predicted label contains 'toxic', 'hate', or 'offensive'.  Returns a list
    of dicts with the sentence, its score, the lowercased label, and its
    position in the sentence sequence.
    """
    flagged = []
    for position, sentence in enumerate(sent_tokenize(text)):
        prediction = toxicity_classifier(sentence)[0]
        label = prediction['label'].lower()
        if 'toxic' in label or 'hate' in label or 'offensive' in label:
            flagged.append({
                "sentence": sentence,
                "score": prediction['score'],
                "label": label,
                "index": position,
            })
    return flagged
def generate_suggestions(text, grammar_issues, sensitive_issues):
    """Turn detected issues into human-readable suggestion strings.

    Grammar issues with at least one replacement become "Replace ... with ..."
    entries; each sensitive sentence is summarised with the summarization
    pipeline and becomes a "Rephrase sensitive content ..." entry.
    """
    suggestions = [
        f"Replace '{issue['text']}' with '{issue['suggestions'][0]}' ({issue['error']})"
        for issue in grammar_issues
        if issue['suggestions']
    ]
    for issue in sensitive_issues:
        rewrite = summarizer(issue['sentence'], max_length=50, min_length=10, do_sample=False)[0]['summary_text']
        suggestions.append(
            f"Rephrase sensitive content '{issue['sentence']}' to: '{rewrite}' (Toxicity score: {issue['score']:.2f})"
        )
    return suggestions
def highlight_text(text, grammar_issues, sensitive_issues):
    """Return *text* as HTML with issues wrapped in coloured <span> markers.

    Grammar issues (located by character offset/length) are highlighted in
    yellow; sensitive sentences (located by exact string match) in red.

    Fix: grammar issues are now applied in ascending offset order.  The
    running offset_adjust is only valid when spans are inserted left to
    right; previously an unsorted issue list silently corrupted the markup.
    """
    highlighted = text
    offset_adjust = 0  # total characters added by span tags so far
    for issue in sorted(grammar_issues, key=lambda i: i['offset']):
        start = issue['offset'] + offset_adjust
        end = start + issue['length']
        error_text = highlighted[start:end]
        span = f"<span style='background-color: yellow'>{error_text}</span>"
        highlighted = highlighted[:start] + span + highlighted[end:]
        offset_adjust += len(span) - len(error_text)
    for issue in sensitive_issues:
        sentence = issue['sentence']
        highlighted = highlighted.replace(sentence, f"<span style='background-color: red'>{sentence}</span>")
    return highlighted
def review_blog(input_type, text_input, url_input):
    """Full review pipeline: extract text, find issues, build suggestions.

    Returns (highlighted HTML, numbered suggestion text, raw suggestion list).
    The raw list is stored in Gradio state so the apply step can reuse it.
    """
    if not (text_input or url_input):
        return "Please provide text or a URL.", "", []
    text = extract_text(input_type, text_input, url_input)
    # extract_text reports fetch failures as an "Error..." string.
    if text.startswith("Error"):
        return text, "", []
    grammar = check_grammar(text)
    sensitive = detect_sensitive_content(text)
    proposals = generate_suggestions(text, grammar, sensitive)
    html = highlight_text(text, grammar, sensitive)
    numbered = "\n".join(f"{n}. {proposal}" for n, proposal in enumerate(proposals, start=1))
    return html, numbered, proposals
def apply_changes(text, suggestions, approved_indices):
    """Apply the user-approved suggestions (1-based, comma-separated) to *text*.

    Parses each approved suggestion string back into an (original, replacement)
    pair and substitutes it into the text.  Non-numeric or out-of-range indices
    are silently skipped, matching the previous best-effort behaviour.

    Bugs fixed vs. the previous version:
    - The old ``r"'([^']+)'$"`` pre-check never matched either suggestion
      format (both end with a ``)`` from the trailing annotation), so no
      suggestion was ever applied.
    - ``idx < len(suggestions)`` without a lower bound let an input of "0"
      wrap to ``suggestions[-1]`` via negative indexing.
    - A dead ``sent_tokenize(text)`` call (result unused) was removed.
    - ``None``/empty suggestions or indices no longer raise.
    """
    if not suggestions or not approved_indices:
        return text
    for token in approved_indices.split(','):
        try:
            idx = int(token.strip()) - 1  # user enters 1-based indices
        except ValueError:
            continue  # skip blanks / non-numeric entries
        if not 0 <= idx < len(suggestions):
            continue  # reject out-of-range (incl. negative wraparound)
        suggestion = suggestions[idx]
        # Parse the (original, replacement) pair out of the suggestion string;
        # formats must mirror those produced by generate_suggestions.
        if suggestion.startswith("Rephrase sensitive content"):
            match = re.search(r"Rephrase sensitive content '([^']+)' to: '([^']+)'", suggestion)
        else:
            match = re.search(r"Replace '([^']+)' with '([^']+)'", suggestion)
        if match:
            text = text.replace(match.group(1), match.group(2))
    return text
# Gradio UI: review step fills suggestions_state; apply step consumes it.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("## 🧠 AI Blog Reviewer with Grammar & Bias Detection")
    gr.Markdown("Enter blog content or a URL. Detect grammar issues and sensitive (toxic, biased) content.")
    # Radio toggles which of the two input textboxes is visible.
    input_type = gr.Radio(["Text", "URL"], label="Input Type", value="Text")
    text_input = gr.Textbox(label="Blog Text", lines=10, visible=True)
    url_input = gr.Textbox(label="Blog URL", visible=False)
    def toggle_input(type):
        """Show the textbox matching the selected input mode; hide the other."""
        return {
            text_input: gr.update(visible=type == "Text"),
            url_input: gr.update(visible=type == "URL")
        }
    input_type.change(fn=toggle_input, inputs=input_type, outputs=[text_input, url_input])
    review_btn = gr.Button("🔍 Review Blog")
    highlighted_output = gr.HTML(label="Highlighted Output")
    suggestions_output = gr.Textbox(label="Suggestions", lines=8)
    approve_indices = gr.Textbox(label="Approve Suggestions (e.g., 1,2)")
    apply_btn = gr.Button("✅ Apply Suggestions")
    final_output = gr.Textbox(label="Updated Text", lines=10)
    # Holds the raw suggestions list between the review and apply steps.
    suggestions_state = gr.State()
    review_btn.click(fn=review_blog,
                inputs=[input_type, text_input, url_input],
                outputs=[highlighted_output, suggestions_output, suggestions_state])
    # NOTE(review): the apply step reads text_input directly, so in URL mode
    # it operates on the (empty) textbox rather than the fetched article text —
    # confirm whether the extracted text should also be stored in state.
    apply_btn.click(fn=apply_changes,
                inputs=[text_input, suggestions_state, approve_indices],
                outputs=final_output)
demo.launch()
|