Spaces:
Running
Running
File size: 5,769 Bytes
768c740 56f7cbb 768c740 9994894 768c740 9994894 56f7cbb 01e4eed 5f0de1f 9994894 01e4eed 9994894 56f7cbb 01e4eed 9994894 56f7cbb 768c740 56f7cbb 9994894 56f7cbb 768c740 9994894 768c740 56f7cbb 9994894 56f7cbb 768c740 56f7cbb 768c740 56f7cbb 9994894 56f7cbb 768c740 56f7cbb 9994894 56f7cbb 768c740 56f7cbb 9994894 56f7cbb 9994894 56f7cbb 768c740 9994894 768c740 56f7cbb 768c740 56f7cbb 768c740 56f7cbb 768c740 9994894 768c740 9994894 768c740 56f7cbb |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 |
import gradio as gr
import language_tool_python
import nltk
from transformers import pipeline
from newspaper import Article
from nltk.tokenize import sent_tokenize
import re
# --- One-time model / service setup (runs at import; downloads on first run) ---

# Sentence tokenizer data used by nltk.tokenize.sent_tokenize below.
nltk.download("punkt")

# Correct way to connect to the locally running LanguageTool server
# NOTE(review): assumes a LanguageTool HTTP server is already listening on
# localhost:8081 — this client does not start one. Verify deployment config.
grammar_tool = language_tool_python.LanguageTool(
    language='en-US',
    remote_server='http://localhost:8081/v2/'
)

# Hugging Face pipelines
# Sentence-level toxicity scoring and abstractive summarization (used as a
# crude "rephraser" for flagged sentences). Both download weights on first use.
toxicity_classifier = pipeline("text-classification", model="unitary/toxic-bert")
summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-6-6")
def extract_text(input_type, text_input, url_input):
    """Return the text to review.

    In "URL" mode with a non-empty URL, fetch and parse the article body via
    newspaper; in every other case fall back to the pasted text.
    """
    # Guard clause: anything other than a usable URL means "use pasted text".
    if input_type != "URL" or not url_input:
        return text_input
    article = Article(url_input)
    article.download()
    article.parse()
    return article.text
def check_grammar(text):
    """Run LanguageTool over *text* and normalize each match into a plain dict.

    Each dict carries the error context, message, candidate replacements, and
    the character offset/length needed later for highlighting.
    """
    issues = []
    for match in grammar_tool.check(text):
        issues.append({
            "text": match.context,
            "error": match.message,
            "suggestions": match.replacements,
            "offset": match.offset,
            "length": match.errorLength,
        })
    return issues
def detect_sensitive_content(text):
    """Flag sentences the toxicity classifier labels 'toxic' with score > 0.7.

    Returns a list of dicts: {"sentence", "score", "index"} where index is the
    sentence's position in the tokenized text.
    """
    sentences = sent_tokenize(text)
    # Guard: an empty/whitespace-only text yields no sentences, and calling
    # the pipeline with an empty list is pointless (and can error).
    if not sentences:
        return []
    # Performance fix: classify all sentences in ONE batched pipeline call
    # instead of one model invocation per sentence. HF pipelines accept a
    # list of inputs and return one result dict per input.
    results = toxicity_classifier(sentences)
    flagged = []
    for i, (sentence, result) in enumerate(zip(sentences, results)):
        if result['label'] == 'toxic' and result['score'] > 0.7:
            flagged.append({
                "sentence": sentence,
                "score": result['score'],
                "index": i,
            })
    return flagged
def generate_suggestions(text, grammar_issues, sensitive_issues):
    """Build human-readable suggestion strings for the two issue lists.

    Grammar issues become "Replace '<context>' with '<first candidate>'";
    sensitive sentences are summarized to produce a rephrasing candidate.
    *text* is accepted for interface compatibility but not consulted here.
    """
    tips = []
    # Grammar: only issues that actually carry at least one replacement.
    for item in grammar_issues:
        candidates = item['suggestions']
        if not candidates:
            continue
        tips.append(f"Replace '{item['text']}' with '{candidates[0]}' ({item['error']})")
    # Sensitive content: use the summarizer as a crude rephraser.
    for item in sensitive_issues:
        rewrite = summarizer(
            item['sentence'], max_length=50, min_length=10, do_sample=False
        )[0]['summary_text']
        tips.append(f"Rephrase sensitive content '{item['sentence']}' to: '{rewrite}' (Toxicity score: {item['score']:.2f})")
    return tips
def highlight_text(text, grammar_issues, sensitive_issues):
    """Return *text* as HTML with grammar errors wrapped in yellow spans and
    sensitive sentences wrapped in red spans.

    Grammar issues are spliced by character offset; each inserted span shifts
    later offsets, so a running adjustment is maintained. Sensitive sentences
    are replaced by plain string substitution afterwards.
    """
    result = text
    shift = 0  # cumulative length added by spans inserted so far
    for issue in grammar_issues:
        begin = issue['offset'] + shift
        finish = begin + issue['length']
        flagged = result[begin:finish]
        wrapped = f"<span style='background-color: yellow'>{flagged}</span>"
        result = result[:begin] + wrapped + result[finish:]
        shift += len(wrapped) - len(flagged)
    for issue in sensitive_issues:
        sentence = issue['sentence']
        result = result.replace(
            sentence,
            f"<span style='background-color: red'>{sentence}</span>"
        )
    return result
def review_blog(input_type, text_input, url_input):
    """Full review pipeline for the Gradio UI.

    Extracts the text, runs grammar and toxicity checks, and returns
    (highlighted HTML, numbered suggestion text, raw suggestion list).
    """
    # Nothing to review at all — short-circuit with a prompt message.
    if not (text_input or url_input):
        return "Please provide text or a URL.", "", []
    text = extract_text(input_type, text_input, url_input)
    grammar_issues = check_grammar(text)
    sensitive_issues = detect_sensitive_content(text)
    suggestions = generate_suggestions(text, grammar_issues, sensitive_issues)
    numbered = [f"{n}. {s}" for n, s in enumerate(suggestions, start=1)]
    return (
        highlight_text(text, grammar_issues, sensitive_issues),
        "\n".join(numbered),
        suggestions,
    )
def apply_changes(text, suggestions, approved_indices):
    """Apply the user-approved suggestions to *text*.

    Parameters
    ----------
    text : str
        The original blog text.
    suggestions : list[str]
        Suggestion strings produced by generate_suggestions().
    approved_indices : str
        Comma-separated 1-based indices, e.g. "1,3". Non-numeric or
        out-of-range entries are silently skipped.

    Returns
    -------
    str
        The text with each approved replacement applied.
    """
    # (The original computed sent_tokenize(text) here and never used it —
    # removed.)
    for raw_idx in approved_indices.split(','):
        try:
            idx = int(raw_idx.strip()) - 1
        except ValueError:
            continue  # non-numeric entry — skip
        # Explicit lower bound: input "0" used to produce idx == -1, which
        # would pass `idx < len(suggestions)` and index from the end.
        if not 0 <= idx < len(suggestions):
            continue
        suggestion = suggestions[idx]
        # BUG FIX: the previous version gated everything on
        # re.search(r"'([^']+)'$", suggestion), i.e. the suggestion had to END
        # with a quoted string — but every suggestion ends with "(...)"
        # (the error message or the toxicity score), so the gate never matched
        # and NO approved change was ever applied. Parse each format directly.
        if "Rephrase sensitive content" in suggestion:
            # Format: Rephrase sensitive content '<orig>' to: '<new>' (Toxicity score: X)
            m = re.search(r"Rephrase sensitive content '([^']+)' to: '([^']+)'", suggestion)
        else:
            # Format: Replace '<orig>' with '<new>' (<error message>)
            m = re.search(r"Replace '([^']+)' with '([^']+)'", suggestion)
        if m:
            text = text.replace(m.group(1), m.group(2))
    return text
# --- Gradio UI wiring ---
# Declares the components, the Text/URL visibility toggle, and the two
# button callbacks (review, apply). `demo.launch()` starts the web server.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# ✨ AI Blog Reviewer")
    gr.Markdown("Highlight grammar & sensitive issues. Rephrase toxic content. Approve and apply changes.")

    # Input mode selector plus one textbox per mode; only one is visible.
    input_type = gr.Radio(["Text", "URL"], label="Input Type", value="Text")
    text_input = gr.Textbox(label="Blog Text", lines=10, visible=True)
    url_input = gr.Textbox(label="Blog URL", visible=False)

    def toggle_input(type):
        # Show exactly one of the two input widgets for the selected mode.
        # (NOTE: parameter name shadows the builtin `type`.)
        return {
            text_input: gr.update(visible=type == "Text"),
            url_input: gr.update(visible=type == "URL")
        }

    input_type.change(fn=toggle_input, inputs=input_type, outputs=[text_input, url_input])

    review_btn = gr.Button("Review Content")
    highlighted = gr.HTML()  # review output: HTML with highlight spans
    suggestions = gr.Textbox(label="Suggestions", lines=10)
    approved = gr.Textbox(label="Approve Suggestions (e.g., 1,3)")
    apply_btn = gr.Button("Apply Changes")
    final = gr.Textbox(label="Final Text", lines=10)
    # Raw suggestion list kept server-side so apply_changes can re-parse it.
    suggestions_state = gr.State()

    review_btn.click(fn=review_blog, inputs=[input_type, text_input, url_input],
                     outputs=[highlighted, suggestions, suggestions_state])
    # NOTE(review): apply_changes receives the raw text_input, so in URL mode
    # the approved changes are applied to the (empty) pasted text, not to the
    # extracted article — confirm whether this is intended.
    apply_btn.click(fn=apply_changes, inputs=[text_input, suggestions_state, approved], outputs=final)

demo.launch()
|