Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -1,225 +1,225 @@
|
|
1 |
-
import gradio as gr
|
2 |
-
import language_tool_python
|
3 |
-
import requests
|
4 |
-
from newspaper import Article
|
5 |
-
from transformers import pipeline
|
6 |
-
import re
|
7 |
-
import nltk
|
8 |
-
from nltk.tokenize import sent_tokenize
|
9 |
-
from autogen import AssistantAgent, UserProxyAgent, config_list_from_json
|
10 |
-
|
11 |
-
nltk.download('punkt')
|
12 |
-
|
13 |
-
# Initialize Hugging Face models
|
14 |
-
toxicity_classifier = pipeline("text-classification", model="unitary/toxic-bert")
|
15 |
-
summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-6-6")
|
16 |
-
|
17 |
-
# AutoGen configuration
|
18 |
-
config_list = [
|
19 |
-
{
|
20 |
-
"model": "local",
|
21 |
-
"api_key": "none"
|
22 |
-
}
|
23 |
-
]
|
24 |
-
|
25 |
-
# Define AutoGen Agents (for modularity, but we'll call functions directly)
|
26 |
-
user_proxy = UserProxyAgent(
|
27 |
-
name="UserProxy",
|
28 |
-
system_message="Coordinates tasks and passes inputs to other agents.",
|
29 |
-
human_input_mode="NEVER",
|
30 |
-
code_execution_config={"work_dir": "autogen_workdir", "use_docker": False}
|
31 |
-
)
|
32 |
-
|
33 |
-
text_extraction_agent = AssistantAgent(
|
34 |
-
name="TextExtractor",
|
35 |
-
system_message="Extracts text from URLs or processes raw text.",
|
36 |
-
llm_config={"config_list": config_list}
|
37 |
-
)
|
38 |
-
|
39 |
-
grammar_check_agent = AssistantAgent(
|
40 |
-
name="GrammarChecker",
|
41 |
-
system_message="Identifies spelling and grammar errors using LanguageTool.",
|
42 |
-
llm_config={"config_list": config_list}
|
43 |
-
)
|
44 |
-
|
45 |
-
sensitive_content_agent = AssistantAgent(
|
46 |
-
name="SensitiveContentDetector",
|
47 |
-
system_message="Detects toxic or sensitive content (e.g., racism, gender bias).",
|
48 |
-
llm_config={"config_list": config_list}
|
49 |
-
)
|
50 |
-
|
51 |
-
suggestion_agent = AssistantAgent(
|
52 |
-
name="SuggestionGenerator",
|
53 |
-
system_message="Generates suggestions to fix grammar and rephrase sensitive content.",
|
54 |
-
llm_config={"config_list": config_list}
|
55 |
-
)
|
56 |
-
|
57 |
-
coordinator_agent = AssistantAgent(
|
58 |
-
name="Coordinator",
|
59 |
-
system_message="Combines results, highlights issues, and formats outputs.",
|
60 |
-
llm_config={"config_list": config_list}
|
61 |
-
)
|
62 |
-
|
63 |
-
# Task functions
|
64 |
-
def extract_text(input_type, text_input, url_input):
|
65 |
-
if input_type == "URL" and url_input:
|
66 |
-
try:
|
67 |
-
article = Article(url_input)
|
68 |
-
article.download()
|
69 |
-
article.parse()
|
70 |
-
return article.text
|
71 |
-
except Exception as e:
|
72 |
-
return f"Error fetching URL: {str(e)}"
|
73 |
-
return text_input
|
74 |
-
|
75 |
-
def check_grammar(text):
|
76 |
-
try:
|
77 |
-
grammar_tool = language_tool_python.
|
78 |
-
matches = grammar_tool.check(text)
|
79 |
-
return [
|
80 |
-
{
|
81 |
-
"text": match.context,
|
82 |
-
"error": match.message,
|
83 |
-
"suggestions": match.replacements,
|
84 |
-
"offset": match.offset,
|
85 |
-
"length": match.errorLength
|
86 |
-
} for match in matches
|
87 |
-
]
|
88 |
-
except Exception as e:
|
89 |
-
return [{"text": "", "error": f"Grammar check failed: {str(e)}", "suggestions": [], "offset": 0, "length": 0}]
|
90 |
-
|
91 |
-
def detect_sensitive_content(text):
|
92 |
-
sentences = sent_tokenize(text)
|
93 |
-
sensitive_issues = []
|
94 |
-
for i, sentence in enumerate(sentences):
|
95 |
-
result = toxicity_classifier(sentence)
|
96 |
-
if result[0]['label'] == 'toxic' and result[0]['score'] > 0.7:
|
97 |
-
sensitive_issues.append({
|
98 |
-
"sentence": sentence,
|
99 |
-
"score": result[0]['score'],
|
100 |
-
"index": i
|
101 |
-
})
|
102 |
-
return sensitive_issues
|
103 |
-
|
104 |
-
def generate_suggestions(text, grammar_issues, sensitive_issues):
|
105 |
-
suggestions = []
|
106 |
-
for issue in grammar_issues:
|
107 |
-
if issue['suggestions']:
|
108 |
-
suggestions.append(f"Replace '{issue['text']}' with '{issue['suggestions'][0]}' ({issue['error']})")
|
109 |
-
for issue in sensitive_issues:
|
110 |
-
try:
|
111 |
-
summary = summarizer(issue['sentence'], max_length=50, min_length=10, do_sample=False)[0]['summary_text']
|
112 |
-
suggestions.append(f"Rephrase sensitive content '{issue['sentence']}' to: '{summary}' (Toxicity score: {issue['score']:.2f})")
|
113 |
-
except Exception as e:
|
114 |
-
suggestions.append(f"Failed to rephrase '{issue['sentence']}': {str(e)}")
|
115 |
-
return suggestions
|
116 |
-
|
117 |
-
def highlight_text(text, grammar_issues, sensitive_issues):
|
118 |
-
highlighted = text
|
119 |
-
offset_adjust = 0
|
120 |
-
for issue in grammar_issues:
|
121 |
-
start = issue['offset'] + offset_adjust
|
122 |
-
end = start + issue['length']
|
123 |
-
error_text = highlighted[start:end]
|
124 |
-
highlighted = highlighted[:start] + f"<span style='background-color: yellow'>{error_text}</span>" + highlighted[end:]
|
125 |
-
offset_adjust += len("<span style='background-color: yellow'>") + len("</span>")
|
126 |
-
sentences = sent_tokenize(text)
|
127 |
-
offset_adjust = 0
|
128 |
-
for issue in sensitive_issues:
|
129 |
-
sentence = issue['sentence']
|
130 |
-
start = highlighted.find(sentence, offset_adjust)
|
131 |
-
if start != -1:
|
132 |
-
end = start + len(sentence)
|
133 |
-
highlighted = highlighted[:start] + f"<span style='background-color: red'>{sentence}</span>" + highlighted[end:]
|
134 |
-
offset_adjust = end
|
135 |
-
return highlighted
|
136 |
-
|
137 |
-
# Main function to process input
|
138 |
-
def review_blog(input_type, text_input, url_input):
|
139 |
-
if not text_input and not url_input:
|
140 |
-
return "Please provide text or a URL.", "", []
|
141 |
-
|
142 |
-
# Step 1: Text Extraction
|
143 |
-
text = extract_text(input_type, text_input, url_input)
|
144 |
-
if text.startswith("Error"):
|
145 |
-
return text, "", []
|
146 |
-
|
147 |
-
# Step 2: Grammar Check
|
148 |
-
grammar_issues = check_grammar(text)
|
149 |
-
|
150 |
-
# Step 3: Sensitive Content Detection
|
151 |
-
sensitive_issues = detect_sensitive_content(text)
|
152 |
-
|
153 |
-
# Step 4: Generate Suggestions
|
154 |
-
suggestions = generate_suggestions(text, grammar_issues, sensitive_issues)
|
155 |
-
|
156 |
-
# Step 5: Coordinate Output
|
157 |
-
highlighted_text = highlight_text(text, grammar_issues, sensitive_issues)
|
158 |
-
suggestions_text = "\n".join([f"{i+1}. {sug}" for i, sug in enumerate(suggestions)])
|
159 |
-
|
160 |
-
return highlighted_text, suggestions_text, suggestions
|
161 |
-
|
162 |
-
def apply_changes(text, suggestions, approved_indices):
|
163 |
-
sentences = sent_tokenize(text)
|
164 |
-
for idx in approved_indices.split(','):
|
165 |
-
try:
|
166 |
-
idx = int(idx.strip()) - 1
|
167 |
-
if idx < len(suggestions):
|
168 |
-
suggestion = suggestions[idx]
|
169 |
-
match = re.search(r"'([^']+)'$", suggestion)
|
170 |
-
if match:
|
171 |
-
new_text = match.group(1)
|
172 |
-
if "Rephrase sensitive content" in suggestion:
|
173 |
-
orig_match = re.search(r"'([^']+)'\s+to:", suggestion)
|
174 |
-
if orig_match:
|
175 |
-
orig_sentence = orig_match.group(1)
|
176 |
-
text = text.replace(orig_sentence, new_text)
|
177 |
-
else:
|
178 |
-
orig_match = re.search(r"Replace '([^']+)'\s+with\s+'([^']+)'", suggestion)
|
179 |
-
if orig_match:
|
180 |
-
orig_text = orig_match.group(1)
|
181 |
-
text = text.replace(orig_text, new_text)
|
182 |
-
except ValueError:
|
183 |
-
continue # Skip invalid indices
|
184 |
-
return text
|
185 |
-
|
186 |
-
# Gradio interface
|
187 |
-
with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
188 |
-
gr.Markdown("# AI Blog Reviewer with AutoGen")
|
189 |
-
gr.Markdown("Enter blog text or a URL to review for spelling, grammar, and sensitive content. Approve suggested changes to update the text.")
|
190 |
-
|
191 |
-
input_type = gr.Radio(["Text", "URL"], label="Input Type", value="Text")
|
192 |
-
text_input = gr.Textbox(label="Blog Text", placeholder="Enter your blog text here...", lines=10, visible=True)
|
193 |
-
url_input = gr.Textbox(label="Blog URL", placeholder="Enter the blog URL here...", visible=False)
|
194 |
-
|
195 |
-
def toggle_input(input_type):
|
196 |
-
return {
|
197 |
-
text_input: gr.update(visible=input_type == "Text"),
|
198 |
-
url_input: gr.update(visible=input_type == "URL")
|
199 |
-
}
|
200 |
-
|
201 |
-
input_type.change(fn=toggle_input, inputs=input_type, outputs=[text_input, url_input])
|
202 |
-
|
203 |
-
review_button = gr.Button("Review Content")
|
204 |
-
highlighted_output = gr.HTML(label="Highlighted Issues (Yellow: Grammar, Red: Sensitive)")
|
205 |
-
suggestions_output = gr.Textbox(label="Suggestions", lines=10)
|
206 |
-
approve_indices = gr.Textbox(label="Approve Suggestions (Enter numbers, e.g., '1,2,3')")
|
207 |
-
apply_button = gr.Button("Apply Approved Changes")
|
208 |
-
final_text = gr.Textbox(label="Final Text", lines=10)
|
209 |
-
|
210 |
-
suggestions_state = gr.State()
|
211 |
-
|
212 |
-
review_button.click(
|
213 |
-
fn=review_blog,
|
214 |
-
inputs=[input_type, text_input, url_input],
|
215 |
-
outputs=[highlighted_output, suggestions_output, suggestions_state]
|
216 |
-
)
|
217 |
-
|
218 |
-
apply_button.click(
|
219 |
-
fn=apply_changes,
|
220 |
-
inputs=[text_input, suggestions_state, approve_indices],
|
221 |
-
outputs=final_text
|
222 |
-
)
|
223 |
-
|
224 |
-
# Launch Gradio interface
|
225 |
demo.launch()
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import language_tool_python
|
3 |
+
import requests
|
4 |
+
from newspaper import Article
|
5 |
+
from transformers import pipeline
|
6 |
+
import re
|
7 |
+
import nltk
|
8 |
+
from nltk.tokenize import sent_tokenize
|
9 |
+
from autogen import AssistantAgent, UserProxyAgent, config_list_from_json
|
10 |
+
|
11 |
+
# Sentence-tokenizer data for nltk.tokenize.sent_tokenize.
# Older NLTK releases load the "punkt" package, while recent releases look up
# "punkt_tab" instead; fetching both keeps sent_tokenize working on either.
for _nltk_resource in ("punkt", "punkt_tab"):
    nltk.download(_nltk_resource)

# Initialize Hugging Face models
# toxicity_classifier: scores a sentence against the toxic-bert label set.
toxicity_classifier = pipeline("text-classification", model="unitary/toxic-bert")
# summarizer: used downstream to produce a rephrasing for flagged sentences.
summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-6-6")
|
16 |
+
|
17 |
+
# AutoGen configuration
config_list = [{"model": "local", "api_key": "none"}]


def _make_assistant(agent_name, role_description):
    """Create an AssistantAgent wired to the shared ``config_list``.

    Each call builds its own ``llm_config`` dict, so agents do not share a
    mutable configuration object.
    """
    return AssistantAgent(
        name=agent_name,
        system_message=role_description,
        llm_config={"config_list": config_list},
    )


# Define AutoGen Agents (for modularity, but we'll call functions directly)
user_proxy = UserProxyAgent(
    name="UserProxy",
    system_message="Coordinates tasks and passes inputs to other agents.",
    human_input_mode="NEVER",
    code_execution_config={"work_dir": "autogen_workdir", "use_docker": False},
)

text_extraction_agent = _make_assistant(
    "TextExtractor",
    "Extracts text from URLs or processes raw text.",
)

grammar_check_agent = _make_assistant(
    "GrammarChecker",
    "Identifies spelling and grammar errors using LanguageTool.",
)

sensitive_content_agent = _make_assistant(
    "SensitiveContentDetector",
    "Detects toxic or sensitive content (e.g., racism, gender bias).",
)

suggestion_agent = _make_assistant(
    "SuggestionGenerator",
    "Generates suggestions to fix grammar and rephrase sensitive content.",
)

coordinator_agent = _make_assistant(
    "Coordinator",
    "Combines results, highlights issues, and formats outputs.",
)
|
62 |
+
|
63 |
+
# Task functions
|
64 |
+
def extract_text(input_type, text_input, url_input):
    """Return the text that should be reviewed.

    Args:
        input_type: "URL" to fetch an article; any other value uses the
            pasted text.
        text_input: Text typed by the user (may be None or empty).
        url_input: Article URL typed by the user (may be None or empty).

    Returns:
        The parsed article text when ``input_type`` is "URL" and a non-blank
        URL was given; a string starting with "Error fetching URL:" when the
        download or parse raised; otherwise ``text_input`` (or "" when it is
        None), so callers always receive a string.
    """
    # Treat None and whitespace-only URLs as "no URL supplied".
    url = (url_input or "").strip()
    if input_type == "URL" and url:
        try:
            article = Article(url)
            article.download()
            article.parse()
            return article.text
        except Exception as e:
            # Best effort: the failure is reported in-band as a message string.
            return f"Error fetching URL: {str(e)}"
    return text_input or ""
|
74 |
+
|
75 |
+
def check_grammar(text):
    """Run LanguageTool (public API, en-US) over ``text``.

    Args:
        text: The text to check. None, empty or whitespace-only text yields
            an empty result without contacting the service.

    Returns:
        A list of dicts, one per match, with the keys:
            "text": the flagged substring of ``text`` (not the surrounding
                context), so it can be quoted and replaced verbatim;
            "error": the rule message;
            "suggestions": the list of proposed replacements;
            "offset" / "length": position and size of the flagged substring.
        If the check raises, a single placeholder entry is returned whose
        "error" starts with "Grammar check failed:" and whose other fields
        are empty/zero.
    """
    if not text or not text.strip():
        return []
    try:
        grammar_tool = language_tool_python.LanguageToolPublicAPI('en-US')
        try:
            matches = grammar_tool.check(text)
        finally:
            # Release the client whether or not the check succeeded.
            grammar_tool.close()

        issues = []
        for match in matches:
            offset = match.offset
            # NOTE(review): newer language_tool_python releases appear to
            # expose this attribute as ``error_length``; accept either name.
            length = getattr(match, "errorLength", None)
            if length is None:
                length = match.error_length
            issues.append({
                "text": text[offset:offset + length],
                "error": match.message,
                "suggestions": match.replacements,
                "offset": offset,
                "length": length,
            })
        return issues
    except Exception as e:
        return [{"text": "", "error": f"Grammar check failed: {str(e)}", "suggestions": [], "offset": 0, "length": 0}]
|
90 |
+
|
91 |
+
def detect_sensitive_content(text, threshold=0.7):
    """Flag sentences that the toxicity classifier scores above ``threshold``.

    Args:
        text: The text to scan. None, empty or whitespace-only text yields
            an empty list.
        threshold: Minimum classifier score (exclusive) for a sentence to be
            reported. Defaults to 0.7.

    Returns:
        A list of dicts with the keys "sentence" (the flagged sentence),
        "score" (the classifier score) and "index" (the sentence's position
        in the tokenized text).
    """
    if not text or not text.strip():
        return []

    # NOTE(review): label names taken from the unitary/toxic-bert model card;
    # every one of them denotes a toxicity category, so the top label does
    # not have to be exactly "toxic" for the sentence to be sensitive.
    toxic_labels = {
        "toxic", "severe_toxic", "obscene", "threat", "insult", "identity_hate",
    }

    sensitive_issues = []
    for i, sentence in enumerate(sent_tokenize(text)):
        # truncation=True keeps over-long sentences within the model's input
        # limit instead of raising.
        top = toxicity_classifier(sentence, truncation=True)[0]
        if top['label'] in toxic_labels and top['score'] > threshold:
            sensitive_issues.append({
                "sentence": sentence,
                "score": top['score'],
                "index": i,
            })
    return sensitive_issues
|
103 |
+
|
104 |
+
def generate_suggestions(text, grammar_issues, sensitive_issues):
    """Build the human-readable suggestion strings shown to the user.

    Args:
        text: The reviewed text (currently unused; kept for the call
            signature).
        grammar_issues: Dicts with "text", "error" and "suggestions" keys;
            entries without suggestions are skipped. None is treated as empty.
        sensitive_issues: Dicts with "sentence" and "score" keys. None is
            treated as empty.

    Returns:
        A list of strings. Grammar entries read
        "Replace '<text>' with '<first suggestion>' (<error>)". Sensitive
        entries read "Rephrase sensitive content '<sentence>' to:
        '<summary>' (Toxicity score: <score>)", or "Failed to rephrase ..."
        when the summarizer raised.
    """
    suggestions = [
        f"Replace '{issue['text']}' with '{issue['suggestions'][0]}' ({issue['error']})"
        for issue in (grammar_issues or [])
        if issue['suggestions']
    ]

    for issue in (sensitive_issues or []):
        try:
            summary = summarizer(issue['sentence'], max_length=50, min_length=10, do_sample=False)[0]['summary_text']
            # Trim surrounding whitespace so it does not end up inside the
            # quoted replacement text.
            summary = summary.strip()
            suggestions.append(f"Rephrase sensitive content '{issue['sentence']}' to: '{summary}' (Toxicity score: {issue['score']:.2f})")
        except Exception as e:
            suggestions.append(f"Failed to rephrase '{issue['sentence']}': {str(e)}")
    return suggestions
|
116 |
+
|
117 |
+
def highlight_text(text, grammar_issues, sensitive_issues):
    """Render ``text`` as HTML with issues marked by background colour.

    Grammar issues are highlighted yellow and sensitive sentences red. Where
    the two overlap, the grammar span is shown yellow and the rest of the
    sentence red.

    Args:
        text: The reviewed plain text.
        grammar_issues: Dicts with "offset" and "length" keys that index into
            ``text``. Zero-length or out-of-range entries are ignored/clamped.
        sensitive_issues: Dicts with a "sentence" key; each sentence is
            located in ``text`` after the previous one that was found.

    Returns:
        An HTML string. All text content is HTML-escaped, so markup contained
        in the reviewed text is displayed literally instead of being
        interpreted.
    """
    import html  # local import keeps this function self-contained

    if not text:
        return ""
    size = len(text)

    # Every range is computed against the ORIGINAL text, so inserting markup
    # for one issue can never shift or hide another issue.
    grammar_ranges = []
    for issue in (grammar_issues or []):
        start = min(max(issue['offset'], 0), size)
        end = min(start + max(issue['length'], 0), size)
        if end > start:
            grammar_ranges.append((start, end))

    sensitive_ranges = []
    cursor = 0
    for issue in (sensitive_issues or []):
        sentence = issue['sentence']
        start = text.find(sentence, cursor) if sentence else -1
        if start == -1:
            continue
        cursor = start + len(sentence)
        sensitive_ranges.append((start, cursor))

    def inside(ranges, lo, hi):
        """Return True when [lo, hi) lies within one of ``ranges``."""
        return any(start <= lo and hi <= end for start, end in ranges)

    # Cut the text at every range boundary; each resulting piece is then
    # either wholly inside or wholly outside any given range.
    cuts = {0, size}
    for start, end in grammar_ranges + sensitive_ranges:
        cuts.update((start, end))
    cuts = sorted(cuts)

    pieces = []
    for lo, hi in zip(cuts, cuts[1:]):
        chunk = html.escape(text[lo:hi])
        if inside(grammar_ranges, lo, hi):
            pieces.append(f"<span style='background-color: yellow'>{chunk}</span>")
        elif inside(sensitive_ranges, lo, hi):
            pieces.append(f"<span style='background-color: red'>{chunk}</span>")
        else:
            pieces.append(chunk)
    return "".join(pieces)
|
136 |
+
|
137 |
+
# Main function to process input
|
138 |
+
def review_blog(input_type, text_input, url_input):
    """Run the full review pipeline for the Gradio "Review Content" button.

    Steps: extract the text, check grammar, detect sensitive content, build
    suggestions, then render the highlighted HTML.

    Args:
        input_type: "Text" or "URL", as selected in the UI.
        text_input: Pasted blog text (may be None or empty).
        url_input: Blog URL (may be None or empty).

    Returns:
        A 3-tuple ``(highlighted_html_or_message, numbered_suggestions_text,
        suggestions_list)``. When there is nothing to review, or the URL
        fetch failed, the first element is a plain message and the other two
        are "" and [].
    """
    has_text = bool(text_input and text_input.strip())
    has_url = bool(url_input and url_input.strip())
    if not has_text and not has_url:
        return "Please provide text or a URL.", "", []

    # Step 1: Text Extraction
    text = extract_text(input_type, text_input, url_input)
    # A fetch failure is reported in-band as "Error fetching URL: ...". Only
    # interpret that prefix in URL mode, so pasted text that merely begins
    # with the word "Error" is still reviewed.
    if input_type == "URL" and has_url and (text or "").startswith("Error fetching URL:"):
        return text, "", []
    if not text or not text.strip():
        return "No text could be extracted to review.", "", []

    # Step 2: Grammar Check
    grammar_issues = check_grammar(text)

    # Step 3: Sensitive Content Detection
    sensitive_issues = detect_sensitive_content(text)

    # Step 4: Generate Suggestions
    suggestions = generate_suggestions(text, grammar_issues, sensitive_issues)

    # Step 5: Coordinate Output
    highlighted_text = highlight_text(text, grammar_issues, sensitive_issues)
    suggestions_text = "\n".join(f"{i+1}. {sug}" for i, sug in enumerate(suggestions))

    return highlighted_text, suggestions_text, suggestions
|
161 |
+
|
162 |
+
def apply_changes(text, suggestions, approved_indices):
    """Apply the user-approved suggestions to ``text``.

    Args:
        text: The text to modify (None is treated as "").
        suggestions: The suggestion strings, in the numbered order shown to
            the user. None or empty means nothing can be applied.
        approved_indices: Comma-separated 1-based suggestion numbers, e.g.
            "1,2,3". Blank, non-numeric, out-of-range and repeated entries
            are ignored.

    Returns:
        ``text`` with, for each approved suggestion, every occurrence of the
        quoted original replaced by the quoted replacement. Suggestions that
        match neither known format (e.g. "Failed to rephrase ...") are
        skipped.
    """
    text = text or ""
    if not suggestions or not approved_indices:
        return text

    # Suggestion strings end with a parenthesised note, so the patterns are
    # anchored on the fixed delimiters around the two quoted parts rather
    # than on a trailing quote. DOTALL lets quoted parts span lines.
    grammar_pattern = re.compile(r"^Replace '(.*?)' with '(.*?)' \(.*\)$", re.DOTALL)
    sensitive_pattern = re.compile(
        r"^Rephrase sensitive content '(.*?)' to: '(.*)' \(Toxicity score: [^()]*\)$",
        re.DOTALL,
    )

    applied = set()
    for token in approved_indices.split(','):
        try:
            idx = int(token.strip()) - 1
        except ValueError:
            continue  # Skip invalid indices
        # Reject 0 and negatives explicitly: a negative index would
        # otherwise wrap around to the end of the list.
        if not 0 <= idx < len(suggestions) or idx in applied:
            continue
        applied.add(idx)

        suggestion = suggestions[idx]
        parsed = sensitive_pattern.match(suggestion) or grammar_pattern.match(suggestion)
        if not parsed:
            continue
        original, replacement = parsed.groups()
        if original:
            text = text.replace(original, replacement)
    return text
|
185 |
+
|
186 |
+
# Gradio interface
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# AI Blog Reviewer with AutoGen")
    gr.Markdown("Enter blog text or a URL to review for spelling, grammar, and sensitive content. Approve suggested changes to update the text.")

    # Input selection: only the textbox matching the chosen type is visible.
    input_type = gr.Radio(["Text", "URL"], label="Input Type", value="Text")
    text_input = gr.Textbox(label="Blog Text", placeholder="Enter your blog text here...", lines=10, visible=True)
    url_input = gr.Textbox(label="Blog URL", placeholder="Enter the blog URL here...", visible=False)

    def toggle_input(input_type):
        """Show the textbox that matches the selected input type."""
        return {
            text_input: gr.update(visible=input_type == "Text"),
            url_input: gr.update(visible=input_type == "URL"),
        }

    input_type.change(fn=toggle_input, inputs=input_type, outputs=[text_input, url_input])

    # Review outputs and the approval controls.
    review_button = gr.Button("Review Content")
    highlighted_output = gr.HTML(label="Highlighted Issues (Yellow: Grammar, Red: Sensitive)")
    suggestions_output = gr.Textbox(label="Suggestions", lines=10)
    approve_indices = gr.Textbox(label="Approve Suggestions (Enter numbers, e.g., '1,2,3')")
    apply_button = gr.Button("Apply Approved Changes")
    final_text = gr.Textbox(label="Final Text", lines=10)

    # Holds the raw suggestion list between the "review" and "apply" clicks.
    # Starts as an empty list so "apply" before any review receives a list
    # rather than None.
    suggestions_state = gr.State([])

    review_button.click(
        fn=review_blog,
        inputs=[input_type, text_input, url_input],
        outputs=[highlighted_output, suggestions_output, suggestions_state],
    )

    # NOTE(review): changes are applied to the "Blog Text" textbox content.
    # In URL mode that textbox is hidden, so the fetched article text is not
    # what gets edited here; confirm whether that is intended.
    apply_button.click(
        fn=apply_changes,
        inputs=[text_input, suggestions_state, approve_indices],
        outputs=final_text,
    )

# Launch Gradio interface
demo.launch()
|