JaishnaCodz committed on
Commit
768c740
·
verified ·
1 Parent(s): d5e169a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +224 -224
app.py CHANGED
@@ -1,225 +1,225 @@
1
- import gradio as gr
2
- import language_tool_python
3
- import requests
4
- from newspaper import Article
5
- from transformers import pipeline
6
- import re
7
- import nltk
8
- from nltk.tokenize import sent_tokenize
9
- from autogen import AssistantAgent, UserProxyAgent, config_list_from_json
10
-
11
- nltk.download('punkt')
12
-
13
- # Initialize Hugging Face models
14
- toxicity_classifier = pipeline("text-classification", model="unitary/toxic-bert")
15
- summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-6-6")
16
-
17
- # AutoGen configuration
18
- config_list = [
19
- {
20
- "model": "local",
21
- "api_key": "none"
22
- }
23
- ]
24
-
25
- # Define AutoGen Agents (for modularity, but we'll call functions directly)
26
- user_proxy = UserProxyAgent(
27
- name="UserProxy",
28
- system_message="Coordinates tasks and passes inputs to other agents.",
29
- human_input_mode="NEVER",
30
- code_execution_config={"work_dir": "autogen_workdir", "use_docker": False}
31
- )
32
-
33
- text_extraction_agent = AssistantAgent(
34
- name="TextExtractor",
35
- system_message="Extracts text from URLs or processes raw text.",
36
- llm_config={"config_list": config_list}
37
- )
38
-
39
- grammar_check_agent = AssistantAgent(
40
- name="GrammarChecker",
41
- system_message="Identifies spelling and grammar errors using LanguageTool.",
42
- llm_config={"config_list": config_list}
43
- )
44
-
45
- sensitive_content_agent = AssistantAgent(
46
- name="SensitiveContentDetector",
47
- system_message="Detects toxic or sensitive content (e.g., racism, gender bias).",
48
- llm_config={"config_list": config_list}
49
- )
50
-
51
- suggestion_agent = AssistantAgent(
52
- name="SuggestionGenerator",
53
- system_message="Generates suggestions to fix grammar and rephrase sensitive content.",
54
- llm_config={"config_list": config_list}
55
- )
56
-
57
- coordinator_agent = AssistantAgent(
58
- name="Coordinator",
59
- system_message="Combines results, highlights issues, and formats outputs.",
60
- llm_config={"config_list": config_list}
61
- )
62
-
63
- # Task functions
64
- def extract_text(input_type, text_input, url_input):
65
- if input_type == "URL" and url_input:
66
- try:
67
- article = Article(url_input)
68
- article.download()
69
- article.parse()
70
- return article.text
71
- except Exception as e:
72
- return f"Error fetching URL: {str(e)}"
73
- return text_input
74
-
75
- def check_grammar(text):
76
- try:
77
- grammar_tool = language_tool_python.LanguageTool('en-US')
78
- matches = grammar_tool.check(text)
79
- return [
80
- {
81
- "text": match.context,
82
- "error": match.message,
83
- "suggestions": match.replacements,
84
- "offset": match.offset,
85
- "length": match.errorLength
86
- } for match in matches
87
- ]
88
- except Exception as e:
89
- return [{"text": "", "error": f"Grammar check failed: {str(e)}", "suggestions": [], "offset": 0, "length": 0}]
90
-
91
- def detect_sensitive_content(text):
92
- sentences = sent_tokenize(text)
93
- sensitive_issues = []
94
- for i, sentence in enumerate(sentences):
95
- result = toxicity_classifier(sentence)
96
- if result[0]['label'] == 'toxic' and result[0]['score'] > 0.7:
97
- sensitive_issues.append({
98
- "sentence": sentence,
99
- "score": result[0]['score'],
100
- "index": i
101
- })
102
- return sensitive_issues
103
-
104
- def generate_suggestions(text, grammar_issues, sensitive_issues):
105
- suggestions = []
106
- for issue in grammar_issues:
107
- if issue['suggestions']:
108
- suggestions.append(f"Replace '{issue['text']}' with '{issue['suggestions'][0]}' ({issue['error']})")
109
- for issue in sensitive_issues:
110
- try:
111
- summary = summarizer(issue['sentence'], max_length=50, min_length=10, do_sample=False)[0]['summary_text']
112
- suggestions.append(f"Rephrase sensitive content '{issue['sentence']}' to: '{summary}' (Toxicity score: {issue['score']:.2f})")
113
- except Exception as e:
114
- suggestions.append(f"Failed to rephrase '{issue['sentence']}': {str(e)}")
115
- return suggestions
116
-
117
- def highlight_text(text, grammar_issues, sensitive_issues):
118
- highlighted = text
119
- offset_adjust = 0
120
- for issue in grammar_issues:
121
- start = issue['offset'] + offset_adjust
122
- end = start + issue['length']
123
- error_text = highlighted[start:end]
124
- highlighted = highlighted[:start] + f"<span style='background-color: yellow'>{error_text}</span>" + highlighted[end:]
125
- offset_adjust += len("<span style='background-color: yellow'>") + len("</span>")
126
- sentences = sent_tokenize(text)
127
- offset_adjust = 0
128
- for issue in sensitive_issues:
129
- sentence = issue['sentence']
130
- start = highlighted.find(sentence, offset_adjust)
131
- if start != -1:
132
- end = start + len(sentence)
133
- highlighted = highlighted[:start] + f"<span style='background-color: red'>{sentence}</span>" + highlighted[end:]
134
- offset_adjust = end
135
- return highlighted
136
-
137
- # Main function to process input
138
- def review_blog(input_type, text_input, url_input):
139
- if not text_input and not url_input:
140
- return "Please provide text or a URL.", "", []
141
-
142
- # Step 1: Text Extraction
143
- text = extract_text(input_type, text_input, url_input)
144
- if text.startswith("Error"):
145
- return text, "", []
146
-
147
- # Step 2: Grammar Check
148
- grammar_issues = check_grammar(text)
149
-
150
- # Step 3: Sensitive Content Detection
151
- sensitive_issues = detect_sensitive_content(text)
152
-
153
- # Step 4: Generate Suggestions
154
- suggestions = generate_suggestions(text, grammar_issues, sensitive_issues)
155
-
156
- # Step 5: Coordinate Output
157
- highlighted_text = highlight_text(text, grammar_issues, sensitive_issues)
158
- suggestions_text = "\n".join([f"{i+1}. {sug}" for i, sug in enumerate(suggestions)])
159
-
160
- return highlighted_text, suggestions_text, suggestions
161
-
162
- def apply_changes(text, suggestions, approved_indices):
163
- sentences = sent_tokenize(text)
164
- for idx in approved_indices.split(','):
165
- try:
166
- idx = int(idx.strip()) - 1
167
- if idx < len(suggestions):
168
- suggestion = suggestions[idx]
169
- match = re.search(r"'([^']+)'$", suggestion)
170
- if match:
171
- new_text = match.group(1)
172
- if "Rephrase sensitive content" in suggestion:
173
- orig_match = re.search(r"'([^']+)'\s+to:", suggestion)
174
- if orig_match:
175
- orig_sentence = orig_match.group(1)
176
- text = text.replace(orig_sentence, new_text)
177
- else:
178
- orig_match = re.search(r"Replace '([^']+)'\s+with\s+'([^']+)'", suggestion)
179
- if orig_match:
180
- orig_text = orig_match.group(1)
181
- text = text.replace(orig_text, new_text)
182
- except ValueError:
183
- continue # Skip invalid indices
184
- return text
185
-
186
- # Gradio interface
187
- with gr.Blocks(theme=gr.themes.Soft()) as demo:
188
- gr.Markdown("# AI Blog Reviewer with AutoGen")
189
- gr.Markdown("Enter blog text or a URL to review for spelling, grammar, and sensitive content. Approve suggested changes to update the text.")
190
-
191
- input_type = gr.Radio(["Text", "URL"], label="Input Type", value="Text")
192
- text_input = gr.Textbox(label="Blog Text", placeholder="Enter your blog text here...", lines=10, visible=True)
193
- url_input = gr.Textbox(label="Blog URL", placeholder="Enter the blog URL here...", visible=False)
194
-
195
- def toggle_input(input_type):
196
- return {
197
- text_input: gr.update(visible=input_type == "Text"),
198
- url_input: gr.update(visible=input_type == "URL")
199
- }
200
-
201
- input_type.change(fn=toggle_input, inputs=input_type, outputs=[text_input, url_input])
202
-
203
- review_button = gr.Button("Review Content")
204
- highlighted_output = gr.HTML(label="Highlighted Issues (Yellow: Grammar, Red: Sensitive)")
205
- suggestions_output = gr.Textbox(label="Suggestions", lines=10)
206
- approve_indices = gr.Textbox(label="Approve Suggestions (Enter numbers, e.g., '1,2,3')")
207
- apply_button = gr.Button("Apply Approved Changes")
208
- final_text = gr.Textbox(label="Final Text", lines=10)
209
-
210
- suggestions_state = gr.State()
211
-
212
- review_button.click(
213
- fn=review_blog,
214
- inputs=[input_type, text_input, url_input],
215
- outputs=[highlighted_output, suggestions_output, suggestions_state]
216
- )
217
-
218
- apply_button.click(
219
- fn=apply_changes,
220
- inputs=[text_input, suggestions_state, approve_indices],
221
- outputs=final_text
222
- )
223
-
224
- # Launch Gradio interface
225
  demo.launch()
 
1
+ import gradio as gr
2
+ import language_tool_python
3
+ import requests
4
+ from newspaper import Article
5
+ from transformers import pipeline
6
+ import re
7
+ import nltk
8
+ from nltk.tokenize import sent_tokenize
9
+ from autogen import AssistantAgent, UserProxyAgent, config_list_from_json
10
+
11
# Fetch the Punkt sentence-tokenizer data required by nltk.sent_tokenize.
nltk.download('punkt')

# Initialize Hugging Face models
toxicity_classifier = pipeline("text-classification", model="unitary/toxic-bert")
summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-6-6")

# AutoGen configuration (placeholder "local" model; agents are declarative only).
config_list = [
    {
        "model": "local",
        "api_key": "none"
    }
]

# Define AutoGen Agents (for modularity, but we'll call functions directly)
user_proxy = UserProxyAgent(
    name="UserProxy",
    system_message="Coordinates tasks and passes inputs to other agents.",
    human_input_mode="NEVER",
    code_execution_config={"work_dir": "autogen_workdir", "use_docker": False},
)


def _make_assistant(agent_name, agent_message):
    # Factory: all assistant agents share the same llm_config.
    return AssistantAgent(
        name=agent_name,
        system_message=agent_message,
        llm_config={"config_list": config_list},
    )


text_extraction_agent = _make_assistant(
    "TextExtractor",
    "Extracts text from URLs or processes raw text.",
)
grammar_check_agent = _make_assistant(
    "GrammarChecker",
    "Identifies spelling and grammar errors using LanguageTool.",
)
sensitive_content_agent = _make_assistant(
    "SensitiveContentDetector",
    "Detects toxic or sensitive content (e.g., racism, gender bias).",
)
suggestion_agent = _make_assistant(
    "SuggestionGenerator",
    "Generates suggestions to fix grammar and rephrase sensitive content.",
)
coordinator_agent = _make_assistant(
    "Coordinator",
    "Combines results, highlights issues, and formats outputs.",
)
62
+
63
+ # Task functions
64
def extract_text(input_type, text_input, url_input):
    """Return the text to review.

    For "URL" input with a non-empty URL, download and parse the article and
    return its body text; any fetch/parse failure is reported as a string
    beginning with "Error" (checked by review_blog).  Otherwise the raw
    text_input is returned unchanged.
    """
    if input_type != "URL" or not url_input:
        return text_input
    try:
        article = Article(url_input)
        article.download()
        article.parse()
        return article.text
    except Exception as exc:
        return f"Error fetching URL: {str(exc)}"
74
+
75
def check_grammar(text):
    """Check *text* against the public LanguageTool API.

    Returns a list of issue dicts with keys: text (the match context), error
    (message), suggestions (replacement candidates), offset, and length.
    On any failure a single sentinel issue describing the error is returned
    instead of raising, so the pipeline keeps going.
    """
    try:
        grammar_tool = language_tool_python.LanguageToolPublicAPI('en-US')
        try:
            matches = grammar_tool.check(text)
        finally:
            # Fix: the original never closed the client, leaking its
            # underlying HTTP session on every call.
            grammar_tool.close()
        return [
            {
                "text": match.context,
                "error": match.message,
                "suggestions": match.replacements,
                "offset": match.offset,
                "length": match.errorLength,
            }
            for match in matches
        ]
    except Exception as e:
        return [{"text": "", "error": f"Grammar check failed: {str(e)}", "suggestions": [], "offset": 0, "length": 0}]
90
+
91
def detect_sensitive_content(text):
    """Score each sentence of *text* with the toxicity classifier.

    Returns a list of dicts (sentence, score, index) for sentences labeled
    'toxic' with score above 0.7; index is the sentence position.
    """
    flagged = []
    for idx, sentence in enumerate(sent_tokenize(text)):
        prediction = toxicity_classifier(sentence)[0]
        if prediction['label'] == 'toxic' and prediction['score'] > 0.7:
            flagged.append({
                "sentence": sentence,
                "score": prediction['score'],
                "index": idx,
            })
    return flagged
103
+
104
def generate_suggestions(text, grammar_issues, sensitive_issues):
    """Build human-readable fix suggestions.

    Grammar issues with at least one replacement become "Replace ... with ..."
    entries; each sensitive sentence is rephrased via the summarizer (failures
    become a "Failed to rephrase" entry instead of raising).
    """
    suggestions = []
    for issue in grammar_issues:
        replacements = issue['suggestions']
        if replacements:
            suggestions.append(
                f"Replace '{issue['text']}' with '{replacements[0]}' ({issue['error']})"
            )
    for issue in sensitive_issues:
        sentence = issue['sentence']
        try:
            rephrased = summarizer(
                sentence, max_length=50, min_length=10, do_sample=False
            )[0]['summary_text']
        except Exception as e:
            suggestions.append(f"Failed to rephrase '{sentence}': {str(e)}")
        else:
            suggestions.append(
                f"Rephrase sensitive content '{sentence}' to: '{rephrased}' (Toxicity score: {issue['score']:.2f})"
            )
    return suggestions
116
+
117
def highlight_text(text, grammar_issues, sensitive_issues):
    """Return *text* as HTML: grammar spans highlighted yellow, sensitive
    sentences highlighted red.

    Assumes grammar_issues arrive in ascending-offset order (LanguageTool
    reports matches in document order) -- the running offset_adjust depends
    on it.  NOTE(review): a sensitive sentence that overlaps a grammar
    highlight will not be found by str.find (the inserted tags break the
    substring match) and is silently skipped.
    """
    highlighted = text
    offset_adjust = 0
    for issue in grammar_issues:
        start = issue['offset'] + offset_adjust
        end = start + issue['length']
        error_text = highlighted[start:end]
        highlighted = (
            highlighted[:start]
            + f"<span style='background-color: yellow'>{error_text}</span>"
            + highlighted[end:]
        )
        # Later offsets (relative to the original text) shift by the tag length.
        offset_adjust += len("<span style='background-color: yellow'>") + len("</span>")
    # Fix: removed a dead `sentences = sent_tokenize(text)` call whose result
    # was never used (wasted work and an unnecessary nltk dependency here).
    offset_adjust = 0
    for issue in sensitive_issues:
        sentence = issue['sentence']
        start = highlighted.find(sentence, offset_adjust)
        if start != -1:
            end = start + len(sentence)
            highlighted = (
                highlighted[:start]
                + f"<span style='background-color: red'>{sentence}</span>"
                + highlighted[end:]
            )
            offset_adjust = end
    return highlighted
136
+
137
+ # Main function to process input
138
def review_blog(input_type, text_input, url_input):
    """Pipeline entry point: extract -> grammar check -> toxicity scan ->
    suggestions -> highlighted HTML.

    Returns (highlighted_html, numbered_suggestions_text, suggestions_list);
    the raw list is stored in gr.State and later consumed by apply_changes.
    """
    if not text_input and not url_input:
        return "Please provide text or a URL.", "", []

    # Step 1: Text Extraction
    text = extract_text(input_type, text_input, url_input)
    # extract_text reports fetch failures as a string starting with "Error".
    if text.startswith("Error"):
        return text, "", []

    # Steps 2-4: grammar issues, sensitive sentences, then suggestions built
    # from both.
    grammar_issues = check_grammar(text)
    sensitive_issues = detect_sensitive_content(text)
    suggestions = generate_suggestions(text, grammar_issues, sensitive_issues)

    # Step 5: coordinate output for the UI.
    highlighted = highlight_text(text, grammar_issues, sensitive_issues)
    numbered = "\n".join(
        f"{pos}. {entry}" for pos, entry in enumerate(suggestions, start=1)
    )
    return highlighted, numbered, suggestions
161
+
162
def apply_changes(text, suggestions, approved_indices):
    """Apply the user-approved suggestions (1-based, comma-separated) to *text*.

    Each suggestion string is parsed back apart with a regex to recover the
    original span and its replacement; non-numeric, out-of-range, or
    malformed entries are skipped silently.

    Fixes vs. the original:
    - Removed a dead, unused `sent_tokenize(text)` call.
    - The old `'([^']+)'$` anchor never matched (every suggestion ends with a
      parenthesized annotation such as "(Toxicity score: 0.92)"), so NO
      approved change was ever applied; the regexes now capture both the
      original span and the replacement directly.
    - Range check now rejects negative indices: previously entering "0"
      silently applied the *last* suggestion via Python's -1 indexing.
    """
    for raw in approved_indices.split(','):
        try:
            idx = int(raw.strip()) - 1
        except ValueError:
            continue  # Skip non-numeric entries
        if not 0 <= idx < len(suggestions):
            continue  # Skip out-of-range (including negative) indices
        suggestion = suggestions[idx]
        if "Rephrase sensitive content" in suggestion:
            m = re.search(r"Rephrase sensitive content '([^']+)'\s+to:\s+'([^']+)'", suggestion)
        else:
            m = re.search(r"Replace '([^']+)'\s+with\s+'([^']+)'", suggestion)
        if m:
            # NOTE(review): for grammar entries group(1) is the LanguageTool
            # *context* snippet, not just the errored token -- replacement is
            # only as precise as generate_suggestions' format allows.
            text = text.replace(m.group(1), m.group(2))
    return text
185
+
186
# Gradio interface: input selector, review step, and an approve/apply step.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# AI Blog Reviewer with AutoGen")
    gr.Markdown("Enter blog text or a URL to review for spelling, grammar, and sensitive content. Approve suggested changes to update the text.")

    input_type = gr.Radio(["Text", "URL"], label="Input Type", value="Text")
    text_input = gr.Textbox(label="Blog Text", placeholder="Enter your blog text here...", lines=10, visible=True)
    url_input = gr.Textbox(label="Blog URL", placeholder="Enter the blog URL here...", visible=False)

    def toggle_input(input_type):
        # Show exactly one of the two input boxes, matching the radio choice.
        return {
            text_input: gr.update(visible=input_type == "Text"),
            url_input: gr.update(visible=input_type == "URL"),
        }

    input_type.change(fn=toggle_input, inputs=input_type, outputs=[text_input, url_input])

    review_button = gr.Button("Review Content")
    highlighted_output = gr.HTML(label="Highlighted Issues (Yellow: Grammar, Red: Sensitive)")
    suggestions_output = gr.Textbox(label="Suggestions", lines=10)
    approve_indices = gr.Textbox(label="Approve Suggestions (Enter numbers, e.g., '1,2,3')")
    apply_button = gr.Button("Apply Approved Changes")
    final_text = gr.Textbox(label="Final Text", lines=10)

    # Holds the raw suggestions list between the review and apply steps.
    suggestions_state = gr.State()

    review_button.click(
        fn=review_blog,
        inputs=[input_type, text_input, url_input],
        outputs=[highlighted_output, suggestions_output, suggestions_state],
    )

    apply_button.click(
        fn=apply_changes,
        inputs=[text_input, suggestions_state, approve_indices],
        outputs=final_text,
    )

# Launch Gradio interface
demo.launch()