JaishnaCodz committed
Commit 56f7cbb · verified · 1 Parent(s): 3ee9037

Update app.py

Files changed (1):
  1. app.py +120 -220
app.py CHANGED
@@ -1,252 +1,152 @@
  import gradio as gr
- import language_tool_python
- import requests
- from newspaper import Article
  from transformers import pipeline
- import re
  import nltk
  from nltk.tokenize import sent_tokenize
- from autogen import AssistantAgent, UserProxyAgent
-
- # Download required NLTK data at startup
- try:
-     nltk.download('punkt')
-     nltk.download('punkt_tab')  # Ensure language-specific data is downloaded
-     print("NLTK data (punkt and punkt_tab) downloaded successfully.")
- except Exception as e:
-     print(f"Error downloading NLTK data: {str(e)}")
-
- # Initialize Hugging Face models
- try:
-     toxicity_classifier = pipeline("text-classification", model="unitary/toxic-bert")
-     print("Toxicity classifier loaded successfully.")
- except Exception as e:
-     print(f"Error loading toxicity classifier: {str(e)}")
-
- try:
-     summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-6-6")
-     print("Summarizer loaded successfully.")
- except Exception as e:
-     print(f"Error loading summarizer: {str(e)}")
-
- # Define AutoGen Agents (for modularity, but we'll call functions directly)
- user_proxy = UserProxyAgent(
-     name="UserProxy",
-     system_message="Coordinates tasks and passes inputs to other agents.",
-     human_input_mode="NEVER",
-     code_execution_config={"work_dir": "autogen_workdir", "use_docker": False}
- )
-
- text_extraction_agent = AssistantAgent(
-     name="TextExtractor",
-     system_message="Extracts text from URLs or processes raw text."
- )
-
- grammar_check_agent = AssistantAgent(
-     name="GrammarChecker",
-     system_message="Identifies spelling and grammar errors using LanguageTool."
- )
-
- sensitive_content_agent = AssistantAgent(
-     name="SensitiveContentDetector",
-     system_message="Detects toxic or sensitive content (e.g., racism, gender bias)."
- )
-
- suggestion_agent = AssistantAgent(
-     name="SuggestionGenerator",
-     system_message="Generates suggestions to fix grammar and rephrase sensitive content."
- )
-
- coordinator_agent = AssistantAgent(
-     name="Coordinator",
-     system_message="Combines results, highlights issues, and formats outputs."
- )
-
- # Task functions
  def extract_text(input_type, text_input, url_input):
-     try:
-         if input_type == "URL" and url_input:
-             article = Article(url_input)
-             article.download()
-             article.parse()
-             print(f"Extracted text from URL: {url_input[:50]}...")  # Debug
-             return article.text
-         print(f"Using input text: {text_input[:50]}...")  # Debug
-         return text_input
-     except Exception as e:
-         print(f"Error in extract_text: {str(e)}")
-         return f"Error fetching URL or processing text: {str(e)}"

  def check_grammar(text):
-     try:
-         grammar_tool = language_tool_python.LanguageToolPublicAPI('en-US')
-         print("Checking grammar...")  # Debug
-         matches = grammar_tool.check(text)
-         return [
-             {
-                 "text": match.context,
-                 "error": match.message,
-                 "suggestions": match.replacements,
-                 "offset": match.offset,
-                 "length": match.errorLength
-             } for match in matches
-         ]
-     except Exception as e:
-         print(f"Grammar check failed: {str(e)}")
-         return [{"text": "", "error": f"Grammar check failed: {str(e)}", "suggestions": [], "offset": 0, "length": 0}]

  def detect_sensitive_content(text):
-     try:
-         sentences = sent_tokenize(text)
-         sensitive_issues = []
-         for i, sentence in enumerate(sentences):
-             result = toxicity_classifier(sentence)
-             if result[0]['label'] == 'toxic' and result[0]['score'] > 0.7:
-                 sensitive_issues.append({
-                     "sentence": sentence,
-                     "score": result[0]['score'],
-                     "index": i
-                 })
-         print(f"Detected {len(sensitive_issues)} sensitive issues.")  # Debug
-         return sensitive_issues
-     except Exception as e:
-         print(f"Error in detect_sensitive_content: {str(e)}")
-         return []

  def generate_suggestions(text, grammar_issues, sensitive_issues):
-     try:
-         suggestions = []
-         for issue in grammar_issues:
-             if issue['suggestions']:
-                 suggestions.append(f"Replace '{issue['text']}' with '{issue['suggestions'][0]}' ({issue['error']})")
-         for issue in sensitive_issues:
-             summary = summarizer(issue['sentence'], max_length=50, min_length=10, do_sample=False)[0]['summary_text']
-             suggestions.append(f"Rephrase sensitive content '{issue['sentence']}' to: '{summary}' (Toxicity score: {issue['score']:.2f})")
-         print(f"Generated {len(suggestions)} suggestions.")  # Debug
-         return suggestions
-     except Exception as e:
-         print(f"Error in generate_suggestions: {str(e)}")
-         return []

  def highlight_text(text, grammar_issues, sensitive_issues):
-     try:
-         highlighted = text
-         offset_adjust = 0
-         for issue in grammar_issues:
-             start = issue['offset'] + offset_adjust
-             end = start + issue['length']
-             error_text = highlighted[start:end]
-             highlighted = highlighted[:start] + f"<span style='background-color: yellow'>{error_text}</span>" + highlighted[end:]
-             offset_adjust += len("<span style='background-color: yellow'>") + len("</span>")
-         sentences = sent_tokenize(text)
-         offset_adjust = 0
-         for issue in sensitive_issues:
-             sentence = issue['sentence']
-             start = highlighted.find(sentence, offset_adjust)
-             if start != -1:
-                 end = start + len(sentence)
-                 highlighted = highlighted[:start] + f"<span style='background-color: red'>{sentence}</span>" + highlighted[end:]
-                 offset_adjust = end
-         return highlighted
-     except Exception as e:
-         print(f"Error in highlight_text: {str(e)}")
-         return text
-
- # Main function to process input
- def review_blog(input_type, text_input, url_input):
-     try:
-         if not text_input and not url_input:
-             return "Please provide text or a URL.", "", []
-
-         # Step 1: Text Extraction
-         text = extract_text(input_type, text_input, url_input)
-         print(f"Processed text: {text[:50]}...")  # Debug
-         if text.startswith("Error"):
-             return text, "", []
-
-         # Step 2: Grammar Check
-         grammar_issues = check_grammar(text)
-
-         # Step 3: Sensitive Content Detection
-         sensitive_issues = detect_sensitive_content(text)
-
-         # Step 4: Generate Suggestions
-         suggestions = generate_suggestions(text, grammar_issues, sensitive_issues)
-
-         # Step 5: Coordinate Output
-         highlighted_text = highlight_text(text, grammar_issues, sensitive_issues)
-         suggestions_text = "\n".join([f"{i+1}. {sug}" for i, sug in enumerate(suggestions)])
-
-         return highlighted_text, suggestions_text, suggestions
-     except Exception as e:
-         print(f"Error in review_blog: {str(e)}")
-         return f"Error processing input: {str(e)}", "", []

  def apply_changes(text, suggestions, approved_indices):
-     try:
-         sentences = sent_tokenize(text)
-         for idx in approved_indices.split(','):
-             try:
-                 idx = int(idx.strip()) - 1
-                 if idx < len(suggestions):
-                     suggestion = suggestions[idx]
-                     match = re.search(r"'([^']+)'$", suggestion)
-                     if match:
-                         new_text = match.group(1)
-                         if "Rephrase sensitive content" in suggestion:
-                             orig_match = re.search(r"'([^']+)'\s+to:", suggestion)
-                             if orig_match:
-                                 orig_sentence = orig_match.group(1)
-                                 text = text.replace(orig_sentence, new_text)
-                         else:
-                             orig_match = re.search(r"Replace '([^']+)'\s+with\s+'([^']+)'", suggestion)
-                             if orig_match:
-                                 orig_text = orig_match.group(1)
-                                 text = text.replace(orig_text, new_text)
-             except ValueError:
-                 continue  # Skip invalid indices
-         return text
-     except Exception as e:
-         print(f"Error in apply_changes: {str(e)}")
-         return text
-
- # Gradio interface
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
-     gr.Markdown("# AI Blog Reviewer with AutoGen")
-     gr.Markdown("Enter blog text or a URL to review for spelling, grammar, and sensitive content. Approve suggested changes to update the text.")

      input_type = gr.Radio(["Text", "URL"], label="Input Type", value="Text")
-     text_input = gr.Textbox(label="Blog Text", placeholder="Enter your blog text here...", lines=10, visible=True)
-     url_input = gr.Textbox(label="Blog URL", placeholder="Enter the blog URL here...", visible=False)

-     def toggle_input(input_type):
          return {
-             text_input: gr.update(visible=input_type == "Text"),
-             url_input: gr.update(visible=input_type == "URL")
          }

      input_type.change(fn=toggle_input, inputs=input_type, outputs=[text_input, url_input])

-     review_button = gr.Button("Review Content")
-     highlighted_output = gr.HTML(label="Highlighted Issues (Yellow: Grammar, Red: Sensitive)")
-     suggestions_output = gr.Textbox(label="Suggestions", lines=10)
-     approve_indices = gr.Textbox(label="Approve Suggestions (Enter numbers, e.g., '1,2,3')")
-     apply_button = gr.Button("Apply Approved Changes")
-     final_text = gr.Textbox(label="Final Text", lines=10)

      suggestions_state = gr.State()

-     review_button.click(
-         fn=review_blog,
-         inputs=[input_type, text_input, url_input],
-         outputs=[highlighted_output, suggestions_output, suggestions_state]
-     )

-     apply_button.click(
-         fn=apply_changes,
-         inputs=[text_input, suggestions_state, approve_indices],
-         outputs=final_text
-     )

- # Launch Gradio interface
- demo.launch()
 
  import gradio as gr
  from transformers import pipeline
+ from newspaper import Article
+ import language_tool_python
  import nltk
+ import re
  from nltk.tokenize import sent_tokenize
+
+ # Download punkt for sentence tokenization (punkt_tab is also required by newer NLTK releases)
+ nltk.download("punkt")
+ nltk.download("punkt_tab")
+
+ # Load the grammar tool (LanguageTool's public API; no local Java server needed)
+ grammar_tool = language_tool_python.LanguageToolPublicAPI('en-US')
+
+ # Load models
+ summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-6-6")
+ toxicity_classifier = pipeline("text-classification", model="Hate-speech-CNERG/bert-base-uncased-hatexplain")

  def extract_text(input_type, text_input, url_input):
+     if input_type == "URL" and url_input:
+         article = Article(url_input)
+         article.download()
+         article.parse()
+         return article.text
+     return text_input
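+ # Note: Article.download() fetches the page and Article.parse() extracts the
+ # body text; both can raise (e.g. newspaper's ArticleException) on network or
+ # parsing failures, so a try/except around extract_text is a sensible guard.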

  def check_grammar(text):
+     matches = grammar_tool.check(text)
+     return [
+         {
+             "text": match.context,
+             "error": match.message,
+             "suggestions": match.replacements,
+             "offset": match.offset,
+             "length": match.errorLength
+         } for match in matches
+     ]
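+ # Each returned dict is shaped like (illustrative values, not real output):
+ # {"text": "...I beleive this is...", "error": "Possible spelling mistake found.",
+ #  "suggestions": ["believe"], "offset": 2, "length": 7}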

  def detect_sensitive_content(text):
+     sentences = sent_tokenize(text)
+     sensitive_issues = []
+     for i, sentence in enumerate(sentences):
+         result = toxicity_classifier(sentence)
+         label = result[0]['label'].lower()
+         if any(term in label for term in ['toxic', 'hate', 'offensive']):
+             sensitive_issues.append({
+                 "sentence": sentence,
+                 "score": result[0]['score'],
+                 "label": label,
+                 "index": i
+             })
+     return sensitive_issues
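+ # The pipeline returns one [{'label': ..., 'score': ...}] list per call; the
+ # exact label names of the hatexplain checkpoint are an assumption here, which
+ # is why the code substring-matches 'toxic'/'hate'/'offensive' against the
+ # lowercased label rather than comparing to a single fixed string.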

  def generate_suggestions(text, grammar_issues, sensitive_issues):
+     suggestions = []
+     for issue in grammar_issues:
+         if issue['suggestions']:
+             suggestions.append(f"Replace '{issue['text']}' with '{issue['suggestions'][0]}' ({issue['error']})")
+     for issue in sensitive_issues:
+         summary = summarizer(issue['sentence'], max_length=50, min_length=10, do_sample=False)[0]['summary_text']
+         suggestions.append(f"Rephrase sensitive content '{issue['sentence']}' to: '{summary}' (Toxicity score: {issue['score']:.2f})")
+     return suggestions
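+ # Design note: the summarizer doubles as a crude rephraser for flagged
+ # sentences; max_length/min_length are measured in tokens, so very short
+ # inputs may trigger warnings or come back nearly unchanged.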

  def highlight_text(text, grammar_issues, sensitive_issues):
+     highlighted = text
+     offset_adjust = 0
+     for issue in grammar_issues:
+         start = issue['offset'] + offset_adjust
+         end = start + issue['length']
+         error_text = highlighted[start:end]
+         span = f"<span style='background-color: yellow'>{error_text}</span>"
+         highlighted = highlighted[:start] + span + highlighted[end:]
+         offset_adjust += len(span) - len(error_text)
+
+     for issue in sensitive_issues:
+         sentence = issue['sentence']
+         highlighted = highlighted.replace(sentence, f"<span style='background-color: red'>{sentence}</span>")
+
+     return highlighted
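+ # Offset bookkeeping: each yellow span grows the string by
+ # len(span) - len(error_text) characters (a constant 46 for this markup), so
+ # every later LanguageTool offset is shifted by the running total kept in
+ # offset_adjust.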

+ def review_blog(input_type, text_input, url_input):
+     if not text_input and not url_input:
+         return "Please provide text or a URL.", "", []
+
+     text = extract_text(input_type, text_input, url_input)
+     grammar_issues = check_grammar(text)
+     sensitive_issues = detect_sensitive_content(text)
+     suggestions = generate_suggestions(text, grammar_issues, sensitive_issues)
+     highlighted_text = highlight_text(text, grammar_issues, sensitive_issues)
+     suggestions_text = "\n".join([f"{i+1}. {sug}" for i, sug in enumerate(suggestions)])
+
+     return highlighted_text, suggestions_text, suggestions

  def apply_changes(text, suggestions, approved_indices):
+     for idx in approved_indices.split(','):
+         try:
+             idx = int(idx.strip()) - 1
+             if 0 <= idx < len(suggestions):
+                 suggestion = suggestions[idx]
+                 if "Rephrase sensitive content" in suggestion:
+                     # "Rephrase sensitive content '<orig>' to: '<new>' (...)"
+                     match = re.search(r"Rephrase sensitive content '([^']+)' to: '([^']+)'", suggestion)
+                 else:
+                     # "Replace '<orig>' with '<new>' (...)"
+                     match = re.search(r"Replace '([^']+)' with '([^']+)'", suggestion)
+                 if match:
+                     text = text.replace(match.group(1), match.group(2))
+         except ValueError:
+             continue  # skip non-numeric indices
+     return text
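+ # Worked example (hypothetical suggestion string):
+ #   "Replace 'teh cat' with 'the cat' (Possible spelling mistake found.)"
+ # The regex captures group(1) == 'teh cat' and group(2) == 'the cat', and
+ # text.replace() swaps every occurrence. group(1) comes from LanguageTool's
+ # context snippet, so the replacement is best-effort.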

  with gr.Blocks(theme=gr.themes.Soft()) as demo:
+     gr.Markdown("## 🧠 AI Blog Reviewer")
+     gr.Markdown("Analyze blog text or a URL for grammar issues and sensitive content (bias, toxicity, etc.).")

      input_type = gr.Radio(["Text", "URL"], label="Input Type", value="Text")
+     text_input = gr.Textbox(label="Blog Text", lines=10, visible=True)
+     url_input = gr.Textbox(label="Blog URL", visible=False)

+     def toggle_input(choice):
          return {
+             text_input: gr.update(visible=choice == "Text"),
+             url_input: gr.update(visible=choice == "URL")
          }

      input_type.change(fn=toggle_input, inputs=input_type, outputs=[text_input, url_input])
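+     # toggle_input returns a dict keyed by components; Gradio matches those
+     # keys against the components listed in `outputs` and applies each
+     # gr.update (here, toggling which textbox is visible).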

+     review_btn = gr.Button("🔍 Review Blog")
+     highlighted_output = gr.HTML(label="Highlighted Output")
+     suggestions_output = gr.Textbox(label="Suggestions", lines=8)
+     approve_indices = gr.Textbox(label="Approve Suggestions (e.g., 1,2)")
+     apply_btn = gr.Button("Apply Suggestions")
+     final_output = gr.Textbox(label="Updated Text", lines=10)

      suggestions_state = gr.State()

+     review_btn.click(fn=review_blog,
+                      inputs=[input_type, text_input, url_input],
+                      outputs=[highlighted_output, suggestions_output, suggestions_state])

+     apply_btn.click(fn=apply_changes,
+                     inputs=[text_input, suggestions_state, approve_indices],
+                     outputs=final_output)

+ demo.launch()