JaishnaCodz commited on
Commit
9994894
·
verified ·
1 Parent(s): 2aca556

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +43 -60
app.py CHANGED
@@ -1,58 +1,52 @@
1
  import gradio as gr
2
- from transformers import pipeline
3
- from newspaper import Article
4
  import language_tool_python
5
  import nltk
6
- import re
 
7
  from nltk.tokenize import sent_tokenize
 
8
 
9
- # Download punkt tokenizer
10
# Ensure the Punkt model used by sent_tokenize() below is available.
nltk.download("punkt")

# Grammar checker talking to the LanguageTool server started via setup.sh.
# NOTE(review): LanguageToolPublicAPI normally targets the hosted public API;
# confirm it honours a custom localhost endpoint, otherwise
# language_tool_python.LanguageTool(remote_server=...) may be the intended class.
grammar_tool = language_tool_python.LanguageToolPublicAPI(language='en-US', endpoint='http://localhost:8081/')

# Hugging Face pipelines: abstractive summarizer and hate-speech classifier.
summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-6-6")
toxicity_classifier = pipeline("text-classification", model="Hate-speech-CNERG/bert-base-uncased-hatexplain")
18
 
19
def extract_text(input_type, text_input, url_input):
    """Return the text to review: the raw textbox content, or the body of
    the article downloaded from *url_input* when URL input is selected.

    Any fetch/parse failure is reported as an "Error fetching URL: ..."
    string rather than raised.
    """
    if input_type != "URL" or not url_input:
        return text_input
    try:
        page = Article(url_input)
        page.download()
        page.parse()
        return page.text
    except Exception as e:
        return f"Error fetching URL: {e}"
29
 
30
def check_grammar(text):
    """Run LanguageTool over *text* and return one issue dict per match
    (context, message, replacements, offset, length).

    On any failure a single placeholder issue describing the error is
    returned instead of raising.
    """
    try:
        issues = []
        for m in grammar_tool.check(text):
            issues.append({
                "text": m.context,
                "error": m.message,
                "suggestions": m.replacements,
                "offset": m.offset,
                "length": m.errorLength,
            })
        return issues
    except Exception as exc:
        return [{"text": "", "error": f"Grammar check failed: {str(exc)}", "suggestions": [], "offset": 0, "length": 0}]
44
 
45
def detect_sensitive_content(text):
    """Classify each sentence of *text* and collect those whose predicted
    label contains 'toxic', 'hate' or 'offensive'."""
    flagged = []
    for idx, sent in enumerate(sent_tokenize(text)):
        prediction = toxicity_classifier(sent)[0]
        label = prediction['label'].lower()
        is_sensitive = ('toxic' in label) or ('hate' in label) or ('offensive' in label)
        if is_sensitive:
            flagged.append({
                "sentence": sent,
                "score": prediction['score'],
                "label": label,
                "index": idx,
            })
    return flagged
@@ -77,27 +71,22 @@ def highlight_text(text, grammar_issues, sensitive_issues):
77
  span = f"<span style='background-color: yellow'>{error_text}</span>"
78
  highlighted = highlighted[:start] + span + highlighted[end:]
79
  offset_adjust += len(span) - len(error_text)
80
-
81
  for issue in sensitive_issues:
82
- sentence = issue['sentence']
83
- highlighted = highlighted.replace(sentence, f"<span style='background-color: red'>{sentence}</span>")
84
-
 
85
  return highlighted
86
 
87
def review_blog(input_type, text_input, url_input):
    """Full review pipeline: fetch the text, collect grammar and sensitive
    issues, and return (highlighted HTML, numbered suggestion text,
    raw suggestion list)."""
    if not (text_input or url_input):
        return "Please provide text or a URL.", "", []

    text = extract_text(input_type, text_input, url_input)
    # extract_text signals fetch failures with an "Error..." string.
    if text.startswith("Error"):
        return text, "", []

    grammar = check_grammar(text)
    sensitive = detect_sensitive_content(text)
    suggestions = generate_suggestions(text, grammar, sensitive)
    highlighted = highlight_text(text, grammar, sensitive)
    numbered = [f"{n}. {s}" for n, s in enumerate(suggestions, start=1)]

    return highlighted, "\n".join(numbered), suggestions
102
 
103
  def apply_changes(text, suggestions, approved_indices):
@@ -116,18 +105,16 @@ def apply_changes(text, suggestions, approved_indices):
116
  orig_sentence = orig_match.group(1)
117
  text = text.replace(orig_sentence, new_text)
118
  else:
119
- orig_match = re.search(r"Replace '([^']+)'\s+with\s+'([^']+)'", suggestion)
120
  if orig_match:
121
- orig_text = orig_match.group(1)
122
- text = text.replace(orig_text, new_text)
123
  except ValueError:
124
  continue
125
  return text
126
 
127
- # Gradio UI
128
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
129
- gr.Markdown("## 🧠 AI Blog Reviewer with Grammar & Bias Detection")
130
- gr.Markdown("Enter blog content or a URL. Detect grammar issues and sensitive (toxic, biased) content.")
131
 
132
  input_type = gr.Radio(["Text", "URL"], label="Input Type", value="Text")
133
  text_input = gr.Textbox(label="Blog Text", lines=10, visible=True)
@@ -141,21 +128,17 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
141
 
142
  input_type.change(fn=toggle_input, inputs=input_type, outputs=[text_input, url_input])
143
 
144
- review_btn = gr.Button("🔍 Review Blog")
145
- highlighted_output = gr.HTML(label="Highlighted Output")
146
- suggestions_output = gr.Textbox(label="Suggestions", lines=8)
147
- approve_indices = gr.Textbox(label="Approve Suggestions (e.g., 1,2)")
148
- apply_btn = gr.Button("Apply Suggestions")
149
- final_output = gr.Textbox(label="Updated Text", lines=10)
150
 
151
  suggestions_state = gr.State()
152
 
153
- review_btn.click(fn=review_blog,
154
- inputs=[input_type, text_input, url_input],
155
- outputs=[highlighted_output, suggestions_output, suggestions_state])
156
-
157
- apply_btn.click(fn=apply_changes,
158
- inputs=[text_input, suggestions_state, approve_indices],
159
- outputs=final_output)
160
 
161
  demo.launch()
 
1
  import gradio as gr
 
 
2
  import language_tool_python
3
  import nltk
4
+ from transformers import pipeline
5
+ from newspaper import Article
6
  from nltk.tokenize import sent_tokenize
7
+ import re
8
 
 
9
# Ensure the Punkt model used by sent_tokenize() below is available.
nltk.download("punkt")

# Use local LanguageTool server (must be running on port 8081).
# NOTE(review): LanguageToolPublicAPI normally targets the hosted public API;
# confirm it honours this custom endpoint, otherwise
# language_tool_python.LanguageTool(remote_server=...) may be the intended class.
grammar_tool = language_tool_python.LanguageToolPublicAPI(
    language='en-US',
    endpoint='http://localhost:8081/v2/'
)

# Hugging Face pipelines: sentence-level toxicity scoring and summarization.
toxicity_classifier = pipeline("text-classification", model="unitary/toxic-bert")
summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-6-6")
 
20
 
21
def extract_text(input_type, text_input, url_input):
    """Return the text to review.

    For "URL" input the article body is downloaded and parsed with
    newspaper's Article; any failure (network error, bad URL, parse
    failure) is returned as an "Error fetching URL: ..." string instead
    of raising, so the Gradio event handler never crashes on a bad link.
    For "Text" input (or an empty URL) the textbox content is returned
    unchanged.
    """
    if input_type == "URL" and url_input:
        try:
            article = Article(url_input)
            article.download()
            article.parse()
            return article.text
        except Exception as e:
            # Surface the failure to the UI rather than letting the
            # exception propagate out of the handler.
            return f"Error fetching URL: {e}"
    return text_input
28
 
29
def check_grammar(text):
    """Check *text* with LanguageTool and return a list of issue dicts
    (context, message, replacement suggestions, offset, length).

    If the LanguageTool server is unreachable or the request fails, a
    single placeholder issue describing the failure is returned instead
    of raising, so the rest of the review pipeline keeps working.
    """
    try:
        matches = grammar_tool.check(text)
        return [
            {
                "text": match.context,
                "error": match.message,
                "suggestions": match.replacements,
                "offset": match.offset,
                "length": match.errorLength
            } for match in matches
        ]
    except Exception as e:
        return [{"text": "", "error": f"Grammar check failed: {e}", "suggestions": [], "offset": 0, "length": 0}]
 
 
 
40
 
41
def detect_sensitive_content(text):
    """Return the sentences of *text* classified as toxic with score > 0.7.

    Each issue dict carries the offending sentence, the model's
    confidence score, and the sentence's index within the document.
    """
    sentences = sent_tokenize(text)
    if not sentences:
        return []
    # One batched pipeline call instead of one model invocation per
    # sentence — the text-classification pipeline accepts a list and
    # returns one result dict per input.
    results = toxicity_classifier(sentences)
    sensitive_issues = []
    for i, (sentence, result) in enumerate(zip(sentences, results)):
        if result['label'] == 'toxic' and result['score'] > 0.7:
            sensitive_issues.append({
                "sentence": sentence,
                "score": result['score'],
                "index": i
            })
    return sensitive_issues
 
71
  span = f"<span style='background-color: yellow'>{error_text}</span>"
72
  highlighted = highlighted[:start] + span + highlighted[end:]
73
  offset_adjust += len(span) - len(error_text)
 
74
  for issue in sensitive_issues:
75
+ highlighted = highlighted.replace(
76
+ issue['sentence'],
77
+ f"<span style='background-color: red'>{issue['sentence']}</span>"
78
+ )
79
  return highlighted
80
 
81
def review_blog(input_type, text_input, url_input):
    """Run the full review and return (highlighted HTML, numbered
    suggestion text, raw suggestion list) for the given input."""
    if not (text_input or url_input):
        return "Please provide text or a URL.", "", []

    text = extract_text(input_type, text_input, url_input)
    grammar = check_grammar(text)
    sensitive = detect_sensitive_content(text)
    suggestions = generate_suggestions(text, grammar, sensitive)
    highlighted = highlight_text(text, grammar, sensitive)
    lines = [f"{n}. {s}" for n, s in enumerate(suggestions, 1)]

    return highlighted, "\n".join(lines), suggestions
91
 
92
  def apply_changes(text, suggestions, approved_indices):
 
105
  orig_sentence = orig_match.group(1)
106
  text = text.replace(orig_sentence, new_text)
107
  else:
108
+ orig_match = re.search(r"Replace '([^']+)' with '([^']+)'", suggestion)
109
  if orig_match:
110
+ text = text.replace(orig_match.group(1), orig_match.group(2))
 
111
  except ValueError:
112
  continue
113
  return text
114
 
 
115
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
116
+ gr.Markdown("# AI Blog Reviewer")
117
+ gr.Markdown("Highlight grammar & sensitive issues. Rephrase toxic content. Approve and apply changes.")
118
 
119
  input_type = gr.Radio(["Text", "URL"], label="Input Type", value="Text")
120
  text_input = gr.Textbox(label="Blog Text", lines=10, visible=True)
 
128
 
129
  input_type.change(fn=toggle_input, inputs=input_type, outputs=[text_input, url_input])
130
 
131
+ review_btn = gr.Button("Review Content")
132
+ highlighted = gr.HTML()
133
+ suggestions = gr.Textbox(label="Suggestions", lines=10)
134
+ approved = gr.Textbox(label="Approve Suggestions (e.g., 1,3)")
135
+ apply_btn = gr.Button("Apply Changes")
136
+ final = gr.Textbox(label="Final Text", lines=10)
137
 
138
  suggestions_state = gr.State()
139
 
140
+ review_btn.click(fn=review_blog, inputs=[input_type, text_input, url_input],
141
+ outputs=[highlighted, suggestions, suggestions_state])
142
+ apply_btn.click(fn=apply_changes, inputs=[text_input, suggestions_state, approved], outputs=final)
 
 
 
 
143
 
144
  demo.launch()