JaishnaCodz committed on
Commit
dbbf7cc
·
verified ·
1 Parent(s): 229ed72

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +89 -119
app.py CHANGED
@@ -1,145 +1,115 @@
1
  import gradio as gr
2
- import language_tool_python
3
- import nltk
4
  from transformers import pipeline
5
  from newspaper import Article
 
6
  from nltk.tokenize import sent_tokenize
7
  import re
8
 
 
9
  nltk.download("punkt")
10
 
11
- # Correct way to connect to the locally running LanguageTool server
12
- grammar_tool = language_tool_python.LanguageTool(
13
- language='en-US',
14
- remote_server='http://localhost:8081/v2/'
15
- )
16
-
17
-
18
- # Hugging Face pipelines
19
- toxicity_classifier = pipeline("text-classification", model="unitary/toxic-bert")
20
- summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-6-6")
21
 
 
22
  def extract_text(input_type, text_input, url_input):
23
- if input_type == "URL" and url_input:
24
- article = Article(url_input)
25
- article.download()
26
- article.parse()
27
- return article.text
28
- return text_input
29
-
30
- def check_grammar(text):
31
- matches = grammar_tool.check(text)
32
- return [
33
- {
34
- "text": match.context,
35
- "error": match.message,
36
- "suggestions": match.replacements,
37
- "offset": match.offset,
38
- "length": match.errorLength
39
- } for match in matches
40
- ]
41
-
42
- def detect_sensitive_content(text):
43
- sentences = sent_tokenize(text)
44
- sensitive_issues = []
45
- for i, sentence in enumerate(sentences):
46
- result = toxicity_classifier(sentence)
47
- if result[0]['label'] == 'toxic' and result[0]['score'] > 0.7:
48
- sensitive_issues.append({
49
- "sentence": sentence,
50
- "score": result[0]['score'],
51
- "index": i
52
- })
53
- return sensitive_issues
54
-
55
- def generate_suggestions(text, grammar_issues, sensitive_issues):
56
- suggestions = []
57
- for issue in grammar_issues:
58
- if issue['suggestions']:
59
- suggestions.append(f"Replace '{issue['text']}' with '{issue['suggestions'][0]}' ({issue['error']})")
60
- for issue in sensitive_issues:
61
- summary = summarizer(issue['sentence'], max_length=50, min_length=10, do_sample=False)[0]['summary_text']
62
- suggestions.append(f"Rephrase sensitive content '{issue['sentence']}' to: '{summary}' (Toxicity score: {issue['score']:.2f})")
63
- return suggestions
64
-
65
- def highlight_text(text, grammar_issues, sensitive_issues):
66
- highlighted = text
67
- offset_adjust = 0
68
- for issue in grammar_issues:
69
- start = issue['offset'] + offset_adjust
70
- end = start + issue['length']
71
- error_text = highlighted[start:end]
72
- span = f"<span style='background-color: yellow'>{error_text}</span>"
73
- highlighted = highlighted[:start] + span + highlighted[end:]
74
- offset_adjust += len(span) - len(error_text)
75
- for issue in sensitive_issues:
76
- highlighted = highlighted.replace(
77
- issue['sentence'],
78
- f"<span style='background-color: red'>{issue['sentence']}</span>"
79
- )
80
- return highlighted
81
 
 
82
  def review_blog(input_type, text_input, url_input):
83
- if not text_input and not url_input:
84
- return "Please provide text or a URL.", "", []
85
  text = extract_text(input_type, text_input, url_input)
86
- grammar_issues = check_grammar(text)
87
- sensitive_issues = detect_sensitive_content(text)
88
- suggestions = generate_suggestions(text, grammar_issues, sensitive_issues)
89
- highlighted_text = highlight_text(text, grammar_issues, sensitive_issues)
90
- suggestions_text = "\n".join([f"{i+1}. {s}" for i, s in enumerate(suggestions)])
91
- return highlighted_text, suggestions_text, suggestions
92
-
93
- def apply_changes(text, suggestions, approved_indices):
94
  sentences = sent_tokenize(text)
95
- for idx in approved_indices.split(','):
96
- try:
97
- idx = int(idx.strip()) - 1
98
- if idx < len(suggestions):
99
- suggestion = suggestions[idx]
100
- match = re.search(r"'([^']+)'$", suggestion)
101
- if match:
102
- new_text = match.group(1)
103
- if "Rephrase sensitive content" in suggestion:
104
- orig_match = re.search(r"'([^']+)'\s+to:", suggestion)
105
- if orig_match:
106
- orig_sentence = orig_match.group(1)
107
- text = text.replace(orig_sentence, new_text)
108
- else:
109
- orig_match = re.search(r"Replace '([^']+)' with '([^']+)'", suggestion)
110
- if orig_match:
111
- text = text.replace(orig_match.group(1), orig_match.group(2))
112
- except ValueError:
113
- continue
114
- return text
115
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
116
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
117
- gr.Markdown("#AI Blog Reviewer")
118
- gr.Markdown("Highlight grammar & sensitive issues. Rephrase toxic content. Approve and apply changes.")
119
 
120
- input_type = gr.Radio(["Text", "URL"], label="Input Type", value="Text")
121
  text_input = gr.Textbox(label="Blog Text", lines=10, visible=True)
122
  url_input = gr.Textbox(label="Blog URL", visible=False)
123
 
124
- def toggle_input(type):
125
- return {
126
- text_input: gr.update(visible=type == "Text"),
127
- url_input: gr.update(visible=type == "URL")
128
- }
129
-
130
- input_type.change(fn=toggle_input, inputs=input_type, outputs=[text_input, url_input])
131
 
132
- review_btn = gr.Button("Review Content")
133
- highlighted = gr.HTML()
134
- suggestions = gr.Textbox(label="Suggestions", lines=10)
135
- approved = gr.Textbox(label="Approve Suggestions (e.g., 1,3)")
136
- apply_btn = gr.Button("Apply Changes")
137
- final = gr.Textbox(label="Final Text", lines=10)
138
 
139
  suggestions_state = gr.State()
140
 
141
- review_btn.click(fn=review_blog, inputs=[input_type, text_input, url_input],
142
- outputs=[highlighted, suggestions, suggestions_state])
143
- apply_btn.click(fn=apply_changes, inputs=[text_input, suggestions_state, approved], outputs=final)
 
 
 
 
 
 
 
 
144
 
145
  demo.launch()
 
1
  import gradio as gr
 
 
2
  from transformers import pipeline
3
  from newspaper import Article
4
+ import nltk
5
  from nltk.tokenize import sent_tokenize
6
  import re
7
 
8
+ # Download punkt tokenizer
9
  nltk.download("punkt")
10
 
11
+ # Load Hugging Face pipelines
12
+ grammar_corrector = pipeline("text2text-generation", model="pszemraj/flan-t5-base-grammar-synthesis")
13
+ toxicity_detector = pipeline("text-classification", model="unitary/toxic-bert")
 
 
 
 
 
 
 
14
 
15
+ # Utility: extract blog text from URL
16
  def extract_text(input_type, text_input, url_input):
17
+ if input_type == "Text":
18
+ return text_input
19
+ elif input_type == "URL":
20
+ try:
21
+ article = Article(url_input)
22
+ article.download()
23
+ article.parse()
24
+ return article.text
25
+ except Exception as e:
26
+ return f"Error extracting article: {str(e)}"
27
+ return ""
28
+
29
+ # Highlight grammar & sensitive content
30
+ def highlight_text(text, grammar_sentences, toxic_sentences):
31
+ for s in grammar_sentences:
32
+ text = text.replace(s, f"<span style='background-color: yellow'>{s}</span>")
33
+ for s in toxic_sentences:
34
+ text = text.replace(s, f"<span style='background-color: red'>{s}</span>")
35
+ return text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
 
37
+ # Main blog review function
38
  def review_blog(input_type, text_input, url_input):
 
 
39
  text = extract_text(input_type, text_input, url_input)
40
+ if not text or text.startswith("Error"):
41
+ return text, "", []
42
+
 
 
 
 
 
43
  sentences = sent_tokenize(text)
44
+ grammar_issues = []
45
+ toxic_issues = []
46
+ suggestions = []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
 
48
+ for sent in sentences:
49
+ # Check grammar by comparing original and corrected
50
+ corrected = grammar_corrector(sent, max_length=128, do_sample=False)[0]['generated_text']
51
+ if corrected.strip() != sent.strip():
52
+ grammar_issues.append(sent)
53
+ suggestions.append(f"Grammar: Replace '{sent}' → '{corrected}'")
54
+
55
+ # Check for toxicity
56
+ result = toxicity_detector(sent)
57
+ if result[0]['label'] == 'toxic' and result[0]['score'] > 0.7:
58
+ toxic_issues.append(sent)
59
+ suggestions.append(f"Toxicity: Rephrase '{sent}' (score: {result[0]['score']:.2f})")
60
+
61
+ highlighted = highlight_text(text, grammar_issues, toxic_issues)
62
+ sug_text = "\n".join(f"{i+1}. {s}" for i, s in enumerate(suggestions))
63
+ return highlighted, sug_text, suggestions
64
+
65
+ # Apply approved suggestions
66
+ def apply_changes(original_text, suggestions, indices):
67
+ try:
68
+ indices = [int(i.strip()) - 1 for i in indices.split(",") if i.strip().isdigit()]
69
+ sentences = sent_tokenize(original_text)
70
+ for i in indices:
71
+ if i < len(suggestions):
72
+ match = re.search(r"'(.*?)'\s*→\s*'(.*?)'", suggestions[i])
73
+ if match:
74
+ old = match.group(1)
75
+ new = match.group(2)
76
+ original_text = original_text.replace(old, new)
77
+ return original_text
78
+ except Exception as e:
79
+ return f"Error applying changes: {str(e)}"
80
+
81
+ # Gradio UI
82
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
83
+ gr.Markdown("## ✨ Blog Content Reviewer (LLM-powered)")
84
+ gr.Markdown("Paste blog content or a blog URL. AI will detect grammar issues & sensitive content.")
85
 
86
+ input_type = gr.Radio(["Text", "URL"], value="Text", label="Input Type")
87
  text_input = gr.Textbox(label="Blog Text", lines=10, visible=True)
88
  url_input = gr.Textbox(label="Blog URL", visible=False)
89
 
90
+ input_type.change(lambda t: {text_input: gr.update(visible=t=="Text"),
91
+ url_input: gr.update(visible=t=="URL")},
92
+ input_type, [text_input, url_input])
 
 
 
 
93
 
94
+ review_button = gr.Button("Review")
95
+ highlighted_output = gr.HTML()
96
+ suggestions_output = gr.Textbox(label="Suggestions", lines=10)
97
+ approve_input = gr.Textbox(label="Approve Suggestions (e.g., 1,2)")
98
+ apply_button = gr.Button("Apply Changes")
99
+ final_text = gr.Textbox(label="Final Output", lines=10)
100
 
101
  suggestions_state = gr.State()
102
 
103
+ review_button.click(
104
+ fn=review_blog,
105
+ inputs=[input_type, text_input, url_input],
106
+ outputs=[highlighted_output, suggestions_output, suggestions_state]
107
+ )
108
+
109
+ apply_button.click(
110
+ fn=apply_changes,
111
+ inputs=[text_input, suggestions_state, approve_input],
112
+ outputs=[final_text]
113
+ )
114
 
115
  demo.launch()